aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile3
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h13
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h21
-rw-r--r--tools/arch/x86/include/asm/msr-index.h13
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h12
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst16
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-feature.rst12
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst5
-rw-r--r--tools/bpf/bpftool/Makefile13
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool30
-rw-r--r--tools/bpf/bpftool/btf.c57
-rw-r--r--tools/bpf/bpftool/btf_dumper.c29
-rw-r--r--tools/bpf/bpftool/cgroup.c162
-rw-r--r--tools/bpf/bpftool/common.c160
-rw-r--r--tools/bpf/bpftool/feature.c148
-rw-r--r--tools/bpf/bpftool/gen.c115
-rw-r--r--tools/bpf/bpftool/link.c61
-rw-r--r--tools/bpf/bpftool/main.c2
-rw-r--r--tools/bpf/bpftool/main.h24
-rw-r--r--tools/bpf/bpftool/map.c84
-rw-r--r--tools/bpf/bpftool/pids.c1
-rw-r--r--tools/bpf/bpftool/prog.c81
-rw-r--r--tools/bpf/bpftool/struct_ops.c2
-rw-r--r--tools/bpf/resolve_btfids/main.c40
-rw-r--r--tools/bpf/runqslower/Makefile7
-rw-r--r--tools/include/linux/btf_ids.h35
-rw-r--r--tools/include/linux/objtool.h11
-rw-r--r--tools/include/linux/sched/mm.h2
-rw-r--r--tools/include/nolibc/Makefile37
-rw-r--r--tools/include/nolibc/stdio.h4
-rw-r--r--tools/include/nolibc/stdlib.h7
-rw-r--r--tools/include/uapi/asm-generic/fcntl.h11
-rw-r--r--tools/include/uapi/drm/i915_drm.h6
-rw-r--r--tools/include/uapi/linux/bpf.h107
-rw-r--r--tools/include/uapi/linux/btf.h17
-rw-r--r--tools/include/uapi/linux/fs.h2
-rw-r--r--tools/include/uapi/linux/if_link.h1
-rw-r--r--tools/include/uapi/linux/if_tun.h2
-rw-r--r--tools/include/uapi/linux/kvm.h11
-rw-r--r--tools/include/uapi/linux/perf_event.h2
-rw-r--r--tools/include/uapi/linux/pkt_cls.h4
-rw-r--r--tools/include/uapi/linux/seg6.h4
-rw-r--r--tools/include/uapi/linux/usbdevice_fs.h4
-rw-r--r--tools/include/uapi/sound/asound.h2
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile2
-rw-r--r--tools/lib/bpf/bpf.c209
-rw-r--r--tools/lib/bpf/bpf.h109
-rw-r--r--tools/lib/bpf/bpf_core_read.h11
-rw-r--r--tools/lib/bpf/bpf_helpers.h13
-rw-r--r--tools/lib/bpf/bpf_tracing.h60
-rw-r--r--tools/lib/bpf/btf.c412
-rw-r--r--tools/lib/bpf/btf.h118
-rw-r--r--tools/lib/bpf/btf_dump.c160
-rw-r--r--tools/lib/bpf/gen_loader.c2
-rw-r--r--tools/lib/bpf/libbpf.c2222
-rw-r--r--tools/lib/bpf/libbpf.h569
-rw-r--r--tools/lib/bpf/libbpf.map123
-rw-r--r--tools/lib/bpf/libbpf_common.h16
-rw-r--r--tools/lib/bpf/libbpf_internal.h39
-rw-r--r--tools/lib/bpf/libbpf_legacy.h28
-rw-r--r--tools/lib/bpf/libbpf_probes.c125
-rw-r--r--tools/lib/bpf/linker.c7
-rw-r--r--tools/lib/bpf/netlink.c62
-rw-r--r--tools/lib/bpf/relo_core.c479
-rw-r--r--tools/lib/bpf/relo_core.h10
-rw-r--r--tools/lib/bpf/usdt.bpf.h16
-rw-r--r--tools/lib/bpf/usdt.c129
-rw-r--r--tools/objtool/arch/x86/decode.c5
-rw-r--r--tools/objtool/builtin-check.c13
-rw-r--r--tools/objtool/check.c335
-rw-r--r--tools/objtool/include/objtool/arch.h1
-rw-r--r--tools/objtool/include/objtool/builtin.h2
-rw-r--r--tools/objtool/include/objtool/check.h24
-rw-r--r--tools/objtool/include/objtool/elf.h1
-rw-r--r--tools/objtool/include/objtool/objtool.h1
-rw-r--r--tools/objtool/objtool.c1
-rw-r--r--tools/perf/builtin-trace.c2
-rwxr-xr-xtools/perf/scripts/python/arm-cs-trace-disasm.py34
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c27
-rw-r--r--tools/perf/util/bpf-loader.c222
-rw-r--r--tools/perf/util/symbol-elf.c56
-rw-r--r--tools/power/cpupower/debug/i386/dump_psb.c6
-rw-r--r--tools/power/pm-graph/README6
-rwxr-xr-xtools/power/pm-graph/bootgraph.py20
-rw-r--r--tools/power/pm-graph/config/custom-timeline-functions.cfg2
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py518
-rw-r--r--tools/power/x86/intel-speed-select/hfi-events.c2
-rw-r--r--tools/power/x86/intel-speed-select/isst-daemon.c2
-rw-r--r--tools/power/x86/turbostat/turbostat.8200
-rw-r--r--tools/power/x86/turbostat/turbostat.c240
-rw-r--r--tools/spi/spidev_test.c11
-rw-r--r--tools/testing/kunit/configs/arch_uml.config5
-rw-r--r--tools/testing/kunit/configs/coverage_uml.config11
-rwxr-xr-xtools/testing/kunit/kunit.py83
-rw-r--r--tools/testing/kunit/kunit_config.py54
-rw-r--r--tools/testing/kunit/kunit_kernel.py104
-rw-r--r--tools/testing/kunit/kunit_parser.py63
-rw-r--r--tools/testing/kunit/kunit_printer.py48
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py214
-rw-r--r--tools/testing/selftests/Makefile30
-rw-r--r--tools/testing/selftests/arm64/mte/Makefile1
-rw-r--r--tools/testing/selftests/arm64/signal/Makefile1
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.h4
-rw-r--r--tools/testing/selftests/bpf/.gitignore3
-rw-r--r--tools/testing/selftests/bpf/DENYLIST6
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x67
-rw-r--r--tools/testing/selftests/bpf/Makefile34
-rw-r--r--tools/testing/selftests/bpf/bench.c99
-rw-r--r--tools/testing/selftests/bpf/bench.h16
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c96
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage.c287
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c281
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh11
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_local_storage.sh24
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh11
-rw-r--r--tools/testing/selftests/bpf/benchs/run_common.sh17
-rw-r--r--tools/testing/selftests/bpf/bpf_legacy.h9
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c10
-rw-r--r--tools/testing/selftests/bpf/btf_helpers.c25
-rw-r--r--tools/testing/selftests/bpf/config93
-rw-r--r--tools/testing/selftests/bpf/config.s390x147
-rw-r--r--tools/testing/selftests/bpf/config.x86_64251
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c49
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_loop.c62
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c157
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_write.c126
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_extern.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c140
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_stress.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_str.c207
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c313
-rw-r--r--tools/testing/selftests/bpf/prog_tests/probe_user.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/resolve_btfids.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skeleton.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_fields.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c183
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c40
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter.h7
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ksym.c74
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_loop.c114
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_syscall_macro.c6
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h1
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c3
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h190
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c10
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c4
-rw-r--r--tools/testing/selftests/bpf/progs/local_storage_bench.c104
-rw-r--r--tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c67
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_cgroup.c180
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c14
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c35
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c21
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c60
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe.c73
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf.c85
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c134
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_haskv.c51
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_newkv.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_extern.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c70
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c19
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c49
-rw-r--r--tools/testing/selftests/bpf/progs/test_probe_user.c50
-rw-r--r--tools/testing/selftests/bpf/progs/test_skeleton.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_dtime.c53
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c80
-rw-r--r--tools/testing/selftests/bpf/progs/test_varlen.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c30
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c843
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py182
-rw-r--r--tools/testing/selftests/bpf/test_btf.h3
-rw-r--r--tools/testing/selftests/bpf/test_progs.c7
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c367
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_veth.sh6
-rwxr-xr-xtools/testing/selftests/bpf/test_xdping.sh4
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh6
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_loop_inline.c264
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c53
-rw-r--r--tools/testing/selftests/bpf/verifier/jmp32.c21
-rw-r--r--tools/testing/selftests/bpf/verifier/jump.c22
-rwxr-xr-xtools/testing/selftests/bpf/vmtest.sh53
-rw-r--r--tools/testing/selftests/bpf/xdp_synproxy.c466
-rw-r--r--tools/testing/selftests/bpf/xsk.c (renamed from tools/lib/bpf/xsk.c)92
-rw-r--r--tools/testing/selftests/bpf/xsk.h (renamed from tools/lib/bpf/xsk.h)30
-rwxr-xr-xtools/testing/selftests/bpf/xsk_prereqs.sh4
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c (renamed from tools/testing/selftests/bpf/xdpxceiver.c)25
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h (renamed from tools/testing/selftests/bpf/xdpxceiver.h)6
-rw-r--r--tools/testing/selftests/damon/_chk_dependency.sh10
-rw-r--r--tools/testing/selftests/drivers/dma-buf/udmabuf.c3
-rwxr-xr-xtools/testing/selftests/drivers/gpu/drm_mm.sh4
-rw-r--r--tools/testing/selftests/drivers/net/dsa/Makefile17
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh54
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh107
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh31
l---------tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh1
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh15
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh29
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh34
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh17
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/fib.sh45
-rw-r--r--tools/testing/selftests/drivers/s390x/uvdevice/Makefile1
-rw-r--r--tools/testing/selftests/filesystems/binderfs/config1
-rw-r--r--tools/testing/selftests/futex/functional/Makefile1
-rw-r--r--tools/testing/selftests/gpio/Makefile2
-rw-r--r--tools/testing/selftests/kcmp/kcmp_test.c6
-rwxr-xr-xtools/testing/selftests/kexec/kexec_common_lib.sh36
-rwxr-xr-xtools/testing/selftests/kselftest_deps.sh2
-rw-r--r--tools/testing/selftests/kselftest_module.h4
-rw-r--r--tools/testing/selftests/kvm/.gitignore10
-rw-r--r--tools/testing/selftests/kvm/Makefile18
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c88
-rw-r--r--tools/testing/selftests/kvm/aarch64/debug-exceptions.c26
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c30
-rw-r--r--tools/testing/selftests/kvm/aarch64/hypercalls.c97
-rw-r--r--tools/testing/selftests/kvm/aarch64/psci_test.c72
-rw-r--r--tools/testing/selftests/kvm/aarch64/vcpu_width_config.c71
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_init.c446
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_irq.c44
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c92
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c49
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c81
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c95
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c29
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h28
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/vgic.h6
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_base.h823
-rw-r--r--tools/testing/selftests/kvm/include/perf_test_util.h7
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h20
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h7
-rw-r--r--tools/testing/selftests/kvm/include/ucall_common.h65
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/apic.h1
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/evmcs.h2
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/mce.h25
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h474
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/svm.h2
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/svm_util.h27
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h2
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c183
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c10
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c66
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c81
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/ucall.c13
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/vgic.c54
-rw-r--r--tools/testing/selftests/kvm/lib/elf.c1
-rw-r--r--tools/testing/selftests/kvm/lib/guest_modes.c6
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c1207
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h128
-rw-r--r--tools/testing/selftests/kvm/lib/perf_test_util.c92
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c111
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/ucall.c16
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c11
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c44
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/ucall.c10
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c11
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c811
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/svm.c17
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/ucall.c12
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c26
-rw-r--r--tools/testing/selftests/kvm/max_guest_memory_test.c53
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c13
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c32
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c30
-rw-r--r--tools/testing/selftests/kvm/s390x/memop.c182
-rw-r--r--tools/testing/selftests/kvm/s390x/resets.c178
-rw-r--r--tools/testing/selftests/kvm/s390x/sync_regs_test.c121
-rw-r--r--tools/testing/selftests/kvm/s390x/tprot.c68
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c46
-rw-r--r--tools/testing/selftests/kvm/steal_time.c123
-rw-r--r--tools/testing/selftests/kvm/system_counter_offset_test.c38
-rw-r--r--tools/testing/selftests/kvm/x86_64/amx_test.c91
-rw-r--r--tools/testing/selftests/kvm/x86_64/cpuid_test.c105
-rw-r--r--tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c43
-rw-r--r--tools/testing/selftests/kvm/x86_64/debug_regs.c77
-rw-r--r--tools/testing/selftests/kvm/x86_64/emulator_error_test.c85
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c65
-rw-r--r--tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c47
-rw-r--r--tools/testing/selftests/kvm/x86_64/get_msr_index_features.c117
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_clock.c28
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c48
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_features.c406
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c28
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_clock_test.c32
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c117
-rw-r--r--tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c44
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmio_warning_test.c16
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmu_role_test.c147
-rw-r--r--tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c131
-rw-r--r--tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c269
-rwxr-xr-xtools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh59
-rw-r--r--tools/testing/selftests/kvm/x86_64/platform_info_test.c51
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c117
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c95
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_sregs_test.c75
-rw-r--r--tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c131
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c46
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c39
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c25
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c211
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c20
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c62
-rw-r--r--tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c90
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c39
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c25
-rw-r--r--tools/testing/selftests/kvm/x86_64/ucna_injection_test.c316
-rw-r--r--tools/testing/selftests/kvm/x86_64/userspace_io_test.c22
-rw-r--r--tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c188
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c32
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c21
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c18
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c68
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c22
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c84
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c33
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c54
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c38
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c105
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c17
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c48
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_state_test.c82
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c75
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c27
-rw-r--r--tools/testing/selftests/kvm/x86_64/xss_msr_test.c56
-rw-r--r--tools/testing/selftests/landlock/Makefile10
-rw-r--r--tools/testing/selftests/lib.mk38
-rw-r--r--tools/testing/selftests/net/.gitignore2
-rw-r--r--tools/testing/selftests/net/Makefile7
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile3
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connect.c148
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_untracked_subnets.sh308
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c2
-rwxr-xr-xtools/testing/selftests/net/fib_nexthop_nongw.sh119
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh23
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile3
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh118
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_extended_state.sh43
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_asymmetric.sh2
-rw-r--r--tools/testing/selftests/net/io_uring_zerocopy_tx.c605
-rwxr-xr-xtools/testing/selftests/net/io_uring_zerocopy_tx.sh131
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh12
-rw-r--r--tools/testing/selftests/net/ipv6_flowlabel.c75
-rwxr-xr-xtools/testing/selftests/net/ipv6_flowlabel.sh16
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile3
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh116
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c75
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh14
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh72
-rwxr-xr-xtools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh879
-rwxr-xr-xtools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh821
-rw-r--r--tools/testing/selftests/net/tls.c124
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh2
-rwxr-xr-xtools/testing/selftests/net/veth.sh6
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-check-branches.sh11
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote.sh1
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh6
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv.h50
-rw-r--r--tools/testing/selftests/rseq/rseq.c3
-rw-r--r--tools/testing/selftests/safesetid/Makefile2
-rw-r--r--tools/testing/selftests/safesetid/safesetid-test.c295
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c2
-rw-r--r--tools/testing/selftests/sync/config1
-rw-r--r--tools/testing/selftests/tc-testing/.gitignore1
-rw-r--r--tools/testing/selftests/tc-testing/Makefile1
-rw-r--r--tools/testing/selftests/timens/Makefile2
-rw-r--r--tools/testing/selftests/timens/vfork_exec.c90
-rw-r--r--tools/testing/selftests/timers/adjtick.c2
-rw-r--r--tools/testing/selftests/timers/alarmtimer-suspend.c2
-rw-r--r--tools/testing/selftests/timers/change_skew.c2
-rw-r--r--tools/testing/selftests/timers/clocksource-switch.c71
-rw-r--r--tools/testing/selftests/timers/inconsistency-check.c32
-rw-r--r--tools/testing/selftests/timers/nanosleep.c18
-rw-r--r--tools/testing/selftests/timers/raw_skew.c2
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c2
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c2
-rw-r--r--tools/testing/selftests/tpm2/settings1
-rw-r--r--tools/testing/selftests/vm/Makefile1
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c2
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile37
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/arm.config1
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/armeb.config1
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/i686.config8
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/m68k.config10
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips.config1
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mipsel.config1
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc.config1
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/um.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/x86_64.config7
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/init.c11
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config2
-rw-r--r--tools/thermal/tmon/pid.c2
-rw-r--r--tools/thermal/tmon/tmon.h3
-rw-r--r--tools/tracing/rtla/Makefile2
-rw-r--r--tools/tracing/rtla/src/trace.c9
-rw-r--r--tools/tracing/rtla/src/utils.c7
-rw-r--r--tools/usb/testusb.c18
-rw-r--r--tools/vm/slabinfo.c26
416 files changed, 21306 insertions, 10262 deletions
diff --git a/tools/Makefile b/tools/Makefile
index c074e42fd92f..e497875fc7e3 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -78,6 +78,9 @@ bpf/%: FORCE
libapi: FORCE
$(call descend,lib/api)
+nolibc: FORCE
+ $(call descend,include/nolibc)
+
nolibc_%: FORCE
$(call descend,include/nolibc,$(patsubst nolibc_%,%,$@))
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 03acc823838a..a77b915d36a8 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -203,8 +203,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
@@ -296,6 +296,13 @@
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
@@ -316,6 +323,7 @@
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
+#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
@@ -447,5 +455,6 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 36369e76cc63..33d2cd04d254 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -50,6 +50,25 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
+#ifdef CONFIG_RETPOLINE
+# define DISABLE_RETPOLINE 0
+#else
+# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
+#endif
+
+#ifdef CONFIG_RETHUNK
+# define DISABLE_RETHUNK 0
+#else
+# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
+#endif
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+# define DISABLE_UNRET 0
+#else
+# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
+#endif
+
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
@@ -82,7 +101,7 @@
#define DISABLED_MASK8 (DISABLE_TDX_GUEST)
#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
-#define DISABLED_MASK11 0
+#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index d27e0581b777..cc615be27a54 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -51,6 +51,8 @@
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
+#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -93,6 +95,7 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
+#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
#define ARCH_CAP_SSB_NO BIT(4) /*
* Not susceptible to Speculative Store Bypass
@@ -140,6 +143,13 @@
* bit available to control VERW
* behavior.
*/
+#define ARCH_CAP_RRSBA BIT(19) /*
+ * Indicates RET may use predictors
+ * other than the RSB. With eIBRS
+ * enabled predictions in kernel mode
+ * are restricted to targets in
+ * kernel.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -567,6 +577,9 @@
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
+#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 21614807a2cb..ec53c9fa1da9 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -198,13 +198,13 @@ struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */
__u32 pad;
- struct kvm_msr_entry entries[0];
+ struct kvm_msr_entry entries[];
};
/* for KVM_GET_MSR_INDEX_LIST */
struct kvm_msr_list {
__u32 nmsrs; /* number of msrs in entries */
- __u32 indices[0];
+ __u32 indices[];
};
/* Maximum size of any access bitmap in bytes */
@@ -241,7 +241,7 @@ struct kvm_cpuid_entry {
struct kvm_cpuid {
__u32 nent;
__u32 padding;
- struct kvm_cpuid_entry entries[0];
+ struct kvm_cpuid_entry entries[];
};
struct kvm_cpuid_entry2 {
@@ -263,7 +263,7 @@ struct kvm_cpuid_entry2 {
struct kvm_cpuid2 {
__u32 nent;
__u32 padding;
- struct kvm_cpuid_entry2 entries[0];
+ struct kvm_cpuid_entry2 entries[];
};
/* for KVM_GET_PIT and KVM_SET_PIT */
@@ -389,7 +389,7 @@ struct kvm_xsave {
* the contents of CPUID leaf 0xD on the host.
*/
__u32 region[1024];
- __u32 extra[0];
+ __u32 extra[];
};
#define KVM_MAX_XCRS 16
@@ -516,7 +516,7 @@ struct kvm_pmu_event_filter {
__u32 fixed_counter_bitmap;
__u32 flags;
__u32 pad[4];
- __u64 events[0];
+ __u64 events[];
};
#define KVM_PMU_EVENT_ALLOW 0
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index a17e9aa314fd..bd015ec9847b 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -31,11 +31,17 @@ CGROUP COMMANDS
| **bpftool** **cgroup help**
|
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
-| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
-| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
-| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
-| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** |
-| **sock_release** }
+| *ATTACH_TYPE* := { **cgroup_inet_ingress** | **cgroup_inet_egress** |
+| **cgroup_inet_sock_create** | **cgroup_sock_ops** |
+| **cgroup_device** | **cgroup_inet4_bind** | **cgroup_inet6_bind** |
+| **cgroup_inet4_post_bind** | **cgroup_inet6_post_bind** |
+| **cgroup_inet4_connect** | **cgroup_inet6_connect** |
+| **cgroup_inet4_getpeername** | **cgroup_inet6_getpeername** |
+| **cgroup_inet4_getsockname** | **cgroup_inet6_getsockname** |
+| **cgroup_udp4_sendmsg** | **cgroup_udp6_sendmsg** |
+| **cgroup_udp4_recvmsg** | **cgroup_udp6_recvmsg** |
+| **cgroup_sysctl** | **cgroup_getsockopt** | **cgroup_setsockopt** |
+| **cgroup_inet_sock_release** }
| *ATTACH_FLAGS* := { **multi** | **override** }
DESCRIPTION
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 4ce9a77bc1e0..e44039f89be7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -24,9 +24,11 @@ FEATURE COMMANDS
================
| **bpftool** **feature probe** [*COMPONENT*] [**full**] [**unprivileged**] [**macros** [**prefix** *PREFIX*]]
+| **bpftool** **feature list_builtins** *GROUP*
| **bpftool** **feature help**
|
| *COMPONENT* := { **kernel** | **dev** *NAME* }
+| *GROUP* := { **prog_types** | **map_types** | **attach_types** | **link_types** | **helpers** }
DESCRIPTION
===========
@@ -70,6 +72,16 @@ DESCRIPTION
The keywords **full**, **macros** and **prefix** have the
same role as when probing the kernel.
+ **bpftool feature list_builtins** *GROUP*
+ List items known to bpftool. These can be BPF program types
+ (**prog_types**), BPF map types (**map_types**), attach types
+ (**attach_types**), link types (**link_types**), or BPF helper
+ functions (**helpers**). The command does not probe the system, but
+ simply lists the elements that bpftool knows from compilation time,
+ as provided from libbpf (for all object types) or from the BPF UAPI
+ header (list of helpers). This can be used in scripts to iterate over
+ BPF types or helpers.
+
**bpftool feature help**
Print short help message.
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index a2e9359e554c..eb1b2a254eb1 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -53,8 +53,9 @@ PROG COMMANDS
| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** |
| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
-| *ATTACH_TYPE* := {
-| **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
+| *ATTACH_TYPE* := {
+| **sk_msg_verdict** | **sk_skb_verdict** | **sk_skb_stream_verdict** |
+| **sk_skb_stream_parser** | **flow_dissector**
| }
| *METRICs* := {
| **cycles** | **instructions** | **l1d_loads** | **llc_misses** |
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index c6d2c77d0252..6b5b3a99f79d 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -53,7 +53,7 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_
$(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT)
$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \
DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR:/=) prefix= \
- ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers
+ ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) $@ install_headers
$(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR)
$(call QUIET_INSTALL, $@)
@@ -93,10 +93,8 @@ INSTALL ?= install
RM ?= rm -f
FEATURE_USER = .bpftool
-FEATURE_TESTS = libbfd disassembler-four-args zlib libcap \
- clang-bpf-co-re
-FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \
- clang-bpf-co-re
+FEATURE_TESTS = libbfd disassembler-four-args libcap clang-bpf-co-re
+FEATURE_DISPLAY = libbfd disassembler-four-args libcap clang-bpf-co-re
check_feat := 1
NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -204,11 +202,6 @@ $(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@
-$(OUTPUT)feature.o:
-ifneq ($(feature-zlib), 1)
- $(error "No zlib found")
-endif
-
$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP)
$(QUIET_LINK)$(HOSTCC) $(HOST_CFLAGS) $(LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 5df8d72c5179..dc1641e3670e 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -407,8 +407,8 @@ _bpftool()
return 0
;;
5)
- local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \
- skb_verdict stream_verdict stream_parser \
+ local BPFTOOL_PROG_ATTACH_TYPES='sk_msg_verdict \
+ sk_skb_verdict sk_skb_stream_verdict sk_skb_stream_parser \
flow_dissector'
COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) )
return 0
@@ -703,15 +703,8 @@ _bpftool()
return 0
;;
type)
- local BPFTOOL_MAP_CREATE_TYPES='hash array \
- prog_array perf_event_array percpu_hash \
- percpu_array stack_trace cgroup_array lru_hash \
- lru_percpu_hash lpm_trie array_of_maps \
- hash_of_maps devmap devmap_hash sockmap cpumap \
- xskmap sockhash cgroup_storage reuseport_sockarray \
- percpu_cgroup_storage queue stack sk_storage \
- struct_ops ringbuf inode_storage task_storage \
- bloom_filter'
+ local BPFTOOL_MAP_CREATE_TYPES="$(bpftool feature list_builtins map_types 2>/dev/null | \
+ grep -v '^unspec$')"
COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) )
return 0
;;
@@ -1039,12 +1032,8 @@ _bpftool()
return 0
;;
attach|detach)
- local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \
- sock_create sock_ops device \
- bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
- getpeername4 getpeername6 getsockname4 getsockname6 \
- sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
- setsockopt sock_release'
+ local BPFTOOL_CGROUP_ATTACH_TYPES="$(bpftool feature list_builtins attach_types 2>/dev/null | \
+ grep '^cgroup_')"
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag name'
# Check for $prev = $command first
@@ -1173,9 +1162,14 @@ _bpftool()
_bpftool_once_attr 'full unprivileged'
return 0
;;
+ list_builtins)
+ [[ $prev != "$command" ]] && return 0
+ COMPREPLY=( $( compgen -W 'prog_types map_types \
+ attach_types link_types helpers' -- "$cur" ) )
+ ;;
*)
[[ $prev == $object ]] && \
- COMPREPLY=( $( compgen -W 'help probe' -- "$cur" ) )
+ COMPREPLY=( $( compgen -W 'help list_builtins probe' -- "$cur" ) )
;;
esac
;;
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 7e6accb9d9f7..0744bd1150be 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -40,6 +40,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_FLOAT] = "FLOAT",
[BTF_KIND_DECL_TAG] = "DECL_TAG",
[BTF_KIND_TYPE_TAG] = "TYPE_TAG",
+ [BTF_KIND_ENUM64] = "ENUM64",
};
struct btf_attach_point {
@@ -212,26 +213,76 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
case BTF_KIND_ENUM: {
const struct btf_enum *v = (const void *)(t + 1);
__u16 vlen = BTF_INFO_VLEN(t->info);
+ const char *encoding;
int i;
+ encoding = btf_kflag(t) ? "SIGNED" : "UNSIGNED";
if (json_output) {
+ jsonw_string_field(w, "encoding", encoding);
jsonw_uint_field(w, "size", t->size);
jsonw_uint_field(w, "vlen", vlen);
jsonw_name(w, "values");
jsonw_start_array(w);
} else {
- printf(" size=%u vlen=%u", t->size, vlen);
+ printf(" encoding=%s size=%u vlen=%u", encoding, t->size, vlen);
+ }
+ for (i = 0; i < vlen; i++, v++) {
+ const char *name = btf_str(btf, v->name_off);
+
+ if (json_output) {
+ jsonw_start_object(w);
+ jsonw_string_field(w, "name", name);
+ if (btf_kflag(t))
+ jsonw_int_field(w, "val", v->val);
+ else
+ jsonw_uint_field(w, "val", v->val);
+ jsonw_end_object(w);
+ } else {
+ if (btf_kflag(t))
+ printf("\n\t'%s' val=%d", name, v->val);
+ else
+ printf("\n\t'%s' val=%u", name, v->val);
+ }
+ }
+ if (json_output)
+ jsonw_end_array(w);
+ break;
+ }
+ case BTF_KIND_ENUM64: {
+ const struct btf_enum64 *v = btf_enum64(t);
+ __u16 vlen = btf_vlen(t);
+ const char *encoding;
+ int i;
+
+ encoding = btf_kflag(t) ? "SIGNED" : "UNSIGNED";
+ if (json_output) {
+ jsonw_string_field(w, "encoding", encoding);
+ jsonw_uint_field(w, "size", t->size);
+ jsonw_uint_field(w, "vlen", vlen);
+ jsonw_name(w, "values");
+ jsonw_start_array(w);
+ } else {
+ printf(" encoding=%s size=%u vlen=%u", encoding, t->size, vlen);
}
for (i = 0; i < vlen; i++, v++) {
const char *name = btf_str(btf, v->name_off);
+ __u64 val = ((__u64)v->val_hi32 << 32) | v->val_lo32;
if (json_output) {
jsonw_start_object(w);
jsonw_string_field(w, "name", name);
- jsonw_uint_field(w, "val", v->val);
+ if (btf_kflag(t))
+ jsonw_int_field(w, "val", val);
+ else
+ jsonw_uint_field(w, "val", val);
jsonw_end_object(w);
} else {
- printf("\n\t'%s' val=%u", name, v->val);
+ if (btf_kflag(t))
+ printf("\n\t'%s' val=%lldLL", name,
+ (unsigned long long)val);
+ else
+ printf("\n\t'%s' val=%lluULL", name,
+ (unsigned long long)val);
}
}
if (json_output)
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index f5dddf8ef404..125798b0bc5d 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -182,6 +182,32 @@ static int btf_dumper_enum(const struct btf_dumper *d,
return 0;
}
+static int btf_dumper_enum64(const struct btf_dumper *d,
+ const struct btf_type *t,
+ const void *data)
+{
+ const struct btf_enum64 *enums = btf_enum64(t);
+ __u32 val_lo32, val_hi32;
+ __u64 value;
+ __u16 i;
+
+ value = *(__u64 *)data;
+ val_lo32 = (__u32)value;
+ val_hi32 = value >> 32;
+
+ for (i = 0; i < btf_vlen(t); i++) {
+ if (val_lo32 == enums[i].val_lo32 && val_hi32 == enums[i].val_hi32) {
+ jsonw_string(d->jw,
+ btf__name_by_offset(d->btf,
+ enums[i].name_off));
+ return 0;
+ }
+ }
+
+ jsonw_int(d->jw, value);
+ return 0;
+}
+
static bool is_str_array(const struct btf *btf, const struct btf_array *arr,
const char *s)
{
@@ -542,6 +568,8 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
return btf_dumper_array(d, type_id, data);
case BTF_KIND_ENUM:
return btf_dumper_enum(d, t, data);
+ case BTF_KIND_ENUM64:
+ return btf_dumper_enum64(d, t, data);
case BTF_KIND_PTR:
btf_dumper_ptr(d, t, data);
return 0;
@@ -618,6 +646,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
btf__name_by_offset(btf, t->name_off));
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
BTF_PRINT_ARG("enum %s ",
btf__name_by_offset(btf, t->name_off));
break;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index effe136119d7..cced668fb2a3 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -15,43 +15,92 @@
#include <unistd.h>
#include <bpf/bpf.h>
+#include <bpf/btf.h>
#include "main.h"
#define HELP_SPEC_ATTACH_FLAGS \
"ATTACH_FLAGS := { multi | override }"
-#define HELP_SPEC_ATTACH_TYPES \
- " ATTACH_TYPE := { ingress | egress | sock_create |\n" \
- " sock_ops | device | bind4 | bind6 |\n" \
- " post_bind4 | post_bind6 | connect4 |\n" \
- " connect6 | getpeername4 | getpeername6 |\n" \
- " getsockname4 | getsockname6 | sendmsg4 |\n" \
- " sendmsg6 | recvmsg4 | recvmsg6 |\n" \
- " sysctl | getsockopt | setsockopt |\n" \
- " sock_release }"
+#define HELP_SPEC_ATTACH_TYPES \
+ " ATTACH_TYPE := { cgroup_inet_ingress | cgroup_inet_egress |\n" \
+ " cgroup_inet_sock_create | cgroup_sock_ops |\n" \
+ " cgroup_device | cgroup_inet4_bind |\n" \
+ " cgroup_inet6_bind | cgroup_inet4_post_bind |\n" \
+ " cgroup_inet6_post_bind | cgroup_inet4_connect |\n" \
+ " cgroup_inet6_connect | cgroup_inet4_getpeername |\n" \
+ " cgroup_inet6_getpeername | cgroup_inet4_getsockname |\n" \
+ " cgroup_inet6_getsockname | cgroup_udp4_sendmsg |\n" \
+ " cgroup_udp6_sendmsg | cgroup_udp4_recvmsg |\n" \
+ " cgroup_udp6_recvmsg | cgroup_sysctl |\n" \
+ " cgroup_getsockopt | cgroup_setsockopt |\n" \
+ " cgroup_inet_sock_release }"
static unsigned int query_flags;
+static struct btf *btf_vmlinux;
+static __u32 btf_vmlinux_id;
static enum bpf_attach_type parse_attach_type(const char *str)
{
+ const char *attach_type_str;
enum bpf_attach_type type;
- for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
- if (attach_type_name[type] &&
- is_prefix(str, attach_type_name[type]))
+ for (type = 0; ; type++) {
+ attach_type_str = libbpf_bpf_attach_type_str(type);
+ if (!attach_type_str)
+ break;
+ if (!strcmp(str, attach_type_str))
+ return type;
+ }
+
+ /* Also check traditionally used attach type strings. For these we keep
+ * allowing prefixed usage.
+ */
+ for (type = 0; ; type++) {
+ attach_type_str = bpf_attach_type_input_str(type);
+ if (!attach_type_str)
+ break;
+ if (is_prefix(str, attach_type_str))
return type;
}
return __MAX_BPF_ATTACH_TYPE;
}
+static void guess_vmlinux_btf_id(__u32 attach_btf_obj_id)
+{
+ struct bpf_btf_info btf_info = {};
+ __u32 btf_len = sizeof(btf_info);
+ char name[16] = {};
+ int err;
+ int fd;
+
+ btf_info.name = ptr_to_u64(name);
+ btf_info.name_len = sizeof(name);
+
+ fd = bpf_btf_get_fd_by_id(attach_btf_obj_id);
+ if (fd < 0)
+ return;
+
+ err = bpf_obj_get_info_by_fd(fd, &btf_info, &btf_len);
+ if (err)
+ goto out;
+
+ if (btf_info.kernel_btf && strncmp(name, "vmlinux", sizeof(name)) == 0)
+ btf_vmlinux_id = btf_info.id;
+
+out:
+ close(fd);
+}
+
static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
const char *attach_flags_str,
int level)
{
char prog_name[MAX_PROG_FULL_NAME];
+ const char *attach_btf_name = NULL;
struct bpf_prog_info info = {};
+ const char *attach_type_str;
__u32 info_len = sizeof(info);
int prog_fd;
@@ -64,26 +113,50 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
return -1;
}
+ attach_type_str = libbpf_bpf_attach_type_str(attach_type);
+
+ if (btf_vmlinux) {
+ if (!btf_vmlinux_id)
+ guess_vmlinux_btf_id(info.attach_btf_obj_id);
+
+ if (btf_vmlinux_id == info.attach_btf_obj_id &&
+ info.attach_btf_id < btf__type_cnt(btf_vmlinux)) {
+ const struct btf_type *t =
+ btf__type_by_id(btf_vmlinux, info.attach_btf_id);
+ attach_btf_name =
+ btf__name_by_offset(btf_vmlinux, t->name_off);
+ }
+ }
+
get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name));
if (json_output) {
jsonw_start_object(json_wtr);
jsonw_uint_field(json_wtr, "id", info.id);
- if (attach_type < ARRAY_SIZE(attach_type_name))
- jsonw_string_field(json_wtr, "attach_type",
- attach_type_name[attach_type]);
+ if (attach_type_str)
+ jsonw_string_field(json_wtr, "attach_type", attach_type_str);
else
jsonw_uint_field(json_wtr, "attach_type", attach_type);
jsonw_string_field(json_wtr, "attach_flags",
attach_flags_str);
jsonw_string_field(json_wtr, "name", prog_name);
+ if (attach_btf_name)
+ jsonw_string_field(json_wtr, "attach_btf_name", attach_btf_name);
+ jsonw_uint_field(json_wtr, "attach_btf_obj_id", info.attach_btf_obj_id);
+ jsonw_uint_field(json_wtr, "attach_btf_id", info.attach_btf_id);
jsonw_end_object(json_wtr);
} else {
printf("%s%-8u ", level ? " " : "", info.id);
- if (attach_type < ARRAY_SIZE(attach_type_name))
- printf("%-15s", attach_type_name[attach_type]);
+ if (attach_type_str)
+ printf("%-15s", attach_type_str);
else
printf("type %-10u", attach_type);
- printf(" %-15s %-15s\n", attach_flags_str, prog_name);
+ printf(" %-15s %-15s", attach_flags_str, prog_name);
+ if (attach_btf_name)
+ printf(" %-15s", attach_btf_name);
+ else if (info.attach_btf_id)
+ printf(" attach_btf_obj_id=%d attach_btf_id=%d",
+ info.attach_btf_obj_id, info.attach_btf_id);
+ printf("\n");
}
close(prog_fd);
@@ -125,40 +198,49 @@ static int cgroup_has_attached_progs(int cgroup_fd)
static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
int level)
{
+ LIBBPF_OPTS(bpf_prog_query_opts, p);
+ __u32 prog_attach_flags[1024] = {0};
const char *attach_flags_str;
__u32 prog_ids[1024] = {0};
- __u32 prog_cnt, iter;
- __u32 attach_flags;
char buf[32];
+ __u32 iter;
int ret;
- prog_cnt = ARRAY_SIZE(prog_ids);
- ret = bpf_prog_query(cgroup_fd, type, query_flags, &attach_flags,
- prog_ids, &prog_cnt);
+ p.query_flags = query_flags;
+ p.prog_cnt = ARRAY_SIZE(prog_ids);
+ p.prog_ids = prog_ids;
+ p.prog_attach_flags = prog_attach_flags;
+
+ ret = bpf_prog_query_opts(cgroup_fd, type, &p);
if (ret)
return ret;
- if (prog_cnt == 0)
+ if (p.prog_cnt == 0)
return 0;
- switch (attach_flags) {
- case BPF_F_ALLOW_MULTI:
- attach_flags_str = "multi";
- break;
- case BPF_F_ALLOW_OVERRIDE:
- attach_flags_str = "override";
- break;
- case 0:
- attach_flags_str = "";
- break;
- default:
- snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags);
- attach_flags_str = buf;
- }
+ for (iter = 0; iter < p.prog_cnt; iter++) {
+ __u32 attach_flags;
+
+ attach_flags = prog_attach_flags[iter] ?: p.attach_flags;
+
+ switch (attach_flags) {
+ case BPF_F_ALLOW_MULTI:
+ attach_flags_str = "multi";
+ break;
+ case BPF_F_ALLOW_OVERRIDE:
+ attach_flags_str = "override";
+ break;
+ case 0:
+ attach_flags_str = "";
+ break;
+ default:
+ snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags);
+ attach_flags_str = buf;
+ }
- for (iter = 0; iter < prog_cnt; iter++)
show_bpf_prog(prog_ids[iter], type,
attach_flags_str, level);
+ }
return 0;
}
@@ -214,6 +296,7 @@ static int do_show(int argc, char **argv)
printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType",
"AttachFlags", "Name");
+ btf_vmlinux = libbpf_find_kernel_btf();
for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
/*
* Not all attach types may be supported, so it's expected,
@@ -277,6 +360,7 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb,
printf("%s\n", fpath);
}
+ btf_vmlinux = libbpf_find_kernel_btf();
for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
show_attached_bpf_progs(cgroup_fd, type, ftw->level);
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index c740142c24d8..067e9ea59e3b 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -13,13 +13,17 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include <linux/limits.h>
-#include <linux/magic.h>
#include <net/if.h>
#include <sys/mount.h>
+#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/vfs.h>
+#include <linux/filter.h>
+#include <linux/limits.h>
+#include <linux/magic.h>
+#include <linux/unistd.h>
+
#include <bpf/bpf.h>
#include <bpf/hashmap.h>
#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
@@ -31,52 +35,6 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
-const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
- [BPF_CGROUP_INET_INGRESS] = "ingress",
- [BPF_CGROUP_INET_EGRESS] = "egress",
- [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
- [BPF_CGROUP_INET_SOCK_RELEASE] = "sock_release",
- [BPF_CGROUP_SOCK_OPS] = "sock_ops",
- [BPF_CGROUP_DEVICE] = "device",
- [BPF_CGROUP_INET4_BIND] = "bind4",
- [BPF_CGROUP_INET6_BIND] = "bind6",
- [BPF_CGROUP_INET4_CONNECT] = "connect4",
- [BPF_CGROUP_INET6_CONNECT] = "connect6",
- [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
- [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
- [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
- [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
- [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
- [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
- [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
- [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
- [BPF_CGROUP_SYSCTL] = "sysctl",
- [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
- [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
- [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
- [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
- [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
- [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
- [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
- [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
- [BPF_LIRC_MODE2] = "lirc_mode2",
- [BPF_FLOW_DISSECTOR] = "flow_dissector",
- [BPF_TRACE_RAW_TP] = "raw_tp",
- [BPF_TRACE_FENTRY] = "fentry",
- [BPF_TRACE_FEXIT] = "fexit",
- [BPF_MODIFY_RETURN] = "mod_ret",
- [BPF_LSM_MAC] = "lsm_mac",
- [BPF_SK_LOOKUP] = "sk_lookup",
- [BPF_TRACE_ITER] = "trace_iter",
- [BPF_XDP_DEVMAP] = "xdp_devmap",
- [BPF_XDP_CPUMAP] = "xdp_cpumap",
- [BPF_XDP] = "xdp",
- [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select",
- [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate",
- [BPF_PERF_EVENT] = "perf_event",
- [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
-};
-
void p_err(const char *fmt, ...)
{
va_list ap;
@@ -118,6 +76,75 @@ static bool is_bpffs(char *path)
return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
}
+/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to
+ * memcg-based memory accounting for BPF maps and programs. This was done in
+ * commit 97306be45fbe ("Merge branch 'switch to memcg-based memory
+ * accounting'"), in Linux 5.11.
+ *
+ * Libbpf also offers to probe for memcg-based accounting vs rlimit, but does
+ * so by checking for the availability of a given BPF helper and this has
+ * failed on some kernels with backports in the past, see commit 6b4384ff1088
+ * ("Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK"").
+ * Instead, we can probe by lowering the process-based rlimit to 0, trying to
+ * load a BPF object, and resetting the rlimit. If the load succeeds then
+ * memcg-based accounting is supported.
+ *
+ * This would be too dangerous to do in the library, because multithreaded
+ * applications might attempt to load items while the rlimit is at 0. Given
+ * that bpftool is single-threaded, this is fine to do here.
+ */
+static bool known_to_need_rlimit(void)
+{
+ struct rlimit rlim_init, rlim_cur_zero = {};
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ size_t insn_cnt = ARRAY_SIZE(insns);
+ union bpf_attr attr;
+ int prog_fd, err;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = insn_cnt;
+ attr.license = ptr_to_u64("GPL");
+
+ if (getrlimit(RLIMIT_MEMLOCK, &rlim_init))
+ return false;
+
+ /* Drop the soft limit to zero. We maintain the hard limit to its
+ * current value, because lowering it would be a permanent operation
+ * for unprivileged users.
+ */
+ rlim_cur_zero.rlim_max = rlim_init.rlim_max;
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim_cur_zero))
+ return false;
+
+ /* Do not use bpf_prog_load() from libbpf here, because it calls
+ * bump_rlimit_memlock(), interfering with the current probe.
+ */
+ prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ err = errno;
+
+ /* reset soft rlimit to its initial value */
+ setrlimit(RLIMIT_MEMLOCK, &rlim_init);
+
+ if (prog_fd < 0)
+ return err == EPERM;
+
+ close(prog_fd);
+ return false;
+}
+
+void set_max_rlimit(void)
+{
+ struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+
+ if (known_to_need_rlimit())
+ setrlimit(RLIMIT_MEMLOCK, &rinf);
+}
+
static int
mnt_fs(const char *target, const char *type, char *buff, size_t bufflen)
{
@@ -289,6 +316,7 @@ const char *get_fd_type_name(enum bpf_obj_type type)
[BPF_OBJ_UNKNOWN] = "unknown",
[BPF_OBJ_PROG] = "prog",
[BPF_OBJ_MAP] = "map",
+ [BPF_OBJ_LINK] = "link",
};
if (type < 0 || type >= ARRAY_SIZE(names) || !names[type])
@@ -1009,3 +1037,39 @@ bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx)
{
return k1 == k2;
}
+
+const char *bpf_attach_type_input_str(enum bpf_attach_type t)
+{
+ switch (t) {
+ case BPF_CGROUP_INET_INGRESS: return "ingress";
+ case BPF_CGROUP_INET_EGRESS: return "egress";
+ case BPF_CGROUP_INET_SOCK_CREATE: return "sock_create";
+ case BPF_CGROUP_INET_SOCK_RELEASE: return "sock_release";
+ case BPF_CGROUP_SOCK_OPS: return "sock_ops";
+ case BPF_CGROUP_DEVICE: return "device";
+ case BPF_CGROUP_INET4_BIND: return "bind4";
+ case BPF_CGROUP_INET6_BIND: return "bind6";
+ case BPF_CGROUP_INET4_CONNECT: return "connect4";
+ case BPF_CGROUP_INET6_CONNECT: return "connect6";
+ case BPF_CGROUP_INET4_POST_BIND: return "post_bind4";
+ case BPF_CGROUP_INET6_POST_BIND: return "post_bind6";
+ case BPF_CGROUP_INET4_GETPEERNAME: return "getpeername4";
+ case BPF_CGROUP_INET6_GETPEERNAME: return "getpeername6";
+ case BPF_CGROUP_INET4_GETSOCKNAME: return "getsockname4";
+ case BPF_CGROUP_INET6_GETSOCKNAME: return "getsockname6";
+ case BPF_CGROUP_UDP4_SENDMSG: return "sendmsg4";
+ case BPF_CGROUP_UDP6_SENDMSG: return "sendmsg6";
+ case BPF_CGROUP_SYSCTL: return "sysctl";
+ case BPF_CGROUP_UDP4_RECVMSG: return "recvmsg4";
+ case BPF_CGROUP_UDP6_RECVMSG: return "recvmsg6";
+ case BPF_CGROUP_GETSOCKOPT: return "getsockopt";
+ case BPF_CGROUP_SETSOCKOPT: return "setsockopt";
+ case BPF_TRACE_RAW_TP: return "raw_tp";
+ case BPF_TRACE_FENTRY: return "fentry";
+ case BPF_TRACE_FEXIT: return "fexit";
+ case BPF_MODIFY_RETURN: return "mod_ret";
+ case BPF_SK_REUSEPORT_SELECT: return "sk_skb_reuseport_select";
+ case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: return "sk_skb_reuseport_select_or_migrate";
+ default: return libbpf_bpf_attach_type_str(t);
+ }
+}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index d12f46051aac..7ecabf7947fb 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -548,8 +548,8 @@ static bool probe_prog_type_ifindex(enum bpf_prog_type prog_type, __u32 ifindex)
}
static void
-probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
- const char *define_prefix, __u32 ifindex)
+probe_prog_type(enum bpf_prog_type prog_type, const char *prog_type_str,
+ bool *supported_types, const char *define_prefix, __u32 ifindex)
{
char feat_name[128], plain_desc[128], define_name[128];
const char *plain_comment = "eBPF program_type ";
@@ -580,20 +580,16 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
supported_types[prog_type] |= res;
- if (!prog_type_name[prog_type]) {
- p_info("program type name not found (type %d)", prog_type);
- return;
- }
maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
- if (strlen(prog_type_name[prog_type]) > maxlen) {
+ if (strlen(prog_type_str) > maxlen) {
p_info("program type name too long");
return;
}
- sprintf(feat_name, "have_%s_prog_type", prog_type_name[prog_type]);
- sprintf(define_name, "%s_prog_type", prog_type_name[prog_type]);
+ sprintf(feat_name, "have_%s_prog_type", prog_type_str);
+ sprintf(define_name, "%s_prog_type", prog_type_str);
uppercase(define_name, sizeof(define_name));
- sprintf(plain_desc, "%s%s", plain_comment, prog_type_name[prog_type]);
+ sprintf(plain_desc, "%s%s", plain_comment, prog_type_str);
print_bool_feature(feat_name, plain_desc, define_name, res,
define_prefix);
}
@@ -619,8 +615,8 @@ static bool probe_map_type_ifindex(enum bpf_map_type map_type, __u32 ifindex)
}
static void
-probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
- __u32 ifindex)
+probe_map_type(enum bpf_map_type map_type, char const *map_type_str,
+ const char *define_prefix, __u32 ifindex)
{
char feat_name[128], plain_desc[128], define_name[128];
const char *plain_comment = "eBPF map_type ";
@@ -645,20 +641,16 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
* check required for unprivileged users
*/
- if (!map_type_name[map_type]) {
- p_info("map type name not found (type %d)", map_type);
- return;
- }
maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
- if (strlen(map_type_name[map_type]) > maxlen) {
+ if (strlen(map_type_str) > maxlen) {
p_info("map type name too long");
return;
}
- sprintf(feat_name, "have_%s_map_type", map_type_name[map_type]);
- sprintf(define_name, "%s_map_type", map_type_name[map_type]);
+ sprintf(feat_name, "have_%s_map_type", map_type_str);
+ sprintf(define_name, "%s_map_type", map_type_str);
uppercase(define_name, sizeof(define_name));
- sprintf(plain_desc, "%s%s", plain_comment, map_type_name[map_type]);
+ sprintf(plain_desc, "%s%s", plain_comment, map_type_str);
print_bool_feature(feat_name, plain_desc, define_name, res,
define_prefix);
}
@@ -728,10 +720,10 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
}
static void
-probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
+probe_helpers_for_progtype(enum bpf_prog_type prog_type,
+ const char *prog_type_str, bool supported_type,
const char *define_prefix, __u32 ifindex)
{
- const char *ptype_name = prog_type_name[prog_type];
char feat_name[128];
unsigned int id;
bool probe_res = false;
@@ -747,12 +739,12 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
}
if (json_output) {
- sprintf(feat_name, "%s_available_helpers", ptype_name);
+ sprintf(feat_name, "%s_available_helpers", prog_type_str);
jsonw_name(json_wtr, feat_name);
jsonw_start_array(json_wtr);
} else if (!define_prefix) {
printf("eBPF helpers supported for program type %s:",
- ptype_name);
+ prog_type_str);
}
for (id = 1; id < ARRAY_SIZE(helper_name); id++) {
@@ -768,7 +760,7 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
/* fallthrough */
default:
probe_res |= probe_helper_for_progtype(prog_type, supported_type,
- define_prefix, id, ptype_name,
+ define_prefix, id, prog_type_str,
ifindex);
}
}
@@ -943,30 +935,47 @@ static void
section_program_types(bool *supported_types, const char *define_prefix,
__u32 ifindex)
{
- unsigned int i;
+ unsigned int prog_type = BPF_PROG_TYPE_UNSPEC;
+ const char *prog_type_str;
print_start_section("program_types",
"Scanning eBPF program types...",
"/*** eBPF program types ***/",
define_prefix);
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
- probe_prog_type(i, supported_types, define_prefix, ifindex);
+ while (true) {
+ prog_type++;
+ prog_type_str = libbpf_bpf_prog_type_str(prog_type);
+ /* libbpf will return NULL for variants unknown to it. */
+ if (!prog_type_str)
+ break;
+
+ probe_prog_type(prog_type, prog_type_str, supported_types, define_prefix,
+ ifindex);
+ }
print_end_section();
}
static void section_map_types(const char *define_prefix, __u32 ifindex)
{
- unsigned int i;
+ unsigned int map_type = BPF_MAP_TYPE_UNSPEC;
+ const char *map_type_str;
print_start_section("map_types",
"Scanning eBPF map types...",
"/*** eBPF map types ***/",
define_prefix);
- for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++)
- probe_map_type(i, define_prefix, ifindex);
+ while (true) {
+ map_type++;
+ map_type_str = libbpf_bpf_map_type_str(map_type);
+ /* libbpf will return NULL for variants unknown to it. */
+ if (!map_type_str)
+ break;
+
+ probe_map_type(map_type, map_type_str, define_prefix, ifindex);
+ }
print_end_section();
}
@@ -974,7 +983,8 @@ static void section_map_types(const char *define_prefix, __u32 ifindex)
static void
section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex)
{
- unsigned int i;
+ unsigned int prog_type = BPF_PROG_TYPE_UNSPEC;
+ const char *prog_type_str;
print_start_section("helpers",
"Scanning eBPF helper functions...",
@@ -996,9 +1006,18 @@ section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex)
" %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
define_prefix, define_prefix, define_prefix,
define_prefix);
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
- probe_helpers_for_progtype(i, supported_types[i], define_prefix,
+ while (true) {
+ prog_type++;
+ prog_type_str = libbpf_bpf_prog_type_str(prog_type);
+ /* libbpf will return NULL for variants unknown to it. */
+ if (!prog_type_str)
+ break;
+
+ probe_helpers_for_progtype(prog_type, prog_type_str,
+ supported_types[prog_type],
+ define_prefix,
ifindex);
+ }
print_end_section();
}
@@ -1148,6 +1167,8 @@ static int do_probe(int argc, char **argv)
__u32 ifindex = 0;
char *ifname;
+ set_max_rlimit();
+
while (argc) {
if (is_prefix(*argv, "kernel")) {
if (target != COMPONENT_UNSPEC) {
@@ -1237,6 +1258,58 @@ exit_close_json:
return 0;
}
+static const char *get_helper_name(unsigned int id)
+{
+ if (id >= ARRAY_SIZE(helper_name))
+ return NULL;
+
+ return helper_name[id];
+}
+
+static int do_list_builtins(int argc, char **argv)
+{
+ const char *(*get_name)(unsigned int id);
+ unsigned int id = 0;
+
+ if (argc < 1)
+ usage();
+
+ if (is_prefix(*argv, "prog_types")) {
+ get_name = (const char *(*)(unsigned int))libbpf_bpf_prog_type_str;
+ } else if (is_prefix(*argv, "map_types")) {
+ get_name = (const char *(*)(unsigned int))libbpf_bpf_map_type_str;
+ } else if (is_prefix(*argv, "attach_types")) {
+ get_name = (const char *(*)(unsigned int))libbpf_bpf_attach_type_str;
+ } else if (is_prefix(*argv, "link_types")) {
+ get_name = (const char *(*)(unsigned int))libbpf_bpf_link_type_str;
+ } else if (is_prefix(*argv, "helpers")) {
+ get_name = get_helper_name;
+ } else {
+ p_err("expected 'prog_types', 'map_types', 'attach_types', 'link_types' or 'helpers', got: %s", *argv);
+ return -1;
+ }
+
+ if (json_output)
+ jsonw_start_array(json_wtr); /* root array */
+
+ while (true) {
+ const char *name;
+
+ name = get_name(id++);
+ if (!name)
+ break;
+ if (json_output)
+ jsonw_string(json_wtr, name);
+ else
+ printf("%s\n", name);
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr); /* root array */
+
+ return 0;
+}
+
static int do_help(int argc, char **argv)
{
if (json_output) {
@@ -1246,9 +1319,11 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %1$s %2$s probe [COMPONENT] [full] [unprivileged] [macros [prefix PREFIX]]\n"
+ " %1$s %2$s list_builtins GROUP\n"
" %1$s %2$s help\n"
"\n"
" COMPONENT := { kernel | dev NAME }\n"
+ " GROUP := { prog_types | map_types | attach_types | link_types | helpers }\n"
" " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, argv[-2]);
@@ -1257,8 +1332,9 @@ static int do_help(int argc, char **argv)
}
static const struct cmd cmds[] = {
- { "probe", do_probe },
- { "help", do_help },
+ { "probe", do_probe },
+ { "list_builtins", do_list_builtins },
+ { "help", do_help },
{ 0 }
};
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 4c9477ff748d..7070dcffa822 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -474,6 +474,9 @@ static void codegen_asserts(struct bpf_object *obj, const char *obj_name)
const struct btf_type *sec;
char map_ident[256], var_ident[256];
+ if (!btf)
+ return;
+
codegen("\
\n\
__attribute__((unused)) static void \n\
@@ -1172,7 +1175,7 @@ static int do_skeleton(int argc, char **argv)
static inline void \n\
%1$s__detach(struct %1$s *obj) \n\
{ \n\
- return bpf_object__detach_skeleton(obj->skeleton); \n\
+ bpf_object__detach_skeleton(obj->skeleton); \n\
} \n\
",
obj_name
@@ -1747,6 +1750,7 @@ btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_poi
case BTF_KIND_INT:
case BTF_KIND_FLOAT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
break;
@@ -1758,6 +1762,7 @@ btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_poi
}
break;
case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
case BTF_KIND_VOLATILE:
case BTF_KIND_TYPEDEF:
err = btfgen_mark_type(info, btf_type->type, follow_pointers);
@@ -1852,6 +1857,112 @@ static int btfgen_record_field_relo(struct btfgen_info *info, struct bpf_core_sp
return 0;
}
+/* Mark types, members, and member types. Compared to btfgen_record_field_relo,
+ * this function does not rely on the target spec for inferring members, but
+ * uses the associated BTF.
+ *
+ * The `behind_ptr` argument is used to stop marking of composite types reached
+ * through a pointer. This way, we can keep BTF size in check while providing
+ * reasonable match semantics.
+ */
+static int btfgen_mark_type_match(struct btfgen_info *info, __u32 type_id, bool behind_ptr)
+{
+ const struct btf_type *btf_type;
+ struct btf *btf = info->src_btf;
+ struct btf_type *cloned_type;
+ int i, err;
+
+ if (type_id == 0)
+ return 0;
+
+ btf_type = btf__type_by_id(btf, type_id);
+ /* mark type on cloned BTF as used */
+ cloned_type = (struct btf_type *)btf__type_by_id(info->marked_btf, type_id);
+ cloned_type->name_off = MARKED;
+
+ switch (btf_kind(btf_type)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ struct btf_member *m = btf_members(btf_type);
+ __u16 vlen = btf_vlen(btf_type);
+
+ if (behind_ptr)
+ break;
+
+ for (i = 0; i < vlen; i++, m++) {
+ /* mark member */
+ btfgen_mark_member(info, type_id, i);
+
+ /* mark member's type */
+ err = btfgen_mark_type_match(info, m->type, false);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_CONST:
+ case BTF_KIND_FWD:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ return btfgen_mark_type_match(info, btf_type->type, behind_ptr);
+ case BTF_KIND_PTR:
+ return btfgen_mark_type_match(info, btf_type->type, true);
+ case BTF_KIND_ARRAY: {
+ struct btf_array *array;
+
+ array = btf_array(btf_type);
+ /* mark array type */
+ err = btfgen_mark_type_match(info, array->type, false);
+ /* mark array's index type */
+ err = err ? : btfgen_mark_type_match(info, array->index_type, false);
+ if (err)
+ return err;
+ break;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ __u16 vlen = btf_vlen(btf_type);
+ struct btf_param *param;
+
+ /* mark ret type */
+ err = btfgen_mark_type_match(info, btf_type->type, false);
+ if (err)
+ return err;
+
+ /* mark parameters types */
+ param = btf_params(btf_type);
+ for (i = 0; i < vlen; i++) {
+ err = btfgen_mark_type_match(info, param->type, false);
+ if (err)
+ return err;
+ param++;
+ }
+ break;
+ }
+ /* tells if some other type needs to be handled */
+ default:
+ p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Mark types, members, and member types. Compared to btfgen_record_field_relo,
+ * this function does not rely on the target spec for inferring members, but
+ * uses the associated BTF.
+ */
+static int btfgen_record_type_match_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
+{
+ return btfgen_mark_type_match(info, targ_spec->root_type_id, false);
+}
+
static int btfgen_record_type_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
{
return btfgen_mark_type(info, targ_spec->root_type_id, true);
@@ -1878,6 +1989,8 @@ static int btfgen_record_reloc(struct btfgen_info *info, struct bpf_core_spec *r
case BPF_CORE_TYPE_EXISTS:
case BPF_CORE_TYPE_SIZE:
return btfgen_record_type_relo(info, res);
+ case BPF_CORE_TYPE_MATCHES:
+ return btfgen_record_type_match_relo(info, res);
case BPF_CORE_ENUMVAL_EXISTS:
case BPF_CORE_ENUMVAL_VALUE:
return btfgen_record_enumval_relo(info, res);
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 6353a789322b..7a20931c3250 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -13,19 +13,6 @@
#include "json_writer.h"
#include "main.h"
-static const char * const link_type_name[] = {
- [BPF_LINK_TYPE_UNSPEC] = "unspec",
- [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
- [BPF_LINK_TYPE_TRACING] = "tracing",
- [BPF_LINK_TYPE_CGROUP] = "cgroup",
- [BPF_LINK_TYPE_ITER] = "iter",
- [BPF_LINK_TYPE_NETNS] = "netns",
- [BPF_LINK_TYPE_XDP] = "xdp",
- [BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
- [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
- [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
-};
-
static struct hashmap *link_table;
static int link_parse_fd(int *argc, char ***argv)
@@ -67,9 +54,12 @@ static int link_parse_fd(int *argc, char ***argv)
static void
show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr)
{
+ const char *link_type_str;
+
jsonw_uint_field(wtr, "id", info->id);
- if (info->type < ARRAY_SIZE(link_type_name))
- jsonw_string_field(wtr, "type", link_type_name[info->type]);
+ link_type_str = libbpf_bpf_link_type_str(info->type);
+ if (link_type_str)
+ jsonw_string_field(wtr, "type", link_type_str);
else
jsonw_uint_field(wtr, "type", info->type);
@@ -78,9 +68,11 @@ show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr)
static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr)
{
- if (attach_type < ARRAY_SIZE(attach_type_name))
- jsonw_string_field(wtr, "attach_type",
- attach_type_name[attach_type]);
+ const char *attach_type_str;
+
+ attach_type_str = libbpf_bpf_attach_type_str(attach_type);
+ if (attach_type_str)
+ jsonw_string_field(wtr, "attach_type", attach_type_str);
else
jsonw_uint_field(wtr, "attach_type", attach_type);
}
@@ -121,6 +113,7 @@ static int get_prog_info(int prog_id, struct bpf_prog_info *info)
static int show_link_close_json(int fd, struct bpf_link_info *info)
{
struct bpf_prog_info prog_info;
+ const char *prog_type_str;
int err;
jsonw_start_object(json_wtr);
@@ -137,12 +130,12 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
if (err)
return err;
- if (prog_info.type < prog_type_name_size)
- jsonw_string_field(json_wtr, "prog_type",
- prog_type_name[prog_info.type]);
+ prog_type_str = libbpf_bpf_prog_type_str(prog_info.type);
+ /* libbpf will return NULL for variants unknown to it. */
+ if (prog_type_str)
+ jsonw_string_field(json_wtr, "prog_type", prog_type_str);
else
- jsonw_uint_field(json_wtr, "prog_type",
- prog_info.type);
+ jsonw_uint_field(json_wtr, "prog_type", prog_info.type);
show_link_attach_type_json(info->tracing.attach_type,
json_wtr);
@@ -184,9 +177,12 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
static void show_link_header_plain(struct bpf_link_info *info)
{
+ const char *link_type_str;
+
printf("%u: ", info->id);
- if (info->type < ARRAY_SIZE(link_type_name))
- printf("%s ", link_type_name[info->type]);
+ link_type_str = libbpf_bpf_link_type_str(info->type);
+ if (link_type_str)
+ printf("%s ", link_type_str);
else
printf("type %u ", info->type);
@@ -195,8 +191,11 @@ static void show_link_header_plain(struct bpf_link_info *info)
static void show_link_attach_type_plain(__u32 attach_type)
{
- if (attach_type < ARRAY_SIZE(attach_type_name))
- printf("attach_type %s ", attach_type_name[attach_type]);
+ const char *attach_type_str;
+
+ attach_type_str = libbpf_bpf_attach_type_str(attach_type);
+ if (attach_type_str)
+ printf("attach_type %s ", attach_type_str);
else
printf("attach_type %u ", attach_type);
}
@@ -214,6 +213,7 @@ static void show_iter_plain(struct bpf_link_info *info)
static int show_link_close_plain(int fd, struct bpf_link_info *info)
{
struct bpf_prog_info prog_info;
+ const char *prog_type_str;
int err;
show_link_header_plain(info);
@@ -228,9 +228,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
if (err)
return err;
- if (prog_info.type < prog_type_name_size)
- printf("\n\tprog_type %s ",
- prog_type_name[prog_info.type]);
+ prog_type_str = libbpf_bpf_prog_type_str(prog_info.type);
+ /* libbpf will return NULL for variants unknown to it. */
+ if (prog_type_str)
+ printf("\n\tprog_type %s ", prog_type_str);
else
printf("\n\tprog_type %u ", prog_info.type);
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 9062ef2b8767..451cefc2d0da 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -508,8 +508,6 @@ int main(int argc, char **argv)
* mode for loading generated skeleton.
*/
libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS);
- } else {
- libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK);
}
argc -= optind;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index aa99ffab451a..5e5060c2ac04 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -63,14 +63,6 @@ static inline void *u64_to_ptr(__u64 ptr)
#define HELP_SPEC_LINK \
"LINK := { id LINK_ID | pinned FILE }"
-extern const char * const prog_type_name[];
-extern const size_t prog_type_name_size;
-
-extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE];
-
-extern const char * const map_type_name[];
-extern const size_t map_type_name_size;
-
/* keep in sync with the definition in skeleton/pid_iter.bpf.c */
enum bpf_obj_type {
BPF_OBJ_UNKNOWN,
@@ -102,6 +94,8 @@ int detect_common_prefix(const char *arg, ...);
void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
void usage(void) __noreturn;
+void set_max_rlimit(void);
+
int mount_tracefs(const char *target);
struct obj_ref {
@@ -249,6 +243,20 @@ int print_all_levels(__maybe_unused enum libbpf_print_level level,
size_t hash_fn_for_key_as_id(const void *key, void *ctx);
bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx);
+/* bpf_attach_type_input_str - convert the provided attach type value into a
+ * textual representation that we accept for input purposes.
+ *
+ * This function is similar in nature to libbpf_bpf_attach_type_str, but
+ * recognizes some attach type names that have been used by the program in the
+ * past and which do not follow the string inference scheme that libbpf uses.
+ * These textual representations should only be used for user input.
+ *
+ * @t: The attach type
+ * Returns a pointer to a static string identifying the attach type. NULL is
+ * returned for unknown bpf_attach_type values.
+ */
+const char *bpf_attach_type_input_str(enum bpf_attach_type t);
+
static inline void *u32_as_hash_field(__u32 x)
{
return (void *)(uintptr_t)x;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 877387ef79c7..38b6bc9c26c3 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -22,42 +22,6 @@
#include "json_writer.h"
#include "main.h"
-const char * const map_type_name[] = {
- [BPF_MAP_TYPE_UNSPEC] = "unspec",
- [BPF_MAP_TYPE_HASH] = "hash",
- [BPF_MAP_TYPE_ARRAY] = "array",
- [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
- [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
- [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
- [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
- [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
- [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
- [BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
- [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
- [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
- [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
- [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
- [BPF_MAP_TYPE_DEVMAP] = "devmap",
- [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
- [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
- [BPF_MAP_TYPE_CPUMAP] = "cpumap",
- [BPF_MAP_TYPE_XSKMAP] = "xskmap",
- [BPF_MAP_TYPE_SOCKHASH] = "sockhash",
- [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
- [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
- [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
- [BPF_MAP_TYPE_QUEUE] = "queue",
- [BPF_MAP_TYPE_STACK] = "stack",
- [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
- [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
- [BPF_MAP_TYPE_RINGBUF] = "ringbuf",
- [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
- [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
- [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
-};
-
-const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
-
static struct hashmap *map_table;
static bool map_is_per_cpu(__u32 type)
@@ -81,12 +45,18 @@ static bool map_is_map_of_progs(__u32 type)
static int map_type_from_str(const char *type)
{
+ const char *map_type_str;
unsigned int i;
- for (i = 0; i < ARRAY_SIZE(map_type_name); i++)
+ for (i = 0; ; i++) {
+ map_type_str = libbpf_bpf_map_type_str(i);
+ if (!map_type_str)
+ break;
+
/* Don't allow prefixing in case of possible future shadowing */
- if (map_type_name[i] && !strcmp(map_type_name[i], type))
+ if (!strcmp(map_type_str, type))
return i;
+ }
return -1;
}
@@ -472,9 +442,12 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr)
{
+ const char *map_type_str;
+
jsonw_uint_field(wtr, "id", info->id);
- if (info->type < ARRAY_SIZE(map_type_name))
- jsonw_string_field(wtr, "type", map_type_name[info->type]);
+ map_type_str = libbpf_bpf_map_type_str(info->type);
+ if (map_type_str)
+ jsonw_string_field(wtr, "type", map_type_str);
else
jsonw_uint_field(wtr, "type", info->type);
@@ -513,10 +486,12 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
if (owner_prog_type) {
unsigned int prog_type = atoi(owner_prog_type);
+ const char *prog_type_str;
- if (prog_type < prog_type_name_size)
+ prog_type_str = libbpf_bpf_prog_type_str(prog_type);
+ if (prog_type_str)
jsonw_string_field(json_wtr, "owner_prog_type",
- prog_type_name[prog_type]);
+ prog_type_str);
else
jsonw_uint_field(json_wtr, "owner_prog_type",
prog_type);
@@ -559,9 +534,13 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
static void show_map_header_plain(struct bpf_map_info *info)
{
+ const char *map_type_str;
+
printf("%u: ", info->id);
- if (info->type < ARRAY_SIZE(map_type_name))
- printf("%s ", map_type_name[info->type]);
+
+ map_type_str = libbpf_bpf_map_type_str(info->type);
+ if (map_type_str)
+ printf("%s ", map_type_str);
else
printf("type %u ", info->type);
@@ -597,10 +576,11 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
printf("\n\t");
if (owner_prog_type) {
unsigned int prog_type = atoi(owner_prog_type);
+ const char *prog_type_str;
- if (prog_type < prog_type_name_size)
- printf("owner_prog_type %s ",
- prog_type_name[prog_type]);
+ prog_type_str = libbpf_bpf_prog_type_str(prog_type);
+ if (prog_type_str)
+ printf("owner_prog_type %s ", prog_type_str);
else
printf("owner_prog_type %d ", prog_type);
}
@@ -876,9 +856,13 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr,
}
if (info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
- info->value_size != 8)
+ info->value_size != 8) {
+ const char *map_type_str;
+
+ map_type_str = libbpf_bpf_map_type_str(info->type);
p_info("Warning: cannot read values from %s map with value_size != 8",
- map_type_name[info->type]);
+ map_type_str);
+ }
while (true) {
err = bpf_map_get_next_key(fd, prev_key, key);
if (err) {
@@ -1342,6 +1326,8 @@ static int do_create(int argc, char **argv)
goto exit;
}
+ set_max_rlimit();
+
fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr);
if (fd < 0) {
p_err("map create failed: %s", strerror(errno));
diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index e2d00d3cd868..bb6c969a114a 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -108,6 +108,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type)
p_err("failed to create hashmap for PID references");
return -1;
}
+ set_max_rlimit();
skel = pid_iter_bpf__open();
if (!skel) {
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 5c2c63df92e8..c81362a001ba 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -36,54 +36,28 @@
#define BPF_METADATA_PREFIX "bpf_metadata_"
#define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1)
-const char * const prog_type_name[] = {
- [BPF_PROG_TYPE_UNSPEC] = "unspec",
- [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
- [BPF_PROG_TYPE_KPROBE] = "kprobe",
- [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
- [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
- [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
- [BPF_PROG_TYPE_XDP] = "xdp",
- [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
- [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
- [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
- [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
- [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
- [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
- [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
- [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
- [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
- [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
- [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
- [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
- [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
- [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
- [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
- [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
- [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
- [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
- [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
- [BPF_PROG_TYPE_TRACING] = "tracing",
- [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
- [BPF_PROG_TYPE_EXT] = "ext",
- [BPF_PROG_TYPE_LSM] = "lsm",
- [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
- [BPF_PROG_TYPE_SYSCALL] = "syscall",
-};
-
-const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
-
enum dump_mode {
DUMP_JITED,
DUMP_XLATED,
};
+static const bool attach_types[] = {
+ [BPF_SK_SKB_STREAM_PARSER] = true,
+ [BPF_SK_SKB_STREAM_VERDICT] = true,
+ [BPF_SK_SKB_VERDICT] = true,
+ [BPF_SK_MSG_VERDICT] = true,
+ [BPF_FLOW_DISSECTOR] = true,
+ [__MAX_BPF_ATTACH_TYPE] = false,
+};
+
+/* Textual representations traditionally used by the program and kept around
+ * for the sake of backwards compatibility.
+ */
static const char * const attach_type_strings[] = {
[BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
[BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
[BPF_SK_SKB_VERDICT] = "skb_verdict",
[BPF_SK_MSG_VERDICT] = "msg_verdict",
- [BPF_FLOW_DISSECTOR] = "flow_dissector",
[__MAX_BPF_ATTACH_TYPE] = NULL,
};
@@ -94,6 +68,14 @@ static enum bpf_attach_type parse_attach_type(const char *str)
enum bpf_attach_type type;
for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+ if (attach_types[type]) {
+ const char *attach_type_str;
+
+ attach_type_str = libbpf_bpf_attach_type_str(type);
+ if (!strcmp(str, attach_type_str))
+ return type;
+ }
+
if (attach_type_strings[type] &&
is_prefix(str, attach_type_strings[type]))
return type;
@@ -428,12 +410,14 @@ out_free:
static void print_prog_header_json(struct bpf_prog_info *info, int fd)
{
+ const char *prog_type_str;
char prog_name[MAX_PROG_FULL_NAME];
jsonw_uint_field(json_wtr, "id", info->id);
- if (info->type < ARRAY_SIZE(prog_type_name))
- jsonw_string_field(json_wtr, "type",
- prog_type_name[info->type]);
+ prog_type_str = libbpf_bpf_prog_type_str(info->type);
+
+ if (prog_type_str)
+ jsonw_string_field(json_wtr, "type", prog_type_str);
else
jsonw_uint_field(json_wtr, "type", info->type);
@@ -515,11 +499,13 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
static void print_prog_header_plain(struct bpf_prog_info *info, int fd)
{
+ const char *prog_type_str;
char prog_name[MAX_PROG_FULL_NAME];
printf("%u: ", info->id);
- if (info->type < ARRAY_SIZE(prog_type_name))
- printf("%s ", prog_type_name[info->type]);
+ prog_type_str = libbpf_bpf_prog_type_str(info->type);
+ if (prog_type_str)
+ printf("%s ", prog_type_str);
else
printf("type %u ", info->type);
@@ -1604,6 +1590,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
}
}
+ set_max_rlimit();
+
if (verifier_logs)
/* log_level1 + log_level2 + stats, but not stable UAPI */
open_opts.kernel_log_level = 1 + 2 + 4;
@@ -1974,7 +1962,7 @@ static int profile_parse_metrics(int argc, char **argv)
int selected_cnt = 0;
unsigned int i;
- metric_cnt = sizeof(metrics) / sizeof(struct profile_metric);
+ metric_cnt = ARRAY_SIZE(metrics);
while (argc > 0) {
for (i = 0; i < metric_cnt; i++) {
@@ -2301,6 +2289,7 @@ static int do_profile(int argc, char **argv)
}
}
+ set_max_rlimit();
err = profiler_bpf__load(profile_obj);
if (err) {
p_err("failed to load profile_obj");
@@ -2374,8 +2363,8 @@ static int do_help(int argc, char **argv)
" cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
" cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
" struct_ops | fentry | fexit | freplace | sk_lookup }\n"
- " ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n"
- " stream_parser | flow_dissector }\n"
+ " ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n"
+ " sk_skb_stream_parser | flow_dissector }\n"
" METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index 2535f079ed67..e08a6ff2866c 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -501,6 +501,8 @@ static int do_register(int argc, char **argv)
if (libbpf_get_error(obj))
return -1;
+ set_max_rlimit();
+
if (bpf_object__load(obj)) {
bpf_object__close(obj);
return -1;
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 5d26f3c6f918..80cd7843c677 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -45,6 +45,19 @@
* .zero 4
* __BTF_ID__func__vfs_fallocate__4:
* .zero 4
+ *
+ * set8 - store symbol size into first 4 bytes and sort following
+ * ID list
+ *
+ * __BTF_ID__set8__list:
+ * .zero 8
+ * list:
+ * __BTF_ID__func__vfs_getattr__3:
+ * .zero 4
+ * .word (1 << 0) | (1 << 2)
+ * __BTF_ID__func__vfs_fallocate__5:
+ * .zero 4
+ * .word (1 << 3) | (1 << 1) | (1 << 2)
*/
#define _GNU_SOURCE
@@ -72,6 +85,7 @@
#define BTF_TYPEDEF "typedef"
#define BTF_FUNC "func"
#define BTF_SET "set"
+#define BTF_SET8 "set8"
#define ADDR_CNT 100
@@ -84,6 +98,7 @@ struct btf_id {
};
int addr_cnt;
bool is_set;
+ bool is_set8;
Elf64_Addr addr[ADDR_CNT];
};
@@ -231,14 +246,14 @@ static char *get_id(const char *prefix_end)
return id;
}
-static struct btf_id *add_set(struct object *obj, char *name)
+static struct btf_id *add_set(struct object *obj, char *name, bool is_set8)
{
/*
* __BTF_ID__set__name
* name = ^
* id = ^
*/
- char *id = name + sizeof(BTF_SET "__") - 1;
+ char *id = name + (is_set8 ? sizeof(BTF_SET8 "__") : sizeof(BTF_SET "__")) - 1;
int len = strlen(name);
if (id >= name + len) {
@@ -444,9 +459,21 @@ static int symbols_collect(struct object *obj)
} else if (!strncmp(prefix, BTF_FUNC, sizeof(BTF_FUNC) - 1)) {
obj->nr_funcs++;
id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1);
+ /* set8 */
+ } else if (!strncmp(prefix, BTF_SET8, sizeof(BTF_SET8) - 1)) {
+ id = add_set(obj, prefix, true);
+ /*
+ * SET8 objects store list's count, which is encoded
+ * in symbol's size, together with 'cnt' field hence
+ * that - 1.
+ */
+ if (id) {
+ id->cnt = sym.st_size / sizeof(uint64_t) - 1;
+ id->is_set8 = true;
+ }
/* set */
} else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) {
- id = add_set(obj, prefix);
+ id = add_set(obj, prefix, false);
/*
* SET objects store list's count, which is encoded
* in symbol's size, together with 'cnt' field hence
@@ -571,7 +598,8 @@ static int id_patch(struct object *obj, struct btf_id *id)
int *ptr = data->d_buf;
int i;
- if (!id->id && !id->is_set)
+ /* For set, set8, id->id may be 0 */
+ if (!id->id && !id->is_set && !id->is_set8)
pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name);
for (i = 0; i < id->addr_cnt; i++) {
@@ -643,13 +671,13 @@ static int sets_patch(struct object *obj)
}
idx = idx / sizeof(int);
- base = &ptr[idx] + 1;
+ base = &ptr[idx] + (id->is_set8 ? 2 : 1);
cnt = ptr[idx];
pr_debug("sorting addr %5lu: cnt %6d [%s]\n",
(idx + 1) * sizeof(int), cnt, id->name);
- qsort(base, cnt, sizeof(int), cmp_id);
+ qsort(base, cnt, id->is_set8 ? sizeof(uint64_t) : sizeof(int), cmp_id);
next = rb_next(next);
}
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index da6de16a3dfb..8b3d87b82b7a 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -4,7 +4,7 @@ include ../../scripts/Makefile.include
OUTPUT ?= $(abspath .output)/
BPFTOOL_OUTPUT := $(OUTPUT)bpftool/
-DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bpftool
+DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bootstrap/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
LIBBPF_SRC := $(abspath ../../lib/bpf)
BPFOBJ_OUTPUT := $(OUTPUT)libbpf/
@@ -86,6 +86,5 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OU
$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \
DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers
-$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT)
- $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \
- ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD)
+$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
+ $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) bootstrap
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 57890b357f85..71e54b1e3796 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -73,7 +73,7 @@ asm( \
__BTF_ID_LIST(name, local) \
extern u32 name[];
-#define BTF_ID_LIST_GLOBAL(name) \
+#define BTF_ID_LIST_GLOBAL(name, n) \
__BTF_ID_LIST(name, globl)
/* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with
@@ -82,6 +82,9 @@ __BTF_ID_LIST(name, globl)
#define BTF_ID_LIST_SINGLE(name, prefix, typename) \
BTF_ID_LIST(name) \
BTF_ID(prefix, typename)
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \
+ BTF_ID_LIST_GLOBAL(name, 1) \
+ BTF_ID(prefix, typename)
/*
* The BTF_ID_UNUSED macro defines 4 zero bytes.
@@ -143,13 +146,14 @@ extern struct btf_id_set name;
#else
-#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
-#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
-#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
-#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
-#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1];
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1];
+#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 };
#define BTF_SET_END(name)
#endif /* CONFIG_DEBUG_INFO_BTF */
@@ -172,7 +176,10 @@ extern struct btf_id_set name;
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCKET, socket)
enum {
#define BTF_SOCK_TYPE(name, str) name,
@@ -184,4 +191,18 @@ MAX_BTF_SOCK_TYPE,
extern u32 btf_sock_ids[];
#endif
+#define BTF_TRACING_TYPE_xxx \
+ BTF_TRACING_TYPE(BTF_TRACING_TYPE_TASK, task_struct) \
+ BTF_TRACING_TYPE(BTF_TRACING_TYPE_FILE, file) \
+ BTF_TRACING_TYPE(BTF_TRACING_TYPE_VMA, vm_area_struct)
+
+enum {
+#define BTF_TRACING_TYPE(name, type) name,
+BTF_TRACING_TYPE_xxx
+#undef BTF_TRACING_TYPE
+MAX_BTF_TRACING_TYPE,
+};
+
+extern u32 btf_tracing_ids[];
+
#endif
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
index 15b940ec1eac..62c54ffbeeaa 100644
--- a/tools/include/linux/objtool.h
+++ b/tools/include/linux/objtool.h
@@ -32,11 +32,16 @@ struct unwind_hint {
*
* UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
* Useful for code which doesn't have an ELF function annotation.
+ *
+ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
*/
#define UNWIND_HINT_TYPE_CALL 0
#define UNWIND_HINT_TYPE_REGS 1
#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
#define UNWIND_HINT_TYPE_FUNC 3
+#define UNWIND_HINT_TYPE_ENTRY 4
+#define UNWIND_HINT_TYPE_SAVE 5
+#define UNWIND_HINT_TYPE_RESTORE 6
#ifdef CONFIG_OBJTOOL
@@ -62,7 +67,7 @@ struct unwind_hint {
* It should only be used in special cases where you're 100% sure it won't
* affect the reliability of frame pointers and kernel stack traces.
*
- * For more information, see tools/objtool/Documentation/stack-validation.txt.
+ * For more information, see tools/objtool/Documentation/objtool.txt.
*/
#define STACK_FRAME_NON_STANDARD(func) \
static void __used __section(".discard.func_stack_frame_non_standard") \
@@ -124,7 +129,7 @@ struct unwind_hint {
* the debuginfo as necessary. It will also warn if it sees any
* inconsistencies.
*/
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.Lunwind_hint_ip_\@:
.pushsection .discard.unwind_hints
/* struct unwind_hint */
@@ -177,7 +182,7 @@ struct unwind_hint {
#define ASM_REACHABLE
#else
#define ANNOTATE_INTRA_FUNCTION_CALL
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.endm
.macro STACK_FRAME_NON_STANDARD func:req
.endm
diff --git a/tools/include/linux/sched/mm.h b/tools/include/linux/sched/mm.h
index c8d9f19c1f35..967294b8edcf 100644
--- a/tools/include/linux/sched/mm.h
+++ b/tools/include/linux/sched/mm.h
@@ -1,4 +1,6 @@
#ifndef _TOOLS_PERF_LINUX_SCHED_MM_H
#define _TOOLS_PERF_LINUX_SCHED_MM_H
+#define might_alloc(gfp) do { } while (0)
+
#endif /* _TOOLS_PERF_LINUX_SCHED_MM_H */
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index 7a16d917c185..cfd06764b5ae 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -7,13 +7,46 @@ ifeq ($(srctree),)
srctree := $(patsubst %/tools/include/,%,$(dir $(CURDIR)))
endif
+# when run as make -C tools/ nolibc_<foo> the arch is not set
+ifeq ($(ARCH),)
+include $(srctree)/scripts/subarch.include
+ARCH = $(SUBARCH)
+endif
+
+# OUTPUT is only set when run from the main makefile, otherwise
+# it defaults to this nolibc directory.
+OUTPUT ?= $(CURDIR)/
+
+ifeq ($(V),1)
+Q=
+else
+Q=@
+endif
+
nolibc_arch := $(patsubst arm64,aarch64,$(ARCH))
arch_file := arch-$(nolibc_arch).h
all_files := ctype.h errno.h nolibc.h signal.h std.h stdio.h stdlib.h string.h \
sys.h time.h types.h unistd.h
# install all headers needed to support a bare-metal compiler
-all:
+all: headers
+
+install: help
+
+help:
+ @echo "Supported targets under nolibc:"
+ @echo " all call \"headers\""
+ @echo " clean clean the sysroot"
+ @echo " headers prepare a sysroot in tools/include/nolibc/sysroot"
+ @echo " headers_standalone like \"headers\", and also install kernel headers"
+ @echo " help this help"
+ @echo ""
+ @echo "These targets may also be called from tools as \"make nolibc_<target>\"."
+ @echo ""
+ @echo "Currently using the following variables:"
+ @echo " ARCH = $(ARCH)"
+ @echo " OUTPUT = $(OUTPUT)"
+ @echo ""
# Note: when ARCH is "x86" we concatenate both x86_64 and i386
headers:
@@ -36,7 +69,7 @@ headers:
headers_standalone: headers
$(Q)$(MAKE) -C $(srctree) headers
- $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)/sysroot
+ $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot
clean:
$(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot"
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index 15dedf8d0902..a3cebc4bc3ac 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -273,7 +273,7 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
return written;
}
-static __attribute__((unused))
+static __attribute__((unused, format(printf, 2, 3)))
int fprintf(FILE *stream, const char *fmt, ...)
{
va_list args;
@@ -285,7 +285,7 @@ int fprintf(FILE *stream, const char *fmt, ...)
return ret;
}
-static __attribute__((unused))
+static __attribute__((unused, format(printf, 1, 2)))
int printf(const char *fmt, ...)
{
va_list args;
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 8fd32eaf8037..92378c4b9660 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -128,10 +128,9 @@ void *malloc(size_t len)
static __attribute__((unused))
void *calloc(size_t size, size_t nmemb)
{
- void *orig;
- size_t res = 0;
+ size_t x = size * nmemb;
- if (__builtin_expect(__builtin_mul_overflow(nmemb, size, &res), 0)) {
+ if (__builtin_expect(size && ((x / size) != nmemb), 0)) {
SET_ERRNO(ENOMEM);
return NULL;
}
@@ -140,7 +139,7 @@ void *calloc(size_t size, size_t nmemb)
* No need to zero the heap, the MAP_ANONYMOUS in malloc()
* already does it.
*/
- return malloc(res);
+ return malloc(x);
}
static __attribute__((unused))
diff --git a/tools/include/uapi/asm-generic/fcntl.h b/tools/include/uapi/asm-generic/fcntl.h
index 0197042b7dfb..1ecdb911add8 100644
--- a/tools/include/uapi/asm-generic/fcntl.h
+++ b/tools/include/uapi/asm-generic/fcntl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_GENERIC_FCNTL_H
#define _ASM_GENERIC_FCNTL_H
@@ -90,7 +91,7 @@
/* a horrid kludge trying to make sure that this will fail on old kernels */
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
-#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
+#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
#ifndef O_NDELAY
#define O_NDELAY O_NONBLOCK
@@ -115,11 +116,13 @@
#define F_GETSIG 11 /* for sockets. */
#endif
+#if __BITS_PER_LONG == 32 || defined(__KERNEL__)
#ifndef F_GETLK64
#define F_GETLK64 12 /* using 'struct flock64' */
#define F_SETLK64 13
#define F_SETLKW64 14
#endif
+#endif /* __BITS_PER_LONG == 32 || defined(__KERNEL__) */
#ifndef F_SETOWN_EX
#define F_SETOWN_EX 15
@@ -178,6 +181,10 @@ struct f_owner_ex {
blocking */
#define LOCK_UN 8 /* remove lock */
+/*
+ * LOCK_MAND support has been removed from the kernel. We leave the symbols
+ * here to not break legacy builds, but these should not be used in new code.
+ */
#define LOCK_MAND 32 /* This is a mandatory flock ... */
#define LOCK_READ 64 /* which allows concurrent read operations */
#define LOCK_WRITE 128 /* which allows concurrent write operations */
@@ -185,6 +192,7 @@ struct f_owner_ex {
#define F_LINUX_SPECIFIC_BASE 1024
+#ifndef HAVE_ARCH_STRUCT_FLOCK
struct flock {
short l_type;
short l_whence;
@@ -209,5 +217,6 @@ struct flock64 {
__ARCH_FLOCK64_PAD
#endif
};
+#endif /* HAVE_ARCH_STRUCT_FLOCK */
#endif /* _ASM_GENERIC_FCNTL_H */
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index a2def7b27009..b28ff5d88145 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -2123,7 +2123,7 @@ struct i915_context_engines_load_balance {
__u64 mbz64; /* reserved for future use; must be zero */
- struct i915_engine_class_instance engines[0];
+ struct i915_engine_class_instance engines[];
} __attribute__((packed));
#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \
@@ -2161,7 +2161,7 @@ struct i915_context_engines_bond {
__u64 flags; /* all undefined flags must be zero */
__u64 mbz64[4]; /* reserved for future use; must be zero */
- struct i915_engine_class_instance engines[0];
+ struct i915_engine_class_instance engines[];
} __attribute__((packed));
#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \
@@ -2288,7 +2288,7 @@ struct i915_context_engines_parallel_submit {
* length = width (i) * num_siblings (j)
* index = j + i * num_siblings
*/
- struct i915_engine_class_instance engines[0];
+ struct i915_engine_class_instance engines[];
} __packed;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f4009dbdf62d..59a217ca2dfd 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -998,6 +998,7 @@ enum bpf_attach_type {
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
BPF_PERF_EVENT,
BPF_TRACE_KPROBE_MULTI,
+ BPF_LSM_CGROUP,
__MAX_BPF_ATTACH_TYPE
};
@@ -1431,6 +1432,7 @@ union bpf_attr {
__u32 attach_flags;
__aligned_u64 prog_ids;
__u32 prog_cnt;
+ __aligned_u64 prog_attach_flags; /* output: per-program attach_flags */
} query;
struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
@@ -2359,7 +2361,8 @@ union bpf_attr {
* Pull in non-linear data in case the *skb* is non-linear and not
* all of *len* are part of the linear section. Make *len* bytes
* from *skb* readable and writable. If a zero value is passed for
- * *len*, then the whole length of the *skb* is pulled.
+ * *len*, then all bytes in the linear part of *skb* will be made
+ * readable and writable.
*
* This helper is only needed for reading and writing with direct
* packet access.
@@ -3597,10 +3600,11 @@ union bpf_attr {
*
* *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or
- * **sizeof**\ (**struct ip6hdr**).
+ * **sizeof**\ (**struct ipv6hdr**).
*
* *th* points to the start of the TCP header, while *th_len*
- * contains **sizeof**\ (**struct tcphdr**).
+ * contains the length of the TCP header (at least
+ * **sizeof**\ (**struct tcphdr**)).
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
* error otherwise.
@@ -3783,10 +3787,11 @@ union bpf_attr {
*
* *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or
- * **sizeof**\ (**struct ip6hdr**).
+ * **sizeof**\ (**struct ipv6hdr**).
*
* *th* points to the start of the TCP header, while *th_len*
- * contains the length of the TCP header.
+ * contains the length of the TCP header with options (at least
+ * **sizeof**\ (**struct tcphdr**)).
* Return
* On success, lower 32 bits hold the generated SYN cookie in
* followed by 16 bits which hold the MSS value for that cookie,
@@ -5222,22 +5227,25 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
- * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset)
+ * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
+ * *flags* is currently unused.
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
- * of *src*'s data, -EINVAL if *src* is an invalid dynptr.
+ * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
+ * *flags* is not 0.
*
- * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len)
+ * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
+ * *flags* is currently unused.
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr.
+ * is a read-only dynptr or if *flags* is not 0.
*
* void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
@@ -5249,6 +5257,80 @@ union bpf_attr {
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length
* is out of bounds.
+ *
+ * s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len)
+ * Description
+ * Try to issue a SYN cookie for the packet with corresponding
+ * IPv4/TCP headers, *iph* and *th*, without depending on a
+ * listening socket.
+ *
+ * *iph* points to the IPv4 header.
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains the length of the TCP header (at least
+ * **sizeof**\ (**struct tcphdr**)).
+ * Return
+ * On success, lower 32 bits hold the generated SYN cookie in
+ * followed by 16 bits which hold the MSS value for that cookie,
+ * and the top 16 bits are unused.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** if *th_len* is invalid.
+ *
+ * s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len)
+ * Description
+ * Try to issue a SYN cookie for the packet with corresponding
+ * IPv6/TCP headers, *iph* and *th*, without depending on a
+ * listening socket.
+ *
+ * *iph* points to the IPv6 header.
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains the length of the TCP header (at least
+ * **sizeof**\ (**struct tcphdr**)).
+ * Return
+ * On success, lower 32 bits hold the generated SYN cookie in
+ * followed by 16 bits which hold the MSS value for that cookie,
+ * and the top 16 bits are unused.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** if *th_len* is invalid.
+ *
+ * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
+ *
+ * long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th)
+ * Description
+ * Check whether *iph* and *th* contain a valid SYN cookie ACK
+ * without depending on a listening socket.
+ *
+ * *iph* points to the IPv4 header.
+ *
+ * *th* points to the TCP header.
+ * Return
+ * 0 if *iph* and *th* are a valid SYN cookie ACK.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EACCES** if the SYN cookie is not valid.
+ *
+ * long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th)
+ * Description
+ * Check whether *iph* and *th* contain a valid SYN cookie ACK
+ * without depending on a listening socket.
+ *
+ * *iph* points to the IPv6 header.
+ *
+ * *th* points to the TCP header.
+ * Return
+ * 0 if *iph* and *th* are a valid SYN cookie ACK.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EACCES** if the SYN cookie is not valid.
+ *
+ * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5455,6 +5537,10 @@ union bpf_attr {
FN(dynptr_read), \
FN(dynptr_write), \
FN(dynptr_data), \
+ FN(tcp_raw_gen_syncookie_ipv4), \
+ FN(tcp_raw_gen_syncookie_ipv6), \
+ FN(tcp_raw_check_syncookie_ipv4), \
+ FN(tcp_raw_check_syncookie_ipv6), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5995,6 +6081,8 @@ struct bpf_prog_info {
__u64 run_cnt;
__u64 recursion_misses;
__u32 verified_insns;
+ __u32 attach_btf_obj_id;
+ __u32 attach_btf_id;
} __attribute__((aligned(8)));
struct bpf_map_info {
@@ -6702,6 +6790,7 @@ enum bpf_core_relo_kind {
BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */
BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */
+ BPF_CORE_TYPE_MATCHES = 12, /* type match in target kernel */
};
/*
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index a9162a6c0284..ec1798b6d3ff 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -36,10 +36,10 @@ struct btf_type {
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union and fwd
+ * struct, union, enum, fwd and enum64
*/
__u32 info;
- /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
+ /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
@@ -63,7 +63,7 @@ enum {
BTF_KIND_ARRAY = 3, /* Array */
BTF_KIND_STRUCT = 4, /* Struct */
BTF_KIND_UNION = 5, /* Union */
- BTF_KIND_ENUM = 6, /* Enumeration */
+ BTF_KIND_ENUM = 6, /* Enumeration up to 32-bit values */
BTF_KIND_FWD = 7, /* Forward */
BTF_KIND_TYPEDEF = 8, /* Typedef */
BTF_KIND_VOLATILE = 9, /* Volatile */
@@ -76,6 +76,7 @@ enum {
BTF_KIND_FLOAT = 16, /* Floating point */
BTF_KIND_DECL_TAG = 17, /* Decl Tag */
BTF_KIND_TYPE_TAG = 18, /* Type Tag */
+ BTF_KIND_ENUM64 = 19, /* Enumeration up to 64-bit values */
NR_BTF_KINDS,
BTF_KIND_MAX = NR_BTF_KINDS - 1,
@@ -186,4 +187,14 @@ struct btf_decl_tag {
__s32 component_idx;
};
+/* BTF_KIND_ENUM64 is followed by multiple "struct btf_enum64".
+ * The exact number of btf_enum64 is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum64 {
+ __u32 name_off;
+ __u32 val_lo32;
+ __u32 val_hi32;
+};
+
#endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index bdf7b404b3e7..b7b56871029c 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -90,7 +90,7 @@ struct file_dedupe_range {
__u16 dest_count; /* in - total elements in info array */
__u16 reserved1; /* must be zero */
__u32 reserved2; /* must be zero */
- struct file_dedupe_range_info info[0];
+ struct file_dedupe_range_info info[];
};
/* And dynamically-tunable limits and defaults: */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index b339bf2196ca..0242f31e339c 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -890,6 +890,7 @@ enum {
IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+ IFLA_BOND_SLAVE_PRIO,
__IFLA_BOND_SLAVE_MAX,
};
diff --git a/tools/include/uapi/linux/if_tun.h b/tools/include/uapi/linux/if_tun.h
index 454ae31b93c7..2ec07de1d73b 100644
--- a/tools/include/uapi/linux/if_tun.h
+++ b/tools/include/uapi/linux/if_tun.h
@@ -108,7 +108,7 @@ struct tun_pi {
struct tun_filter {
__u16 flags; /* TUN_FLT_ flags see above */
__u16 count; /* Number of addresses */
- __u8 addr[0][ETH_ALEN];
+ __u8 addr[][ETH_ALEN];
};
#endif /* _UAPI__IF_TUN_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 5088bd9f1922..cb6e3846d27b 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -542,7 +542,7 @@ struct kvm_coalesced_mmio {
struct kvm_coalesced_mmio_ring {
__u32 first, last;
- struct kvm_coalesced_mmio coalesced_mmio[0];
+ struct kvm_coalesced_mmio coalesced_mmio[];
};
#define KVM_COALESCED_MMIO_MAX \
@@ -621,7 +621,7 @@ struct kvm_clear_dirty_log {
/* for KVM_SET_SIGNAL_MASK */
struct kvm_signal_mask {
__u32 len;
- __u8 sigset[0];
+ __u8 sigset[];
};
/* for KVM_TPR_ACCESS_REPORTING */
@@ -1221,7 +1221,7 @@ struct kvm_irq_routing_entry {
struct kvm_irq_routing {
__u32 nr;
__u32 flags;
- struct kvm_irq_routing_entry entries[0];
+ struct kvm_irq_routing_entry entries[];
};
#endif
@@ -1341,7 +1341,7 @@ struct kvm_dirty_tlb {
struct kvm_reg_list {
__u64 n; /* number of regs */
- __u64 reg[0];
+ __u64 reg[];
};
struct kvm_one_reg {
@@ -2083,7 +2083,8 @@ struct kvm_stats_header {
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
-#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN
#define KVM_STATS_BASE_SHIFT 8
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index d37629dbad72..4653834f078f 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -491,7 +491,7 @@ struct perf_event_query_bpf {
/*
* User provided buffer to store program ids
*/
- __u32 ids[0];
+ __u32 ids[];
};
/*
diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h
index 12153771396a..3faee0199a9b 100644
--- a/tools/include/uapi/linux/pkt_cls.h
+++ b/tools/include/uapi/linux/pkt_cls.h
@@ -180,7 +180,7 @@ struct tc_u32_sel {
short hoff;
__be32 hmask;
- struct tc_u32_key keys[0];
+ struct tc_u32_key keys[];
};
struct tc_u32_mark {
@@ -192,7 +192,7 @@ struct tc_u32_mark {
struct tc_u32_pcnt {
__u64 rcnt;
__u64 rhit;
- __u64 kcnts[0];
+ __u64 kcnts[];
};
/* Flags */
diff --git a/tools/include/uapi/linux/seg6.h b/tools/include/uapi/linux/seg6.h
index 286e8d6a8e98..f94baf154c47 100644
--- a/tools/include/uapi/linux/seg6.h
+++ b/tools/include/uapi/linux/seg6.h
@@ -30,7 +30,7 @@ struct ipv6_sr_hdr {
__u8 flags;
__u16 tag;
- struct in6_addr segments[0];
+ struct in6_addr segments[];
};
#define SR6_FLAG1_PROTECTED (1 << 6)
@@ -49,7 +49,7 @@ struct ipv6_sr_hdr {
struct sr6_tlv {
__u8 type;
__u8 len;
- __u8 data[0];
+ __u8 data[];
};
#endif
diff --git a/tools/include/uapi/linux/usbdevice_fs.h b/tools/include/uapi/linux/usbdevice_fs.h
index cf525cddeb94..74a84e02422a 100644
--- a/tools/include/uapi/linux/usbdevice_fs.h
+++ b/tools/include/uapi/linux/usbdevice_fs.h
@@ -131,7 +131,7 @@ struct usbdevfs_urb {
unsigned int signr; /* signal to be sent on completion,
or 0 if none should be sent. */
void __user *usercontext;
- struct usbdevfs_iso_packet_desc iso_frame_desc[0];
+ struct usbdevfs_iso_packet_desc iso_frame_desc[];
};
/* ioctls for talking directly to drivers */
@@ -176,7 +176,7 @@ struct usbdevfs_disconnect_claim {
struct usbdevfs_streams {
unsigned int num_streams; /* Not used by USBDEVFS_FREE_STREAMS */
unsigned int num_eps;
- unsigned char eps[0];
+ unsigned char eps[];
};
/*
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index 2d3e5df39a59..3974a2a911cc 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -1106,7 +1106,7 @@ struct snd_ctl_elem_value {
struct snd_ctl_tlv {
unsigned int numid; /* control element numeric identification */
unsigned int length; /* in bytes aligned to 4 */
- unsigned int tlv[0]; /* first TLV */
+ unsigned int tlv[]; /* first TLV */
};
#define SNDRV_CTL_IOCTL_PVERSION _IOR('U', 0x00, int)
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 31a1a9015902..5a3dfb56d78f 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
- netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
+ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
usdt.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index a1265b152027..4c904ef0b47e 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -237,7 +237,7 @@ install_lib: all_cmd
$(call do_install_mkdir,$(libdir_SQ)); \
cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
-SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
+SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h \
bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \
skel_internal.h libbpf_version.h usdt.bpf.h
GEN_HDRS := $(BPF_GENERATED)
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 240186aac8e6..efcc06dafbd9 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -147,10 +147,6 @@ int bump_rlimit_memlock(void)
{
struct rlimit rlim;
- /* this the default in libbpf 1.0, but for now user has to opt-in explicitly */
- if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK))
- return 0;
-
/* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT))
return 0;
@@ -233,11 +229,10 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt,
return info;
}
-DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0)
-int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
- const char *prog_name, const char *license,
- const struct bpf_insn *insns, size_t insn_cnt,
- const struct bpf_prog_load_opts *opts)
+int bpf_prog_load(enum bpf_prog_type prog_type,
+ const char *prog_name, const char *license,
+ const struct bpf_insn *insns, size_t insn_cnt,
+ const struct bpf_prog_load_opts *opts)
{
void *finfo = NULL, *linfo = NULL;
const char *func_info, *line_info;
@@ -384,94 +379,6 @@ done:
return libbpf_err_errno(fd);
}
-__attribute__((alias("bpf_load_program_xattr2")))
-int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
- char *log_buf, size_t log_buf_sz);
-
-static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr,
- char *log_buf, size_t log_buf_sz)
-{
- LIBBPF_OPTS(bpf_prog_load_opts, p);
-
- if (!load_attr || !log_buf != !log_buf_sz)
- return libbpf_err(-EINVAL);
-
- p.expected_attach_type = load_attr->expected_attach_type;
- switch (load_attr->prog_type) {
- case BPF_PROG_TYPE_STRUCT_OPS:
- case BPF_PROG_TYPE_LSM:
- p.attach_btf_id = load_attr->attach_btf_id;
- break;
- case BPF_PROG_TYPE_TRACING:
- case BPF_PROG_TYPE_EXT:
- p.attach_btf_id = load_attr->attach_btf_id;
- p.attach_prog_fd = load_attr->attach_prog_fd;
- break;
- default:
- p.prog_ifindex = load_attr->prog_ifindex;
- p.kern_version = load_attr->kern_version;
- }
- p.log_level = load_attr->log_level;
- p.log_buf = log_buf;
- p.log_size = log_buf_sz;
- p.prog_btf_fd = load_attr->prog_btf_fd;
- p.func_info_rec_size = load_attr->func_info_rec_size;
- p.func_info_cnt = load_attr->func_info_cnt;
- p.func_info = load_attr->func_info;
- p.line_info_rec_size = load_attr->line_info_rec_size;
- p.line_info_cnt = load_attr->line_info_cnt;
- p.line_info = load_attr->line_info;
- p.prog_flags = load_attr->prog_flags;
-
- return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license,
- load_attr->insns, load_attr->insns_cnt, &p);
-}
-
-int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
- size_t insns_cnt, const char *license,
- __u32 kern_version, char *log_buf,
- size_t log_buf_sz)
-{
- struct bpf_load_program_attr load_attr;
-
- memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
- load_attr.prog_type = type;
- load_attr.expected_attach_type = 0;
- load_attr.name = NULL;
- load_attr.insns = insns;
- load_attr.insns_cnt = insns_cnt;
- load_attr.license = license;
- load_attr.kern_version = kern_version;
-
- return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz);
-}
-
-int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
- size_t insns_cnt, __u32 prog_flags, const char *license,
- __u32 kern_version, char *log_buf, size_t log_buf_sz,
- int log_level)
-{
- union bpf_attr attr;
- int fd;
-
- bump_rlimit_memlock();
-
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = type;
- attr.insn_cnt = (__u32)insns_cnt;
- attr.insns = ptr_to_u64(insns);
- attr.license = ptr_to_u64(license);
- attr.log_buf = ptr_to_u64(log_buf);
- attr.log_size = log_buf_sz;
- attr.log_level = log_level;
- log_buf[0] = 0;
- attr.kern_version = kern_version;
- attr.prog_flags = prog_flags;
-
- fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS);
- return libbpf_err_errno(fd);
-}
-
int bpf_map_update_elem(int fd, const void *key, const void *value,
__u64 flags)
{
@@ -672,11 +579,20 @@ int bpf_obj_pin(int fd, const char *pathname)
int bpf_obj_get(const char *pathname)
{
+ return bpf_obj_get_opts(pathname, NULL);
+}
+
+int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts)
+{
union bpf_attr attr;
int fd;
+ if (!OPTS_VALID(opts, bpf_obj_get_opts))
+ return libbpf_err(-EINVAL);
+
memset(&attr, 0, sizeof(attr));
attr.pathname = ptr_to_u64((void *)pathname);
+ attr.file_flags = OPTS_GET(opts, file_flags, 0);
fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr));
return libbpf_err_errno(fd);
@@ -888,80 +804,48 @@ int bpf_iter_create(int link_fd)
return libbpf_err_errno(fd);
}
-int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
- __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
+int bpf_prog_query_opts(int target_fd,
+ enum bpf_attach_type type,
+ struct bpf_prog_query_opts *opts)
{
union bpf_attr attr;
int ret;
+ if (!OPTS_VALID(opts, bpf_prog_query_opts))
+ return libbpf_err(-EINVAL);
+
memset(&attr, 0, sizeof(attr));
+
attr.query.target_fd = target_fd;
attr.query.attach_type = type;
- attr.query.query_flags = query_flags;
- attr.query.prog_cnt = *prog_cnt;
- attr.query.prog_ids = ptr_to_u64(prog_ids);
+ attr.query.query_flags = OPTS_GET(opts, query_flags, 0);
+ attr.query.prog_cnt = OPTS_GET(opts, prog_cnt, 0);
+ attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL));
+ attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL));
ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
- if (attach_flags)
- *attach_flags = attr.query.attach_flags;
- *prog_cnt = attr.query.prog_cnt;
-
- return libbpf_err_errno(ret);
-}
-
-int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
- void *data_out, __u32 *size_out, __u32 *retval,
- __u32 *duration)
-{
- union bpf_attr attr;
- int ret;
-
- memset(&attr, 0, sizeof(attr));
- attr.test.prog_fd = prog_fd;
- attr.test.data_in = ptr_to_u64(data);
- attr.test.data_out = ptr_to_u64(data_out);
- attr.test.data_size_in = size;
- attr.test.repeat = repeat;
-
- ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
-
- if (size_out)
- *size_out = attr.test.data_size_out;
- if (retval)
- *retval = attr.test.retval;
- if (duration)
- *duration = attr.test.duration;
+ OPTS_SET(opts, attach_flags, attr.query.attach_flags);
+ OPTS_SET(opts, prog_cnt, attr.query.prog_cnt);
return libbpf_err_errno(ret);
}
-int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
+int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
+ __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
{
- union bpf_attr attr;
+ LIBBPF_OPTS(bpf_prog_query_opts, opts);
int ret;
- if (!test_attr->data_out && test_attr->data_size_out > 0)
- return libbpf_err(-EINVAL);
+ opts.query_flags = query_flags;
+ opts.prog_ids = prog_ids;
+ opts.prog_cnt = *prog_cnt;
- memset(&attr, 0, sizeof(attr));
- attr.test.prog_fd = test_attr->prog_fd;
- attr.test.data_in = ptr_to_u64(test_attr->data_in);
- attr.test.data_out = ptr_to_u64(test_attr->data_out);
- attr.test.data_size_in = test_attr->data_size_in;
- attr.test.data_size_out = test_attr->data_size_out;
- attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
- attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
- attr.test.ctx_size_in = test_attr->ctx_size_in;
- attr.test.ctx_size_out = test_attr->ctx_size_out;
- attr.test.repeat = test_attr->repeat;
+ ret = bpf_prog_query_opts(target_fd, type, &opts);
- ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
-
- test_attr->data_size_out = attr.test.data_size_out;
- test_attr->ctx_size_out = attr.test.ctx_size_out;
- test_attr->retval = attr.test.retval;
- test_attr->duration = attr.test.duration;
+ if (attach_flags)
+ *attach_flags = opts.attach_flags;
+ *prog_cnt = opts.prog_cnt;
return libbpf_err_errno(ret);
}
@@ -1162,27 +1046,6 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_loa
return libbpf_err_errno(fd);
}
-int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log)
-{
- LIBBPF_OPTS(bpf_btf_load_opts, opts);
- int fd;
-
-retry:
- if (do_log && log_buf && log_buf_size) {
- opts.log_buf = log_buf;
- opts.log_size = log_buf_size;
- opts.log_level = 1;
- }
-
- fd = bpf_btf_load(btf, btf_size, &opts);
- if (fd < 0 && !do_log && log_buf && log_buf_size) {
- do_log = true;
- goto retry;
- }
-
- return libbpf_err_errno(fd);
-}
-
int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
__u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
__u64 *probe_addr)
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index cabc03703e29..9c50beabdd14 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -103,54 +103,6 @@ LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
const char *prog_name, const char *license,
const struct bpf_insn *insns, size_t insn_cnt,
const struct bpf_prog_load_opts *opts);
-/* this "specialization" should go away in libbpf 1.0 */
-LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
- const char *prog_name, const char *license,
- const struct bpf_insn *insns, size_t insn_cnt,
- const struct bpf_prog_load_opts *opts);
-
-/* This is an elaborate way to not conflict with deprecated bpf_prog_load()
- * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone.
- * With this approach, if someone is calling bpf_prog_load() with
- * 4 arguments, they will use the deprecated API, which keeps backwards
- * compatibility (both source code and binary). If bpf_prog_load() is called
- * with 6 arguments, though, it gets redirected to __bpf_prog_load.
- * So looking forward to libbpf 1.0 when this hack will be gone and
- * __bpf_prog_load() will be called just bpf_prog_load().
- */
-#ifndef bpf_prog_load
-#define bpf_prog_load(...) ___libbpf_overload(___bpf_prog_load, __VA_ARGS__)
-#define ___bpf_prog_load4(file, type, pobj, prog_fd) \
- bpf_prog_load_deprecated(file, type, pobj, prog_fd)
-#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \
- bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts)
-#endif /* bpf_prog_load */
-
-struct bpf_load_program_attr {
- enum bpf_prog_type prog_type;
- enum bpf_attach_type expected_attach_type;
- const char *name;
- const struct bpf_insn *insns;
- size_t insns_cnt;
- const char *license;
- union {
- __u32 kern_version;
- __u32 attach_prog_fd;
- };
- union {
- __u32 prog_ifindex;
- __u32 attach_btf_id;
- };
- __u32 prog_btf_fd;
- __u32 func_info_rec_size;
- const void *func_info;
- __u32 func_info_cnt;
- __u32 line_info_rec_size;
- const void *line_info;
- __u32 line_info_cnt;
- __u32 log_level;
- __u32 prog_flags;
-};
/* Flags to direct loading requirements */
#define MAPS_RELAX_COMPAT 0x01
@@ -158,22 +110,6 @@ struct bpf_load_program_attr {
/* Recommended log buffer size */
#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
-LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
- char *log_buf, size_t log_buf_sz);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
-LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
- const struct bpf_insn *insns, size_t insns_cnt,
- const char *license, __u32 kern_version,
- char *log_buf, size_t log_buf_sz);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
-LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
- const struct bpf_insn *insns,
- size_t insns_cnt, __u32 prog_flags,
- const char *license, __u32 kern_version,
- char *log_buf, size_t log_buf_sz,
- int log_level);
-
struct bpf_btf_load_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
@@ -187,10 +123,6 @@ struct bpf_btf_load_opts {
LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size,
const struct bpf_btf_load_opts *opts);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() instead")
-LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
- __u32 log_buf_size, bool do_log);
-
LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
__u64 flags);
@@ -338,8 +270,19 @@ LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values
__u32 *count,
const struct bpf_map_batch_opts *opts);
+struct bpf_obj_get_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+
+ __u32 file_flags;
+
+ size_t :0;
+};
+#define bpf_obj_get_opts__last_field file_flags
+
LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
LIBBPF_API int bpf_obj_get(const char *pathname);
+LIBBPF_API int bpf_obj_get_opts(const char *pathname,
+ const struct bpf_obj_get_opts *opts);
struct bpf_prog_attach_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
@@ -353,10 +296,6 @@ LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
enum bpf_attach_type type,
const struct bpf_prog_attach_opts *opts);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead")
-LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
- enum bpf_attach_type type,
- const struct bpf_prog_attach_opts *opts);
LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type);
@@ -422,17 +361,6 @@ struct bpf_prog_test_run_attr {
* out: length of cxt_out */
};
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
-LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
-
-/*
- * bpf_prog_test_run does not check that data_out is large enough. Consider
- * using bpf_prog_test_run_opts instead.
- */
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
-LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
- __u32 size, void *data_out, __u32 *size_out,
- __u32 *retval, __u32 *duration);
LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
@@ -442,9 +370,24 @@ LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
+
+struct bpf_prog_query_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 query_flags;
+ __u32 attach_flags; /* output argument */
+ __u32 *prog_ids;
+ __u32 prog_cnt; /* input+output argument */
+ __u32 *prog_attach_flags;
+};
+#define bpf_prog_query_opts__last_field prog_attach_flags
+
+LIBBPF_API int bpf_prog_query_opts(int target_fd,
+ enum bpf_attach_type type,
+ struct bpf_prog_query_opts *opts);
LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
__u32 query_flags, __u32 *attach_flags,
__u32 *prog_ids, __u32 *prog_cnt);
+
LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index fd48b1ff59ca..496e6a8ee0dc 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -29,6 +29,7 @@ enum bpf_type_id_kind {
enum bpf_type_info_kind {
BPF_TYPE_EXISTS = 0, /* type existence in target kernel */
BPF_TYPE_SIZE = 1, /* type size in target kernel */
+ BPF_TYPE_MATCHES = 2, /* type match in target kernel */
};
/* second argument to __builtin_preserve_enum_value() built-in */
@@ -184,6 +185,16 @@ enum bpf_enum_value_kind {
__builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS)
/*
+ * Convenience macro to check that provided named type
+ * (struct/union/enum/typedef) "matches" that in a target kernel.
+ * Returns:
+ * 1, if the type matches in the target kernel's BTF;
+ * 0, if the type does not match any in the target kernel
+ */
+#define bpf_core_type_matches(type) \
+ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_MATCHES)
+
+/*
* Convenience macro to get the byte size of a provided named type
* (struct/union/enum/typedef) in a target kernel.
* Returns:
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index fb04eaf367f1..7349b16b8e2f 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -22,12 +22,25 @@
* To allow use of SEC() with externs (e.g., for extern .maps declarations),
* make sure __attribute__((unused)) doesn't trigger compilation warning.
*/
+#if __GNUC__ && !__clang__
+
+/*
+ * Pragma macros are broken on GCC
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55578
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90400
+ */
+#define SEC(name) __attribute__((section(name), used))
+
+#else
+
#define SEC(name) \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \
__attribute__((section(name), used)) \
_Pragma("GCC diagnostic pop") \
+#endif
+
/* Avoid 'linux/stddef.h' definition of '__always_inline'. */
#undef __always_inline
#define __always_inline inline __attribute__((always_inline))
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 01ce121c302d..43ca3aff2292 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -2,6 +2,8 @@
#ifndef __BPF_TRACING_H__
#define __BPF_TRACING_H__
+#include <bpf/bpf_helpers.h>
+
/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
#if defined(__TARGET_ARCH_x86)
#define bpf_target_x86
@@ -140,7 +142,7 @@ struct pt_regs___s390 {
#define __PT_RC_REG gprs[2]
#define __PT_SP_REG gprs[15]
#define __PT_IP_REG psw.addr
-#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
+#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2)
#elif defined(bpf_target_arm)
@@ -174,7 +176,7 @@ struct pt_regs___arm64 {
#define __PT_RC_REG regs[0]
#define __PT_SP_REG sp
#define __PT_IP_REG pc
-#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
+#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0)
#elif defined(bpf_target_mips)
@@ -233,7 +235,7 @@ struct pt_regs___arm64 {
#define __PT_PARM5_REG a4
#define __PT_RET_REG ra
#define __PT_FP_REG s0
-#define __PT_RC_REG a5
+#define __PT_RC_REG a0
#define __PT_SP_REG sp
#define __PT_IP_REG pc
/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */
@@ -493,39 +495,69 @@ typeof(name(0)) name(struct pt_regs *ctx) \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+/* If kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER, read pt_regs directly */
#define ___bpf_syscall_args0() ctx
-#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
-#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
-#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
-#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
-#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_SYSCALL(regs)
+#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_SYSCALL(regs)
+#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_SYSCALL(regs)
+#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_SYSCALL(regs)
+#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_SYSCALL(regs)
#define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)
+/* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */
+#define ___bpf_syswrap_args0() ctx
+#define ___bpf_syswrap_args1(x) ___bpf_syswrap_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args2(x, args...) ___bpf_syswrap_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args(args...) ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args)
+
/*
- * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for
+ * BPF_KSYSCALL is a variant of BPF_KPROBE, which is intended for
* tracing syscall functions, like __x64_sys_close. It hides the underlying
* platform-specific low-level way of getting syscall input arguments from
* struct pt_regs, and provides a familiar typed and named function arguments
* syntax and semantics of accessing syscall input parameters.
*
- * Original struct pt_regs* context is preserved as 'ctx' argument. This might
+ * Original struct pt_regs * context is preserved as 'ctx' argument. This might
* be necessary when using BPF helpers like bpf_perf_event_output().
*
- * This macro relies on BPF CO-RE support.
+ * At the moment BPF_KSYSCALL does not transparently handle all the calling
+ * convention quirks for the following syscalls:
+ *
+ * - mmap(): __ARCH_WANT_SYS_OLD_MMAP.
+ * - clone(): CONFIG_CLONE_BACKWARDS, CONFIG_CLONE_BACKWARDS2 and
+ * CONFIG_CLONE_BACKWARDS3.
+ * - socket-related syscalls: __ARCH_WANT_SYS_SOCKETCALL.
+ * - compat syscalls.
+ *
+ * This may or may not change in the future. User needs to take extra measures
+ * to handle such quirks explicitly, if necessary.
+ *
+ * This macro relies on BPF CO-RE support and virtual __kconfig externs.
*/
-#define BPF_KPROBE_SYSCALL(name, args...) \
+#define BPF_KSYSCALL(name, args...) \
name(struct pt_regs *ctx); \
+extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig; \
static __attribute__((always_inline)) typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args); \
typeof(name(0)) name(struct pt_regs *ctx) \
{ \
- struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx); \
+ struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER \
+ ? (struct pt_regs *)PT_REGS_PARM1(ctx) \
+ : ctx; \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- return ____##name(___bpf_syscall_args(args)); \
+ if (LINUX_HAS_SYSCALL_WRAPPER) \
+ return ____##name(___bpf_syswrap_args(args)); \
+ else \
+ return ____##name(___bpf_syscall_args(args)); \
_Pragma("GCC diagnostic pop") \
} \
static __attribute__((always_inline)) typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args)
+#define BPF_KPROBE_SYSCALL BPF_KSYSCALL
+
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index bb1e06eb1eca..2d14f1a52d7a 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -130,7 +130,7 @@ static inline __u64 ptr_to_u64(const void *ptr)
/* Ensure given dynamically allocated memory region pointed to by *data* with
* capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough
- * memory to accomodate *add_cnt* new elements, assuming *cur_cnt* elements
+ * memory to accommodate *add_cnt* new elements, assuming *cur_cnt* elements
* are already used. At most *max_cnt* elements can be ever allocated.
* If necessary, memory is reallocated and all existing data is copied over,
* new pointer to the memory region is stored at *data, new memory region
@@ -305,6 +305,8 @@ static int btf_type_size(const struct btf_type *t)
return base_size + sizeof(__u32);
case BTF_KIND_ENUM:
return base_size + vlen * sizeof(struct btf_enum);
+ case BTF_KIND_ENUM64:
+ return base_size + vlen * sizeof(struct btf_enum64);
case BTF_KIND_ARRAY:
return base_size + sizeof(struct btf_array);
case BTF_KIND_STRUCT:
@@ -334,6 +336,7 @@ static void btf_bswap_type_base(struct btf_type *t)
static int btf_bswap_type_rest(struct btf_type *t)
{
struct btf_var_secinfo *v;
+ struct btf_enum64 *e64;
struct btf_member *m;
struct btf_array *a;
struct btf_param *p;
@@ -361,6 +364,13 @@ static int btf_bswap_type_rest(struct btf_type *t)
e->val = bswap_32(e->val);
}
return 0;
+ case BTF_KIND_ENUM64:
+ for (i = 0, e64 = btf_enum64(t); i < vlen; i++, e64++) {
+ e64->name_off = bswap_32(e64->name_off);
+ e64->val_lo32 = bswap_32(e64->val_lo32);
+ e64->val_hi32 = bswap_32(e64->val_hi32);
+ }
+ return 0;
case BTF_KIND_ARRAY:
a = btf_array(t);
a->type = bswap_32(a->type);
@@ -438,11 +448,6 @@ static int btf_parse_type_sec(struct btf *btf)
return 0;
}
-__u32 btf__get_nr_types(const struct btf *btf)
-{
- return btf->start_id + btf->nr_types - 1;
-}
-
__u32 btf__type_cnt(const struct btf *btf)
{
return btf->start_id + btf->nr_types;
@@ -472,9 +477,22 @@ const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
static int determine_ptr_size(const struct btf *btf)
{
+ static const char * const long_aliases[] = {
+ "long",
+ "long int",
+ "int long",
+ "unsigned long",
+ "long unsigned",
+ "unsigned long int",
+ "unsigned int long",
+ "long unsigned int",
+ "long int unsigned",
+ "int unsigned long",
+ "int long unsigned",
+ };
const struct btf_type *t;
const char *name;
- int i, n;
+ int i, j, n;
if (btf->base_btf && btf->base_btf->ptr_sz > 0)
return btf->base_btf->ptr_sz;
@@ -485,15 +503,16 @@ static int determine_ptr_size(const struct btf *btf)
if (!btf_is_int(t))
continue;
+ if (t->size != 4 && t->size != 8)
+ continue;
+
name = btf__name_by_offset(btf, t->name_off);
if (!name)
continue;
- if (strcmp(name, "long int") == 0 ||
- strcmp(name, "long unsigned int") == 0) {
- if (t->size != 4 && t->size != 8)
- continue;
- return t->size;
+ for (j = 0; j < ARRAY_SIZE(long_aliases); j++) {
+ if (strcmp(name, long_aliases[j]) == 0)
+ return t->size;
}
}
@@ -597,6 +616,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_DATASEC:
case BTF_KIND_FLOAT:
size = t->size;
@@ -644,6 +664,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
switch (kind) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FLOAT:
return min(btf_ptr_sz(btf), (size_t)t->size);
case BTF_KIND_PTR:
@@ -1382,92 +1403,6 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
return btf__load_from_kernel_by_id_split(id, NULL);
}
-int btf__get_from_id(__u32 id, struct btf **btf)
-{
- struct btf *res;
- int err;
-
- *btf = NULL;
- res = btf__load_from_kernel_by_id(id);
- err = libbpf_get_error(res);
-
- if (err)
- return libbpf_err(err);
-
- *btf = res;
- return 0;
-}
-
-int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
- __u32 expected_key_size, __u32 expected_value_size,
- __u32 *key_type_id, __u32 *value_type_id)
-{
- const struct btf_type *container_type;
- const struct btf_member *key, *value;
- const size_t max_name = 256;
- char container_name[max_name];
- __s64 key_size, value_size;
- __s32 container_id;
-
- if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) {
- pr_warn("map:%s length of '____btf_map_%s' is too long\n",
- map_name, map_name);
- return libbpf_err(-EINVAL);
- }
-
- container_id = btf__find_by_name(btf, container_name);
- if (container_id < 0) {
- pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
- map_name, container_name);
- return libbpf_err(container_id);
- }
-
- container_type = btf__type_by_id(btf, container_id);
- if (!container_type) {
- pr_warn("map:%s cannot find BTF type for container_id:%u\n",
- map_name, container_id);
- return libbpf_err(-EINVAL);
- }
-
- if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
- pr_warn("map:%s container_name:%s is an invalid container struct\n",
- map_name, container_name);
- return libbpf_err(-EINVAL);
- }
-
- key = btf_members(container_type);
- value = key + 1;
-
- key_size = btf__resolve_size(btf, key->type);
- if (key_size < 0) {
- pr_warn("map:%s invalid BTF key_type_size\n", map_name);
- return libbpf_err(key_size);
- }
-
- if (expected_key_size != key_size) {
- pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
- map_name, (__u32)key_size, expected_key_size);
- return libbpf_err(-EINVAL);
- }
-
- value_size = btf__resolve_size(btf, value->type);
- if (value_size < 0) {
- pr_warn("map:%s invalid BTF value_type_size\n", map_name);
- return libbpf_err(value_size);
- }
-
- if (expected_value_size != value_size) {
- pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
- map_name, (__u32)value_size, expected_value_size);
- return libbpf_err(-EINVAL);
- }
-
- *key_type_id = key->type;
- *value_type_id = value->type;
-
- return 0;
-}
-
static void btf_invalidate_raw_data(struct btf *btf)
{
if (btf->raw_data) {
@@ -2115,20 +2050,8 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
return 0;
}
-/*
- * Append new BTF_KIND_ENUM type with:
- * - *name* - name of the enum, can be NULL or empty for anonymous enums;
- * - *byte_sz* - size of the enum, in bytes.
- *
- * Enum initially has no enum values in it (and corresponds to enum forward
- * declaration). Enumerator values can be added by btf__add_enum_value()
- * immediately after btf__add_enum() succeeds.
- *
- * Returns:
- * - >0, type ID of newly added BTF type;
- * - <0, on error.
- */
-int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
+static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz,
+ bool is_signed, __u8 kind)
{
struct btf_type *t;
int sz, name_off = 0;
@@ -2153,13 +2076,35 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
/* start out with vlen=0; it will be adjusted when adding enum values */
t->name_off = name_off;
- t->info = btf_type_info(BTF_KIND_ENUM, 0, 0);
+ t->info = btf_type_info(kind, 0, is_signed);
t->size = byte_sz;
return btf_commit_type(btf, sz);
}
/*
+ * Append new BTF_KIND_ENUM type with:
+ * - *name* - name of the enum, can be NULL or empty for anonymous enums;
+ * - *byte_sz* - size of the enum, in bytes.
+ *
+ * Enum initially has no enum values in it (and corresponds to enum forward
+ * declaration). Enumerator values can be added by btf__add_enum_value()
+ * immediately after btf__add_enum() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
+{
+ /*
+ * set the signedness to be unsigned, it will change to signed
+ * if any later enumerator is negative.
+ */
+ return btf_add_enum_common(btf, name, byte_sz, false, BTF_KIND_ENUM);
+}
+
+/*
* Append new enum value for the current ENUM type with:
* - *name* - name of the enumerator value, can't be NULL or empty;
* - *value* - integer value corresponding to enum value *name*;
@@ -2206,6 +2151,82 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
t = btf_last_type(btf);
btf_type_inc_vlen(t);
+ /* if negative value, set signedness to signed */
+ if (value < 0)
+ t->info = btf_type_info(btf_kind(t), btf_vlen(t), true);
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ return 0;
+}
+
+/*
+ * Append new BTF_KIND_ENUM64 type with:
+ * - *name* - name of the enum, can be NULL or empty for anonymous enums;
+ * - *byte_sz* - size of the enum, in bytes.
+ * - *is_signed* - whether the enum values are signed or not;
+ *
+ * Enum initially has no enum values in it (and corresponds to enum forward
+ * declaration). Enumerator values can be added by btf__add_enum64_value()
+ * immediately after btf__add_enum64() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_enum64(struct btf *btf, const char *name, __u32 byte_sz,
+ bool is_signed)
+{
+ return btf_add_enum_common(btf, name, byte_sz, is_signed,
+ BTF_KIND_ENUM64);
+}
+
+/*
+ * Append new enum value for the current ENUM64 type with:
+ * - *name* - name of the enumerator value, can't be NULL or empty;
+ * - *value* - integer value corresponding to enum value *name*;
+ * Returns:
+ * - 0, on success;
+ * - <0, on error.
+ */
+int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
+{
+ struct btf_enum64 *v;
+ struct btf_type *t;
+ int sz, name_off;
+
+ /* last type should be BTF_KIND_ENUM64 */
+ if (btf->nr_types == 0)
+ return libbpf_err(-EINVAL);
+ t = btf_last_type(btf);
+ if (!btf_is_enum64(t))
+ return libbpf_err(-EINVAL);
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return libbpf_err(-EINVAL);
+
+ /* decompose and invalidate raw data */
+ if (btf_ensure_modifiable(btf))
+ return libbpf_err(-ENOMEM);
+
+ sz = sizeof(struct btf_enum64);
+ v = btf_add_type_mem(btf, sz);
+ if (!v)
+ return libbpf_err(-ENOMEM);
+
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ v->name_off = name_off;
+ v->val_lo32 = (__u32)value;
+ v->val_hi32 = value >> 32;
+
+ /* update parent type's vlen */
+ t = btf_last_type(btf);
+ btf_type_inc_vlen(t);
+
btf->hdr->type_len += sz;
btf->hdr->str_off += sz;
return 0;
@@ -2853,81 +2874,6 @@ const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
return btf_ext->data;
}
-static int btf_ext_reloc_info(const struct btf *btf,
- const struct btf_ext_info *ext_info,
- const char *sec_name, __u32 insns_cnt,
- void **info, __u32 *cnt)
-{
- __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec);
- __u32 i, record_size, existing_len, records_len;
- struct btf_ext_info_sec *sinfo;
- const char *info_sec_name;
- __u64 remain_len;
- void *data;
-
- record_size = ext_info->rec_size;
- sinfo = ext_info->info;
- remain_len = ext_info->len;
- while (remain_len > 0) {
- records_len = sinfo->num_info * record_size;
- info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
- if (strcmp(info_sec_name, sec_name)) {
- remain_len -= sec_hdrlen + records_len;
- sinfo = (void *)sinfo + sec_hdrlen + records_len;
- continue;
- }
-
- existing_len = (*cnt) * record_size;
- data = realloc(*info, existing_len + records_len);
- if (!data)
- return libbpf_err(-ENOMEM);
-
- memcpy(data + existing_len, sinfo->data, records_len);
- /* adjust insn_off only, the rest data will be passed
- * to the kernel.
- */
- for (i = 0; i < sinfo->num_info; i++) {
- __u32 *insn_off;
-
- insn_off = data + existing_len + (i * record_size);
- *insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt;
- }
- *info = data;
- *cnt += sinfo->num_info;
- return 0;
- }
-
- return libbpf_err(-ENOENT);
-}
-
-int btf_ext__reloc_func_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **func_info, __u32 *cnt)
-{
- return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name,
- insns_cnt, func_info, cnt);
-}
-
-int btf_ext__reloc_line_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **line_info, __u32 *cnt)
-{
- return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name,
- insns_cnt, line_info, cnt);
-}
-
-__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext)
-{
- return btf_ext->func_info.rec_size;
-}
-
-__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
-{
- return btf_ext->line_info.rec_size;
-}
-
struct btf_dedup;
static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts);
@@ -3077,9 +3023,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d);
* deduplicating structs/unions is described in greater details in comments for
* `btf_dedup_is_equiv` function.
*/
-
-DEFAULT_VERSION(btf__dedup_v0_6_0, btf__dedup, LIBBPF_0.6.0)
-int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts)
+int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
{
struct btf_dedup *d;
int err;
@@ -3139,19 +3083,6 @@ done:
return libbpf_err(err);
}
-COMPAT_VERSION(btf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2)
-int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts)
-{
- LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext);
-
- if (unused_opts) {
- pr_warn("please use new version of btf__dedup() that supports options\n");
- return libbpf_err(-ENOTSUP);
- }
-
- return btf__dedup(btf, &opts);
-}
-
#define BTF_UNPROCESSED_ID ((__u32)-1)
#define BTF_IN_PROGRESS_ID ((__u32)-2)
@@ -3470,7 +3401,7 @@ static bool btf_equal_int_tag(struct btf_type *t1, struct btf_type *t2)
return info1 == info2;
}
-/* Calculate type signature hash of ENUM. */
+/* Calculate type signature hash of ENUM/ENUM64. */
static long btf_hash_enum(struct btf_type *t)
{
long h;
@@ -3504,9 +3435,31 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
return true;
}
+static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2)
+{
+ const struct btf_enum64 *m1, *m2;
+ __u16 vlen;
+ int i;
+
+ if (!btf_equal_common(t1, t2))
+ return false;
+
+ vlen = btf_vlen(t1);
+ m1 = btf_enum64(t1);
+ m2 = btf_enum64(t2);
+ for (i = 0; i < vlen; i++) {
+ if (m1->name_off != m2->name_off || m1->val_lo32 != m2->val_lo32 ||
+ m1->val_hi32 != m2->val_hi32)
+ return false;
+ m1++;
+ m2++;
+ }
+ return true;
+}
+
static inline bool btf_is_enum_fwd(struct btf_type *t)
{
- return btf_is_enum(t) && btf_vlen(t) == 0;
+ return btf_is_any_enum(t) && btf_vlen(t) == 0;
}
static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
@@ -3519,6 +3472,17 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
t1->size == t2->size;
}
+static bool btf_compat_enum64(struct btf_type *t1, struct btf_type *t2)
+{
+ if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
+ return btf_equal_enum64(t1, t2);
+
+ /* ignore vlen when comparing */
+ return t1->name_off == t2->name_off &&
+ (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
+ t1->size == t2->size;
+}
+
/*
* Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
* as referenced type IDs equivalence is established separately during type
@@ -3731,6 +3695,7 @@ static int btf_dedup_prep(struct btf_dedup *d)
h = btf_hash_int_decl_tag(t);
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
h = btf_hash_enum(t);
break;
case BTF_KIND_STRUCT:
@@ -3820,6 +3785,27 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
}
break;
+ case BTF_KIND_ENUM64:
+ h = btf_hash_enum(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ cand_id = (__u32)(long)hash_entry->value;
+ cand = btf_type_by_id(d->btf, cand_id);
+ if (btf_equal_enum64(t, cand)) {
+ new_id = cand_id;
+ break;
+ }
+ if (btf_compat_enum64(t, cand)) {
+ if (btf_is_enum_fwd(t)) {
+ /* resolve fwd to full enum */
+ new_id = cand_id;
+ break;
+ }
+ /* resolve canonical enum fwd to full enum */
+ d->map[cand_id] = type_id;
+ }
+ }
+ break;
+
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
h = btf_hash_common(t);
@@ -4115,6 +4101,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
case BTF_KIND_ENUM:
return btf_compat_enum(cand_type, canon_type);
+ case BTF_KIND_ENUM64:
+ return btf_compat_enum64(cand_type, canon_type);
+
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
return btf_equal_common(cand_type, canon_type);
@@ -4717,6 +4706,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct
case BTF_KIND_INT:
case BTF_KIND_FLOAT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
return 0;
case BTF_KIND_FWD:
@@ -4811,6 +4801,16 @@ int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ct
}
break;
}
+ case BTF_KIND_ENUM64: {
+ struct btf_enum64 *m = btf_enum64(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->name_off, ctx);
+ if (err)
+ return err;
+ }
+ break;
+ }
case BTF_KIND_FUNC_PROTO: {
struct btf_param *m = btf_params(t);
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 951ac7475794..583760df83b4 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -120,20 +120,12 @@ LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
-LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead")
-LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
-LIBBPF_DEPRECATED_SINCE(0, 6, "intended for internal libbpf use only")
-LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
-LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead")
-LIBBPF_API int btf__load(struct btf *btf);
LIBBPF_API int btf__load_into_kernel(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
const char *type_name);
LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
const char *type_name, __u32 kind);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__type_cnt() instead; note that btf__get_nr_types() == btf__type_cnt() - 1")
-LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
LIBBPF_API __u32 btf__type_cnt(const struct btf *btf);
LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf);
LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
@@ -150,29 +142,10 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
-LIBBPF_DEPRECATED_SINCE(0, 7, "this API is not necessary when BTF-defined maps are used")
-LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
- __u32 expected_key_size,
- __u32 expected_value_size,
- __u32 *key_type_id, __u32 *value_type_id);
LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size);
LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
-int btf_ext__reloc_func_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **func_info, __u32 *cnt);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
-int btf_ext__reloc_line_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **line_info, __u32 *cnt);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info is deprecated; write custom func_info parsing to fetch rec_size")
-__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info is deprecated; write custom line_info parsing to fetch rec_size")
-__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
@@ -215,6 +188,8 @@ LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_
/* enum construction APIs */
LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz);
LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value);
+LIBBPF_API int btf__add_enum64(struct btf *btf, const char *name, __u32 bytes_sz, bool is_signed);
+LIBBPF_API int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value);
enum btf_fwd_kind {
BTF_FWD_STRUCT = 0,
@@ -257,22 +232,12 @@ struct btf_dedup_opts {
LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
-LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead")
-LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts);
-#define btf__dedup(...) ___libbpf_overload(___btf_dedup, __VA_ARGS__)
-#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts)
-#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts)
-
struct btf_dump;
struct btf_dump_opts {
- union {
- size_t sz;
- void *ctx; /* DEPRECATED: will be gone in v1.0 */
- };
+ size_t sz;
};
+#define btf_dump_opts__last_field sz
typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
@@ -281,51 +246,6 @@ LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
void *ctx,
const struct btf_dump_opts *opts);
-LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf,
- btf_dump_printf_fn_t printf_fn,
- void *ctx,
- const struct btf_dump_opts *opts);
-
-LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const struct btf_dump_opts *opts,
- btf_dump_printf_fn_t printf_fn);
-
-/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the
- * type of 4th argument. If it's btf_dump's print callback, use deprecated
- * API; otherwise, choose the new btf_dump__new(). ___libbpf_override()
- * doesn't work here because both variants have 4 input arguments.
- *
- * (void *) casts are necessary to avoid compilation warnings about type
- * mismatches, because even though __builtin_choose_expr() only ever evaluates
- * one side the other side still has to satisfy type constraints (this is
- * compiler implementation limitation which might be lifted eventually,
- * according to the documentation). So passing struct btf_ext in place of
- * btf_dump_printf_fn_t would be generating compilation warning. Casting to
- * void * avoids this issue.
- *
- * Also, two type compatibility checks for a function and function pointer are
- * required because passing function reference into btf_dump__new() as
- * btf_dump__new(..., my_callback, ...) and as btf_dump__new(...,
- * &my_callback, ...) (not explicit ampersand in the latter case) actually
- * differs as far as __builtin_types_compatible_p() is concerned. Thus two
- * checks are combined to detect callback argument.
- *
- * The rest works just like in case of ___libbpf_override() usage with symbol
- * versioning.
- *
- * C++ compilers don't support __builtin_types_compatible_p(), so at least
- * don't screw up compilation for them and let C++ users pick btf_dump__new
- * vs btf_dump__new_deprecated explicitly.
- */
-#ifndef __cplusplus
-#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr( \
- __builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) || \
- __builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)), \
- btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4), \
- btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4))
-#endif
-
LIBBPF_API void btf_dump__free(struct btf_dump *d);
LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
@@ -393,9 +313,10 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
#ifndef BTF_KIND_FLOAT
#define BTF_KIND_FLOAT 16 /* Floating point */
#endif
-/* The kernel header switched to enums, so these two were never #defined */
+/* The kernel header switched to enums, so the following were never #defined */
#define BTF_KIND_DECL_TAG 17 /* Decl Tag */
#define BTF_KIND_TYPE_TAG 18 /* Type Tag */
+#define BTF_KIND_ENUM64 19 /* Enum for up-to 64bit values */
static inline __u16 btf_kind(const struct btf_type *t)
{
@@ -454,6 +375,11 @@ static inline bool btf_is_enum(const struct btf_type *t)
return btf_kind(t) == BTF_KIND_ENUM;
}
+static inline bool btf_is_enum64(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_ENUM64;
+}
+
static inline bool btf_is_fwd(const struct btf_type *t)
{
return btf_kind(t) == BTF_KIND_FWD;
@@ -524,6 +450,18 @@ static inline bool btf_is_type_tag(const struct btf_type *t)
return btf_kind(t) == BTF_KIND_TYPE_TAG;
}
+static inline bool btf_is_any_enum(const struct btf_type *t)
+{
+ return btf_is_enum(t) || btf_is_enum64(t);
+}
+
+static inline bool btf_kind_core_compat(const struct btf_type *t1,
+ const struct btf_type *t2)
+{
+ return btf_kind(t1) == btf_kind(t2) ||
+ (btf_is_any_enum(t1) && btf_is_any_enum(t2));
+}
+
static inline __u8 btf_int_encoding(const struct btf_type *t)
{
return BTF_INT_ENCODING(*(__u32 *)(t + 1));
@@ -549,6 +487,16 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t)
return (struct btf_enum *)(t + 1);
}
+static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
+{
+ return (struct btf_enum64 *)(t + 1);
+}
+
+static inline __u64 btf_enum64_value(const struct btf_enum64 *e)
+{
+ return ((__u64)e->val_hi32 << 32) | e->val_lo32;
+}
+
static inline struct btf_member *btf_members(const struct btf_type *t)
{
return (struct btf_member *)(t + 1);
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 6b1bc1f43728..627edb5bb6de 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -144,15 +144,17 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
static int btf_dump_mark_referenced(struct btf_dump *d);
static int btf_dump_resize(struct btf_dump *d);
-DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0)
-struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf,
- btf_dump_printf_fn_t printf_fn,
- void *ctx,
- const struct btf_dump_opts *opts)
+struct btf_dump *btf_dump__new(const struct btf *btf,
+ btf_dump_printf_fn_t printf_fn,
+ void *ctx,
+ const struct btf_dump_opts *opts)
{
struct btf_dump *d;
int err;
+ if (!OPTS_VALID(opts, btf_dump_opts))
+ return libbpf_err_ptr(-EINVAL);
+
if (!printf_fn)
return libbpf_err_ptr(-EINVAL);
@@ -188,17 +190,6 @@ err:
return libbpf_err_ptr(err);
}
-COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4)
-struct btf_dump *btf_dump__new_deprecated(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const struct btf_dump_opts *opts,
- btf_dump_printf_fn_t printf_fn)
-{
- if (!printf_fn)
- return libbpf_err_ptr(-EINVAL);
- return btf_dump__new_v0_6_0(btf, printf_fn, opts ? opts->ctx : NULL, opts);
-}
-
static int btf_dump_resize(struct btf_dump *d)
{
int err, last_id = btf__type_cnt(d->btf) - 1;
@@ -318,6 +309,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
switch (btf_kind(t)) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
break;
@@ -538,6 +530,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
return 1;
}
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FWD:
/*
* non-anonymous or non-referenced enums are top-level
@@ -739,6 +732,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
tstate->emit_state = EMITTED;
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
if (top_level_def) {
btf_dump_emit_enum_def(d, id, t, 0);
btf_dump_printf(d, ";\n\n");
@@ -989,38 +983,81 @@ static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id));
}
-static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
- const struct btf_type *t,
- int lvl)
+static void btf_dump_emit_enum32_val(struct btf_dump *d,
+ const struct btf_type *t,
+ int lvl, __u16 vlen)
{
const struct btf_enum *v = btf_enum(t);
- __u16 vlen = btf_vlen(t);
+ bool is_signed = btf_kflag(t);
+ const char *fmt_str;
const char *name;
size_t dup_cnt;
int i;
+ for (i = 0; i < vlen; i++, v++) {
+ name = btf_name_of(d, v->name_off);
+ /* enumerators share namespace with typedef idents */
+ dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
+ if (dup_cnt > 1) {
+ fmt_str = is_signed ? "\n%s%s___%zd = %d," : "\n%s%s___%zd = %u,";
+ btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, dup_cnt, v->val);
+ } else {
+ fmt_str = is_signed ? "\n%s%s = %d," : "\n%s%s = %u,";
+ btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, v->val);
+ }
+ }
+}
+
+static void btf_dump_emit_enum64_val(struct btf_dump *d,
+ const struct btf_type *t,
+ int lvl, __u16 vlen)
+{
+ const struct btf_enum64 *v = btf_enum64(t);
+ bool is_signed = btf_kflag(t);
+ const char *fmt_str;
+ const char *name;
+ size_t dup_cnt;
+ __u64 val;
+ int i;
+
+ for (i = 0; i < vlen; i++, v++) {
+ name = btf_name_of(d, v->name_off);
+ dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
+ val = btf_enum64_value(v);
+ if (dup_cnt > 1) {
+ fmt_str = is_signed ? "\n%s%s___%zd = %lldLL,"
+ : "\n%s%s___%zd = %lluULL,";
+ btf_dump_printf(d, fmt_str,
+ pfx(lvl + 1), name, dup_cnt,
+ (unsigned long long)val);
+ } else {
+ fmt_str = is_signed ? "\n%s%s = %lldLL,"
+ : "\n%s%s = %lluULL,";
+ btf_dump_printf(d, fmt_str,
+ pfx(lvl + 1), name,
+ (unsigned long long)val);
+ }
+ }
+}
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t,
+ int lvl)
+{
+ __u16 vlen = btf_vlen(t);
+
btf_dump_printf(d, "enum%s%s",
t->name_off ? " " : "",
btf_dump_type_name(d, id));
- if (vlen) {
- btf_dump_printf(d, " {");
- for (i = 0; i < vlen; i++, v++) {
- name = btf_name_of(d, v->name_off);
- /* enumerators share namespace with typedef idents */
- dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
- if (dup_cnt > 1) {
- btf_dump_printf(d, "\n%s%s___%zu = %u,",
- pfx(lvl + 1), name, dup_cnt,
- (__u32)v->val);
- } else {
- btf_dump_printf(d, "\n%s%s = %u,",
- pfx(lvl + 1), name,
- (__u32)v->val);
- }
- }
- btf_dump_printf(d, "\n%s}", pfx(lvl));
- }
+ if (!vlen)
+ return;
+
+ btf_dump_printf(d, " {");
+ if (btf_is_enum(t))
+ btf_dump_emit_enum32_val(d, t, lvl, vlen);
+ else
+ btf_dump_emit_enum64_val(d, t, lvl, vlen);
+ btf_dump_printf(d, "\n%s}", pfx(lvl));
}
static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
@@ -1178,6 +1215,7 @@ skip_mod:
break;
case BTF_KIND_INT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FWD:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
@@ -1312,6 +1350,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
btf_dump_emit_struct_fwd(d, id, t);
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
btf_dump_emit_mods(d, decls);
/* inline anonymous enum */
if (t->name_off == 0 && !d->skip_anon_defs)
@@ -1988,7 +2027,8 @@ static int btf_dump_get_enum_value(struct btf_dump *d,
__u32 id,
__s64 *value)
{
- /* handle unaligned enum value */
+ bool is_signed = btf_kflag(t);
+
if (!ptr_is_aligned(d->btf, id, data)) {
__u64 val;
int err;
@@ -2005,13 +2045,13 @@ static int btf_dump_get_enum_value(struct btf_dump *d,
*value = *(__s64 *)data;
return 0;
case 4:
- *value = *(__s32 *)data;
+ *value = is_signed ? (__s64)*(__s32 *)data : *(__u32 *)data;
return 0;
case 2:
- *value = *(__s16 *)data;
+ *value = is_signed ? *(__s16 *)data : *(__u16 *)data;
return 0;
case 1:
- *value = *(__s8 *)data;
+ *value = is_signed ? *(__s8 *)data : *(__u8 *)data;
return 0;
default:
pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id);
@@ -2024,7 +2064,7 @@ static int btf_dump_enum_data(struct btf_dump *d,
__u32 id,
const void *data)
{
- const struct btf_enum *e;
+ bool is_signed;
__s64 value;
int i, err;
@@ -2032,14 +2072,31 @@ static int btf_dump_enum_data(struct btf_dump *d,
if (err)
return err;
- for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
- if (value != e->val)
- continue;
- btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
- return 0;
- }
+ is_signed = btf_kflag(t);
+ if (btf_is_enum(t)) {
+ const struct btf_enum *e;
+
+ for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
+ if (value != e->val)
+ continue;
+ btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+ return 0;
+ }
- btf_dump_type_values(d, "%d", value);
+ btf_dump_type_values(d, is_signed ? "%d" : "%u", value);
+ } else {
+ const struct btf_enum64 *e;
+
+ for (i = 0, e = btf_enum64(t); i < btf_vlen(t); i++, e++) {
+ if (value != btf_enum64_value(e))
+ continue;
+ btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+ return 0;
+ }
+
+ btf_dump_type_values(d, is_signed ? "%lldLL" : "%lluULL",
+ (unsigned long long)value);
+ }
return 0;
}
@@ -2099,6 +2156,7 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d,
case BTF_KIND_FLOAT:
case BTF_KIND_PTR:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
if (data + bits_offset / 8 + size > d->typed_dump->data_end)
return -E2BIG;
break;
@@ -2203,6 +2261,7 @@ static int btf_dump_type_data_check_zero(struct btf_dump *d,
return -ENODATA;
}
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
err = btf_dump_get_enum_value(d, t, data, id, &value);
if (err)
return err;
@@ -2275,6 +2334,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d,
err = btf_dump_struct_data(d, t, id, data);
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
/* handle bitfield and int enum values */
if (bit_sz) {
__u64 print_num;
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 927745b08014..23f5c46708f8 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -533,7 +533,7 @@ void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
gen->attach_kind = kind;
ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s",
prefix, attach_name);
- if (ret == sizeof(gen->attach_target))
+ if (ret >= sizeof(gen->attach_target))
gen->error = -ENOSPC;
}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e89cc9c885b3..50d41815f431 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -31,7 +31,6 @@
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
-#include <linux/list.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
@@ -72,6 +71,135 @@
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
+static const char * const attach_type_name[] = {
+ [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
+ [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
+ [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
+ [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
+ [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
+ [BPF_CGROUP_DEVICE] = "cgroup_device",
+ [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
+ [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
+ [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
+ [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
+ [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
+ [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
+ [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
+ [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
+ [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
+ [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
+ [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
+ [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
+ [BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
+ [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
+ [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
+ [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
+ [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
+ [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
+ [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
+ [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
+ [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
+ [BPF_LIRC_MODE2] = "lirc_mode2",
+ [BPF_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_TRACE_RAW_TP] = "trace_raw_tp",
+ [BPF_TRACE_FENTRY] = "trace_fentry",
+ [BPF_TRACE_FEXIT] = "trace_fexit",
+ [BPF_MODIFY_RETURN] = "modify_return",
+ [BPF_LSM_MAC] = "lsm_mac",
+ [BPF_LSM_CGROUP] = "lsm_cgroup",
+ [BPF_SK_LOOKUP] = "sk_lookup",
+ [BPF_TRACE_ITER] = "trace_iter",
+ [BPF_XDP_DEVMAP] = "xdp_devmap",
+ [BPF_XDP_CPUMAP] = "xdp_cpumap",
+ [BPF_XDP] = "xdp",
+ [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
+ [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
+ [BPF_PERF_EVENT] = "perf_event",
+ [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
+};
+
+static const char * const link_type_name[] = {
+ [BPF_LINK_TYPE_UNSPEC] = "unspec",
+ [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_LINK_TYPE_TRACING] = "tracing",
+ [BPF_LINK_TYPE_CGROUP] = "cgroup",
+ [BPF_LINK_TYPE_ITER] = "iter",
+ [BPF_LINK_TYPE_NETNS] = "netns",
+ [BPF_LINK_TYPE_XDP] = "xdp",
+ [BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
+ [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
+};
+
+static const char * const map_type_name[] = {
+ [BPF_MAP_TYPE_UNSPEC] = "unspec",
+ [BPF_MAP_TYPE_HASH] = "hash",
+ [BPF_MAP_TYPE_ARRAY] = "array",
+ [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
+ [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
+ [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
+ [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
+ [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
+ [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
+ [BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
+ [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
+ [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
+ [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
+ [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
+ [BPF_MAP_TYPE_DEVMAP] = "devmap",
+ [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
+ [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
+ [BPF_MAP_TYPE_CPUMAP] = "cpumap",
+ [BPF_MAP_TYPE_XSKMAP] = "xskmap",
+ [BPF_MAP_TYPE_SOCKHASH] = "sockhash",
+ [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
+ [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
+ [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
+ [BPF_MAP_TYPE_QUEUE] = "queue",
+ [BPF_MAP_TYPE_STACK] = "stack",
+ [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
+ [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_MAP_TYPE_RINGBUF] = "ringbuf",
+ [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
+ [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
+ [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
+};
+
+static const char * const prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = "unspec",
+ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+ [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
+ [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
+ [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
+ [BPF_PROG_TYPE_XDP] = "xdp",
+ [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
+ [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
+ [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
+ [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
+ [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
+ [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
+ [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
+ [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
+ [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
+ [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
+ [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
+ [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
+ [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
+ [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
+ [BPF_PROG_TYPE_TRACING] = "tracing",
+ [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_PROG_TYPE_EXT] = "ext",
+ [BPF_PROG_TYPE_LSM] = "lsm",
+ [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
+ [BPF_PROG_TYPE_SYSCALL] = "syscall",
+};
+
static int __base_pr(enum libbpf_print_level level, const char *format,
va_list args)
{
@@ -151,12 +279,9 @@ static inline __u64 ptr_to_u64(const void *ptr)
return (__u64) (unsigned long) ptr;
}
-/* this goes away in libbpf 1.0 */
-enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;
-
int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
{
- libbpf_mode = mode;
+ /* as of v1.0 libbpf_set_strict_mode() is a no-op */
return 0;
}
@@ -219,12 +344,8 @@ enum sec_def_flags {
SEC_ATTACH_BTF = 4,
/* BPF program type allows sleeping/blocking in kernel */
SEC_SLEEPABLE = 8,
- /* allow non-strict prefix matching */
- SEC_SLOPPY_PFX = 16,
/* BPF program support non-linear XDP buffer */
- SEC_XDP_FRAGS = 32,
- /* deprecated sec definitions not supposed to be used */
- SEC_DEPRECATED = 64,
+ SEC_XDP_FRAGS = 16,
};
struct bpf_sec_def {
@@ -244,9 +365,10 @@ struct bpf_sec_def {
* linux/filter.h.
*/
struct bpf_program {
- const struct bpf_sec_def *sec_def;
+ char *name;
char *sec_name;
size_t sec_idx;
+ const struct bpf_sec_def *sec_def;
/* this program's instruction offset (in number of instructions)
* within its containing ELF section
*/
@@ -266,12 +388,6 @@ struct bpf_program {
*/
size_t sub_insn_off;
- char *name;
- /* name with / replaced by _; makes recursive pinning
- * in bpf_object__pin_programs easier
- */
- char *pin_name;
-
/* instructions that belong to BPF program; insns[0] is located at
* sec_insn_off instruction within its ELF section in ELF file, so
* when mapping ELF file instruction index to the local instruction,
@@ -292,24 +408,19 @@ struct bpf_program {
size_t log_size;
__u32 log_level;
- struct {
- int nr;
- int *fds;
- } instances;
- bpf_program_prep_t preprocessor;
-
struct bpf_object *obj;
- void *priv;
- bpf_program_clear_priv_t clear_priv;
+ int fd;
bool autoload;
bool mark_btf_static;
enum bpf_prog_type type;
enum bpf_attach_type expected_attach_type;
+
int prog_ifindex;
__u32 attach_btf_obj_fd;
__u32 attach_btf_id;
__u32 attach_prog_fd;
+
void *func_info;
__u32 func_info_rec_size;
__u32 func_info_cnt;
@@ -356,6 +467,14 @@ enum libbpf_map_type {
LIBBPF_MAP_KCONFIG,
};
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
+};
+
struct bpf_map {
struct bpf_object *obj;
char *name;
@@ -376,8 +495,6 @@ struct bpf_map {
__u32 btf_key_type_id;
__u32 btf_value_type_id;
__u32 btf_vmlinux_value_type_id;
- void *priv;
- bpf_map_clear_priv_t clear_priv;
enum libbpf_map_type libbpf_type;
void *mmaped;
struct bpf_struct_ops *st_ops;
@@ -440,8 +557,6 @@ struct extern_desc {
};
};
-static LIST_HEAD(bpf_objects_list);
-
struct module_btf {
struct btf *btf;
char *name;
@@ -510,12 +625,6 @@ struct bpf_object {
/* Information when doing ELF related work. Only valid if efile.elf is not NULL */
struct elf_state efile;
- /*
- * All loaded bpf_object are linked in a list, which is
- * hidden to caller. bpf_objects__<func> handlers deal with
- * all objects.
- */
- struct list_head list;
struct btf *btf;
struct btf_ext *btf_ext;
@@ -541,9 +650,6 @@ struct bpf_object {
size_t log_size;
__u32 log_level;
- void *priv;
- bpf_object_clear_priv_t clear_priv;
-
int *fd_array;
size_t fd_array_cap;
size_t fd_array_cnt;
@@ -565,25 +671,10 @@ static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
void bpf_program__unload(struct bpf_program *prog)
{
- int i;
-
if (!prog)
return;
- /*
- * If the object is opened but the program was never loaded,
- * it is possible that prog->instances.nr == -1.
- */
- if (prog->instances.nr > 0) {
- for (i = 0; i < prog->instances.nr; i++)
- zclose(prog->instances.fds[i]);
- } else if (prog->instances.nr != -1) {
- pr_warn("Internal error: instances.nr is %d\n",
- prog->instances.nr);
- }
-
- prog->instances.nr = -1;
- zfree(&prog->instances.fds);
+ zclose(prog->fd);
zfree(&prog->func_info);
zfree(&prog->line_info);
@@ -594,16 +685,9 @@ static void bpf_program__exit(struct bpf_program *prog)
if (!prog)
return;
- if (prog->clear_priv)
- prog->clear_priv(prog, prog->priv);
-
- prog->priv = NULL;
- prog->clear_priv = NULL;
-
bpf_program__unload(prog);
zfree(&prog->name);
zfree(&prog->sec_name);
- zfree(&prog->pin_name);
zfree(&prog->insns);
zfree(&prog->reloc_desc);
@@ -612,26 +696,6 @@ static void bpf_program__exit(struct bpf_program *prog)
prog->sec_idx = -1;
}
-static char *__bpf_program__pin_name(struct bpf_program *prog)
-{
- char *name, *p;
-
- if (libbpf_mode & LIBBPF_STRICT_SEC_NAME)
- name = strdup(prog->name);
- else
- name = strdup(prog->sec_name);
-
- if (!name)
- return NULL;
-
- p = name;
-
- while ((p = strchr(p, '/')))
- *p = '_';
-
- return name;
-}
-
static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
return BPF_CLASS(insn->code) == BPF_JMP &&
@@ -673,6 +737,7 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
prog->insns_cnt = prog->sec_insn_cnt;
prog->type = BPF_PROG_TYPE_UNSPEC;
+ prog->fd = -1;
/* libbpf's convention for SEC("?abc...") is that it's just like
* SEC("abc...") but the corresponding bpf_program starts out with
@@ -686,9 +751,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
prog->autoload = true;
}
- prog->instances.fds = NULL;
- prog->instances.nr = -1;
-
/* inherit object's log_level */
prog->log_level = obj->log_level;
@@ -700,10 +762,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
if (!prog->name)
goto errout;
- prog->pin_name = __bpf_program__pin_name(prog);
- if (!prog->pin_name)
- goto errout;
-
prog->insns = malloc(insn_data_sz);
if (!prog->insns)
goto errout;
@@ -1185,7 +1243,6 @@ static struct bpf_object *bpf_object__new(const char *path,
size_t obj_buf_sz,
const char *obj_name)
{
- bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST);
struct bpf_object *obj;
char *end;
@@ -1223,9 +1280,6 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->kern_version = get_kernel_version();
obj->loaded = false;
- INIT_LIST_HEAD(&obj->list);
- if (!strict)
- list_add(&obj->list, &bpf_objects_list);
return obj;
}
@@ -1258,10 +1312,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
}
if (obj->efile.obj_buf_sz > 0) {
- /*
- * obj_buf should have been validated by
- * bpf_object__open_buffer().
- */
+ /* obj_buf should have been validated by bpf_object__open_mem(). */
elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
} else {
obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
@@ -1643,7 +1694,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
switch (ext->kcfg.type) {
case KCFG_BOOL:
if (value == 'm') {
- pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
+ pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
ext->name, value);
return -EINVAL;
}
@@ -1664,7 +1715,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
case KCFG_INT:
case KCFG_CHAR_ARR:
default:
- pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
+ pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
ext->name, value);
return -EINVAL;
}
@@ -1678,7 +1729,8 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
size_t len;
if (ext->kcfg.type != KCFG_CHAR_ARR) {
- pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
+ pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
+ ext->name, value);
return -EINVAL;
}
@@ -1692,7 +1744,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
/* strip quotes */
len -= 2;
if (len >= ext->kcfg.sz) {
- pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
+ pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
ext->name, value, len, ext->kcfg.sz - 1);
len = ext->kcfg.sz - 1;
}
@@ -1749,13 +1801,20 @@ static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
__u64 value)
{
- if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
- pr_warn("extern (kcfg) %s=%llu should be integer\n",
+ if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
+ ext->kcfg.type != KCFG_BOOL) {
+ pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
ext->name, (unsigned long long)value);
return -EINVAL;
}
+ if (ext->kcfg.type == KCFG_BOOL && value > 1) {
+ pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
+ ext->name, (unsigned long long)value);
+ return -EINVAL;
+
+ }
if (!is_kcfg_value_in_range(ext, value)) {
- pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
+ pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
ext->name, (unsigned long long)value, ext->kcfg.sz);
return -ERANGE;
}
@@ -1819,16 +1878,19 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj,
/* assume integer */
err = parse_u64(value, &num);
if (err) {
- pr_warn("extern (kcfg) %s=%s should be integer\n",
- ext->name, value);
+ pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
return err;
}
+ if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
+ pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
+ return -EINVAL;
+ }
err = set_kcfg_value_num(ext, ext_val, num);
break;
}
if (err)
return err;
- pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
+ pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
return 0;
}
@@ -1924,143 +1986,6 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj)
return 0;
}
-static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
-{
- Elf_Data *symbols = obj->efile.symbols;
- int i, map_def_sz = 0, nr_maps = 0, nr_syms;
- Elf_Data *data = NULL;
- Elf_Scn *scn;
-
- if (obj->efile.maps_shndx < 0)
- return 0;
-
- if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) {
- pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n");
- return -EOPNOTSUPP;
- }
-
- if (!symbols)
- return -EINVAL;
-
- scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
- data = elf_sec_data(obj, scn);
- if (!scn || !data) {
- pr_warn("elf: failed to get legacy map definitions for %s\n",
- obj->path);
- return -EINVAL;
- }
-
- /*
- * Count number of maps. Each map has a name.
- * Array of maps is not supported: only the first element is
- * considered.
- *
- * TODO: Detect array of map and report error.
- */
- nr_syms = symbols->d_size / sizeof(Elf64_Sym);
- for (i = 0; i < nr_syms; i++) {
- Elf64_Sym *sym = elf_sym_by_idx(obj, i);
-
- if (sym->st_shndx != obj->efile.maps_shndx)
- continue;
- if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION)
- continue;
- nr_maps++;
- }
- /* Assume equally sized map definitions */
- pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
- nr_maps, data->d_size, obj->path);
-
- if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
- pr_warn("elf: unable to determine legacy map definition size in %s\n",
- obj->path);
- return -EINVAL;
- }
- map_def_sz = data->d_size / nr_maps;
-
- /* Fill obj->maps using data in "maps" section. */
- for (i = 0; i < nr_syms; i++) {
- Elf64_Sym *sym = elf_sym_by_idx(obj, i);
- const char *map_name;
- struct bpf_map_def *def;
- struct bpf_map *map;
-
- if (sym->st_shndx != obj->efile.maps_shndx)
- continue;
- if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION)
- continue;
-
- map = bpf_object__add_map(obj);
- if (IS_ERR(map))
- return PTR_ERR(map);
-
- map_name = elf_sym_str(obj, sym->st_name);
- if (!map_name) {
- pr_warn("failed to get map #%d name sym string for obj %s\n",
- i, obj->path);
- return -LIBBPF_ERRNO__FORMAT;
- }
-
- pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name);
-
- if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
- pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
- return -ENOTSUP;
- }
-
- map->libbpf_type = LIBBPF_MAP_UNSPEC;
- map->sec_idx = sym->st_shndx;
- map->sec_offset = sym->st_value;
- pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
- map_name, map->sec_idx, map->sec_offset);
- if (sym->st_value + map_def_sz > data->d_size) {
- pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
- obj->path, map_name);
- return -EINVAL;
- }
-
- map->name = strdup(map_name);
- if (!map->name) {
- pr_warn("map '%s': failed to alloc map name\n", map_name);
- return -ENOMEM;
- }
- pr_debug("map %d is \"%s\"\n", i, map->name);
- def = (struct bpf_map_def *)(data->d_buf + sym->st_value);
- /*
- * If the definition of the map in the object file fits in
- * bpf_map_def, copy it. Any extra fields in our version
- * of bpf_map_def will default to zero as a result of the
- * calloc above.
- */
- if (map_def_sz <= sizeof(struct bpf_map_def)) {
- memcpy(&map->def, def, map_def_sz);
- } else {
- /*
- * Here the map structure being read is bigger than what
- * we expect, truncate if the excess bits are all zero.
- * If they are not zero, reject this map as
- * incompatible.
- */
- char *b;
-
- for (b = ((char *)def) + sizeof(struct bpf_map_def);
- b < ((char *)def) + map_def_sz; b++) {
- if (*b != 0) {
- pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
- obj->path, map_name);
- if (strict)
- return -EINVAL;
- }
- }
- memcpy(&map->def, def, sizeof(struct bpf_map_def));
- }
-
- /* btf info may not exist but fill it in if it does exist */
- (void) bpf_map_find_btf_info(obj, map);
- }
- return 0;
-}
-
const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
{
@@ -2114,6 +2039,7 @@ static const char *__btf_kind_str(__u16 kind)
case BTF_KIND_FLOAT: return "float";
case BTF_KIND_DECL_TAG: return "decl_tag";
case BTF_KIND_TYPE_TAG: return "type_tag";
+ case BTF_KIND_ENUM64: return "enum64";
default: return "unknown";
}
}
@@ -2177,6 +2103,13 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
return bpf_map__set_pin_path(map, buf);
}
+/* should match definition in bpf_helpers.h */
+enum libbpf_pin_type {
+ LIBBPF_PIN_NONE,
+ /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+ LIBBPF_PIN_BY_NAME,
+};
+
int parse_btf_map_def(const char *map_name, struct btf *btf,
const struct btf_type *def_t, bool strict,
struct btf_map_def *map_def, struct btf_map_def *inner_def)
@@ -2398,6 +2331,37 @@ int parse_btf_map_def(const char *map_name, struct btf *btf,
return 0;
}
+static size_t adjust_ringbuf_sz(size_t sz)
+{
+ __u32 page_sz = sysconf(_SC_PAGE_SIZE);
+ __u32 mul;
+
+ /* if user forgot to set any size, make sure they see error */
+ if (sz == 0)
+ return 0;
+ /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
+ * a power-of-2 multiple of kernel's page size. If user diligently
+ * satisified these conditions, pass the size through.
+ */
+ if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
+ return sz;
+
+ /* Otherwise find closest (page_sz * power_of_2) product bigger than
+ * user-set size to satisfy both user size request and kernel
+ * requirements and substitute correct max_entries for map creation.
+ */
+ for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
+ if (mul * page_sz > sz)
+ return mul * page_sz;
+ }
+
+ /* if it's impossible to satisfy the conditions (i.e., user size is
+ * very close to UINT_MAX but is not a power-of-2 multiple of
+ * page_size) then just return original size and let kernel reject it
+ */
+ return sz;
+}
+
static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
{
map->def.type = def->map_type;
@@ -2411,6 +2375,10 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def
map->btf_key_type_id = def->key_type_id;
map->btf_value_type_id = def->value_type_id;
+ /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
+ if (map->def.type == BPF_MAP_TYPE_RINGBUF)
+ map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
+
if (def->parts & MAP_DEF_MAP_TYPE)
pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
@@ -2609,12 +2577,11 @@ static int bpf_object__init_maps(struct bpf_object *obj,
{
const char *pin_root_path;
bool strict;
- int err;
+ int err = 0;
strict = !OPTS_GET(opts, relaxed_maps, false);
pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
- err = bpf_object__init_user_maps(obj, strict);
err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
err = err ?: bpf_object__init_global_data_maps(obj);
err = err ?: bpf_object__init_kconfig_map(obj);
@@ -2642,12 +2609,13 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
+ bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
return !has_func || !has_datasec || !has_func_global || !has_float ||
- !has_decl_tag || !has_type_tag;
+ !has_decl_tag || !has_type_tag || !has_enum64;
}
-static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
+static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
@@ -2655,6 +2623,8 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
+ bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
+ int enum64_placeholder_id = 0;
struct btf_type *t;
int i, j, vlen;
@@ -2717,8 +2687,32 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
/* replace TYPE_TAG with a CONST */
t->name_off = 0;
t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
- }
+ } else if (!has_enum64 && btf_is_enum(t)) {
+ /* clear the kflag */
+ t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
+ } else if (!has_enum64 && btf_is_enum64(t)) {
+ /* replace ENUM64 with a union */
+ struct btf_member *m;
+
+ if (enum64_placeholder_id == 0) {
+ enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
+ if (enum64_placeholder_id < 0)
+ return enum64_placeholder_id;
+
+ t = (struct btf_type *)btf__type_by_id(btf, i);
+ }
+
+ m = btf_members(t);
+ vlen = btf_vlen(t);
+ t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
+ for (j = 0; j < vlen; j++, m++) {
+ m->type = enum64_placeholder_id;
+ m->offset = 0;
+ }
+ }
}
+
+ return 0;
}
static bool libbpf_needs_btf(const struct bpf_object *obj)
@@ -2905,11 +2899,6 @@ static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
return libbpf_err(err);
}
-int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
-{
- return btf_finalize_data(obj, btf);
-}
-
static int bpf_object__finalize_btf(struct bpf_object *obj)
{
int err;
@@ -3056,7 +3045,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
/* enforce 8-byte pointers for BPF-targeted BTFs */
btf__set_pointer_size(obj->btf, 8);
- bpf_object__sanitize_btf(obj, kern_btf);
+ err = bpf_object__sanitize_btf(obj, kern_btf);
+ if (err)
+ return err;
}
if (obj->gen_loader) {
@@ -3563,6 +3554,10 @@ static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
if (strcmp(name, "libbpf_tristate"))
return KCFG_UNKNOWN;
return KCFG_TRISTATE;
+ case BTF_KIND_ENUM64:
+ if (strcmp(name, "libbpf_tristate"))
+ return KCFG_UNKNOWN;
+ return KCFG_TRISTATE;
case BTF_KIND_ARRAY:
if (btf_array(t)->nelems == 0)
return KCFG_UNKNOWN;
@@ -3738,7 +3733,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
&ext->kcfg.is_signed);
if (ext->kcfg.type == KCFG_UNKNOWN) {
- pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
+ pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
return -ENOTSUP;
}
} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
@@ -3860,41 +3855,8 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return 0;
}
-struct bpf_program *
-bpf_object__find_program_by_title(const struct bpf_object *obj,
- const char *title)
+static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
{
- struct bpf_program *pos;
-
- bpf_object__for_each_program(pos, obj) {
- if (pos->sec_name && !strcmp(pos->sec_name, title))
- return pos;
- }
- return errno = ENOENT, NULL;
-}
-
-static bool prog_is_subprog(const struct bpf_object *obj,
- const struct bpf_program *prog)
-{
- /* For legacy reasons, libbpf supports an entry-point BPF programs
- * without SEC() attribute, i.e., those in the .text section. But if
- * there are 2 or more such programs in the .text section, they all
- * must be subprograms called from entry-point BPF programs in
- * designated SEC()'tions, otherwise there is no way to distinguish
- * which of those programs should be loaded vs which are a subprogram.
- * Similarly, if there is a function/program in .text and at least one
- * other BPF program with custom SEC() attribute, then we just assume
- * .text programs are subprograms (even if they are not called from
- * other programs), because libbpf never explicitly supported mixing
- * SEC()-designated BPF programs and .text entry-point BPF programs.
- *
- * In libbpf 1.0 strict mode, we always consider .text
- * programs to be subprograms.
- */
-
- if (libbpf_mode & LIBBPF_STRICT_SEC_NAME)
- return prog->sec_idx == obj->efile.text_shndx;
-
return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
}
@@ -4235,9 +4197,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat
static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
{
- struct bpf_map_def *def = &map->def;
- __u32 key_type_id = 0, value_type_id = 0;
- int ret;
+ int id;
if (!obj->btf)
return -ENOENT;
@@ -4246,31 +4206,22 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
* For struct_ops map, it does not need btf_key_type_id and
* btf_value_type_id.
*/
- if (map->sec_idx == obj->efile.btf_maps_shndx ||
- bpf_map__is_struct_ops(map))
+ if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
return 0;
- if (!bpf_map__is_internal(map)) {
- pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n");
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
- ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
- def->value_size, &key_type_id,
- &value_type_id);
-#pragma GCC diagnostic pop
- } else {
- /*
- * LLVM annotates global data differently in BTF, that is,
- * only as '.data', '.bss' or '.rodata'.
- */
- ret = btf__find_by_name(obj->btf, map->real_name);
- }
- if (ret < 0)
- return ret;
+ /*
+ * LLVM annotates global data differently in BTF, that is,
+ * only as '.data', '.bss' or '.rodata'.
+ */
+ if (!bpf_map__is_internal(map))
+ return -ENOENT;
- map->btf_key_type_id = key_type_id;
- map->btf_value_type_id = bpf_map__is_internal(map) ?
- ret : value_type_id;
+ id = btf__find_by_name(obj->btf, map->real_name);
+ if (id < 0)
+ return id;
+
+ map->btf_key_type_id = 0;
+ map->btf_value_type_id = id;
return 0;
}
@@ -4327,7 +4278,7 @@ int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
int bpf_map__reuse_fd(struct bpf_map *map, int fd)
{
struct bpf_map_info info = {};
- __u32 len = sizeof(info);
+ __u32 len = sizeof(info), name_len;
int new_fd, err;
char *new_name;
@@ -4337,7 +4288,12 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
if (err)
return libbpf_err(err);
- new_name = strdup(info.name);
+ name_len = strlen(info.name);
+ if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
+ new_name = strdup(map->name);
+ else
+ new_name = strdup(info.name);
+
if (!new_name)
return libbpf_err(-errno);
@@ -4396,18 +4352,16 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
{
- if (map->fd >= 0)
+ if (map->obj->loaded)
return libbpf_err(-EBUSY);
+
map->def.max_entries = max_entries;
- return 0;
-}
-int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
-{
- if (!map || !max_entries)
- return libbpf_err(-EINVAL);
+ /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
+ if (map->def.type == BPF_MAP_TYPE_RINGBUF)
+ map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
- return bpf_map__set_max_entries(map, max_entries);
+ return 0;
}
static int
@@ -4746,6 +4700,19 @@ static int probe_kern_bpf_cookie(void)
return probe_fd(ret);
}
+static int probe_kern_btf_enum64(void)
+{
+ static const char strs[] = "\0enum64";
+ __u32 types[] = {
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
+}
+
+static int probe_kern_syscall_wrapper(void);
+
enum kern_feature_result {
FEAT_UNKNOWN = 0,
FEAT_SUPPORTED = 1,
@@ -4811,6 +4778,12 @@ static struct kern_feature_desc {
[FEAT_BPF_COOKIE] = {
"BPF cookie support", probe_kern_bpf_cookie,
},
+ [FEAT_BTF_ENUM64] = {
+ "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
+ },
+ [FEAT_SYSCALL_WRAPPER] = {
+ "Kernel using syscall wrapper", probe_kern_syscall_wrapper,
+ },
};
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
@@ -4943,42 +4916,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
static void bpf_map__destroy(struct bpf_map *map);
-static bool is_pow_of_2(size_t x)
-{
- return x && (x & (x - 1));
-}
-
-static size_t adjust_ringbuf_sz(size_t sz)
-{
- __u32 page_sz = sysconf(_SC_PAGE_SIZE);
- __u32 mul;
-
- /* if user forgot to set any size, make sure they see error */
- if (sz == 0)
- return 0;
- /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
- * a power-of-2 multiple of kernel's page size. If user diligently
- * satisified these conditions, pass the size through.
- */
- if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
- return sz;
-
- /* Otherwise find closest (page_sz * power_of_2) product bigger than
- * user-set size to satisfy both user size request and kernel
- * requirements and substitute correct max_entries for map creation.
- */
- for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
- if (mul * page_sz > sz)
- return mul * page_sz;
- }
-
- /* if it's impossible to satisfy the conditions (i.e., user size is
- * very close to UINT_MAX but is not a power-of-2 multiple of
- * page_size) then just return original size and let kernel reject it
- */
- return sz;
-}
-
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
{
LIBBPF_OPTS(bpf_map_create_opts, create_attr);
@@ -5017,9 +4954,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
}
switch (def->type) {
- case BPF_MAP_TYPE_RINGBUF:
- map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
- /* fallthrough */
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
case BPF_MAP_TYPE_CGROUP_ARRAY:
case BPF_MAP_TYPE_STACK_TRACE:
@@ -5353,7 +5287,7 @@ int bpf_core_add_cands(struct bpf_core_cand *local_cand,
n = btf__type_cnt(targ_btf);
for (i = targ_start_id; i < n; i++) {
t = btf__type_by_id(targ_btf, i);
- if (btf_kind(t) != btf_kind(local_t))
+ if (!btf_kind_core_compat(t, local_t))
continue;
targ_name = btf__name_by_offset(targ_btf, t->name_off);
@@ -5561,76 +5495,13 @@ err_out:
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
const struct btf *targ_btf, __u32 targ_id)
{
- const struct btf_type *local_type, *targ_type;
- int depth = 32; /* max recursion depth */
-
- /* caller made sure that names match (ignoring flavor suffix) */
- local_type = btf__type_by_id(local_btf, local_id);
- targ_type = btf__type_by_id(targ_btf, targ_id);
- if (btf_kind(local_type) != btf_kind(targ_type))
- return 0;
-
-recur:
- depth--;
- if (depth < 0)
- return -EINVAL;
-
- local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
- targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
- if (!local_type || !targ_type)
- return -EINVAL;
-
- if (btf_kind(local_type) != btf_kind(targ_type))
- return 0;
-
- switch (btf_kind(local_type)) {
- case BTF_KIND_UNKN:
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION:
- case BTF_KIND_ENUM:
- case BTF_KIND_FWD:
- return 1;
- case BTF_KIND_INT:
- /* just reject deprecated bitfield-like integers; all other
- * integers are by default compatible between each other
- */
- return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
- case BTF_KIND_PTR:
- local_id = local_type->type;
- targ_id = targ_type->type;
- goto recur;
- case BTF_KIND_ARRAY:
- local_id = btf_array(local_type)->type;
- targ_id = btf_array(targ_type)->type;
- goto recur;
- case BTF_KIND_FUNC_PROTO: {
- struct btf_param *local_p = btf_params(local_type);
- struct btf_param *targ_p = btf_params(targ_type);
- __u16 local_vlen = btf_vlen(local_type);
- __u16 targ_vlen = btf_vlen(targ_type);
- int i, err;
-
- if (local_vlen != targ_vlen)
- return 0;
-
- for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
- skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
- skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
- err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
- if (err <= 0)
- return err;
- }
+ return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
+}
- /* tail recurse for return type check */
- skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
- skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
- goto recur;
- }
- default:
- pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
- btf_kind_str(local_type), local_id, targ_id);
- return 0;
- }
+int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id)
+{
+ return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
}
static size_t bpf_core_hash_fn(const void *key, void *ctx)
@@ -6754,11 +6625,6 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog,
if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
- if (def & SEC_DEPRECATED) {
- pr_warn("SEC(\"%s\") is deprecated, please see https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations for details\n",
- prog->sec_name);
- }
-
if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
int btf_obj_fd = 0, btf_type_id = 0, err;
const char *attach_name;
@@ -6801,10 +6667,9 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog,
static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
-static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog,
- struct bpf_insn *insns, int insns_cnt,
- const char *license, __u32 kern_version,
- int *prog_fd)
+static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
+ struct bpf_insn *insns, int insns_cnt,
+ const char *license, __u32 kern_version, int *prog_fd)
{
LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
const char *prog_name = NULL;
@@ -7171,93 +7036,6 @@ static int bpf_program_record_relos(struct bpf_program *prog)
return 0;
}
-static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
- const char *license, __u32 kern_ver)
-{
- int err = 0, fd, i;
-
- if (obj->loaded) {
- pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
- return libbpf_err(-EINVAL);
- }
-
- if (prog->instances.nr < 0 || !prog->instances.fds) {
- if (prog->preprocessor) {
- pr_warn("Internal error: can't load program '%s'\n",
- prog->name);
- return libbpf_err(-LIBBPF_ERRNO__INTERNAL);
- }
-
- prog->instances.fds = malloc(sizeof(int));
- if (!prog->instances.fds) {
- pr_warn("Not enough memory for BPF fds\n");
- return libbpf_err(-ENOMEM);
- }
- prog->instances.nr = 1;
- prog->instances.fds[0] = -1;
- }
-
- if (!prog->preprocessor) {
- if (prog->instances.nr != 1) {
- pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
- prog->name, prog->instances.nr);
- }
- if (obj->gen_loader)
- bpf_program_record_relos(prog);
- err = bpf_object_load_prog_instance(obj, prog,
- prog->insns, prog->insns_cnt,
- license, kern_ver, &fd);
- if (!err)
- prog->instances.fds[0] = fd;
- goto out;
- }
-
- for (i = 0; i < prog->instances.nr; i++) {
- struct bpf_prog_prep_result result;
- bpf_program_prep_t preprocessor = prog->preprocessor;
-
- memset(&result, 0, sizeof(result));
- err = preprocessor(prog, i, prog->insns,
- prog->insns_cnt, &result);
- if (err) {
- pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
- i, prog->name);
- goto out;
- }
-
- if (!result.new_insn_ptr || !result.new_insn_cnt) {
- pr_debug("Skip loading the %dth instance of program '%s'\n",
- i, prog->name);
- prog->instances.fds[i] = -1;
- if (result.pfd)
- *result.pfd = -1;
- continue;
- }
-
- err = bpf_object_load_prog_instance(obj, prog,
- result.new_insn_ptr, result.new_insn_cnt,
- license, kern_ver, &fd);
- if (err) {
- pr_warn("Loading the %dth instance of program '%s' failed\n",
- i, prog->name);
- goto out;
- }
-
- if (result.pfd)
- *result.pfd = fd;
- prog->instances.fds[i] = fd;
- }
-out:
- if (err)
- pr_warn("failed to load program '%s'\n", prog->name);
- return libbpf_err(err);
-}
-
-int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_ver)
-{
- return bpf_object_load_prog(prog->obj, prog, license, kern_ver);
-}
-
static int
bpf_object__load_progs(struct bpf_object *obj, int log_level)
{
@@ -7281,9 +7059,16 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
continue;
}
prog->log_level |= log_level;
- err = bpf_object_load_prog(obj, prog, obj->license, obj->kern_version);
- if (err)
+
+ if (obj->gen_loader)
+ bpf_program_record_relos(prog);
+
+ err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
+ obj->license, obj->kern_version, &prog->fd);
+ if (err) {
+ pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
return err;
+ }
}
bpf_object__free_relocs(obj);
@@ -7309,13 +7094,6 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object
prog->type = prog->sec_def->prog_type;
prog->expected_attach_type = prog->sec_def->expected_attach_type;
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
- if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
- prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
- prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
-#pragma GCC diagnostic pop
-
/* sec_def can have custom callback which should be called
* after bpf_program is initialized to adjust its properties
*/
@@ -7421,36 +7199,6 @@ out:
return ERR_PTR(err);
}
-static struct bpf_object *
-__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
-{
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .relaxed_maps = flags & MAPS_RELAX_COMPAT,
- );
-
- /* param validation */
- if (!attr->file)
- return NULL;
-
- pr_debug("loading %s\n", attr->file);
- return bpf_object_open(attr->file, NULL, 0, &opts);
-}
-
-struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
-{
- return libbpf_ptr(__bpf_object__open_xattr(attr, 0));
-}
-
-struct bpf_object *bpf_object__open(const char *path)
-{
- struct bpf_object_open_attr attr = {
- .file = path,
- .prog_type = BPF_PROG_TYPE_UNSPEC,
- };
-
- return libbpf_ptr(__bpf_object__open_xattr(&attr, 0));
-}
-
struct bpf_object *
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
{
@@ -7462,6 +7210,11 @@ bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
}
+struct bpf_object *bpf_object__open(const char *path)
+{
+ return bpf_object__open_file(path, NULL);
+}
+
struct bpf_object *
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts)
@@ -7472,23 +7225,6 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
}
-struct bpf_object *
-bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
- const char *name)
-{
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .object_name = name,
- /* wrong default, but backwards-compatible */
- .relaxed_maps = true,
- );
-
- /* returning NULL is wrong, but backwards-compatible */
- if (!obj_buf || obj_buf_sz == 0)
- return errno = EINVAL, NULL;
-
- return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, &opts));
-}
-
static int bpf_object_unload(struct bpf_object *obj)
{
size_t i;
@@ -7574,14 +7310,14 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
return 0;
if (ext->is_set && ext->ksym.addr != sym_addr) {
- pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
+ pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
sym_name, ext->ksym.addr, sym_addr);
return -EINVAL;
}
if (!ext->is_set) {
ext->is_set = true;
ext->ksym.addr = sym_addr;
- pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
+ pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
}
return 0;
}
@@ -7785,28 +7521,52 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
- if (ext->type == EXT_KCFG &&
- strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
- void *ext_val = kcfg_data + ext->kcfg.data_off;
- __u32 kver = get_kernel_version();
+ if (ext->type == EXT_KSYM) {
+ if (ext->ksym.type_id)
+ need_vmlinux_btf = true;
+ else
+ need_kallsyms = true;
+ continue;
+ } else if (ext->type == EXT_KCFG) {
+ void *ext_ptr = kcfg_data + ext->kcfg.data_off;
+ __u64 value = 0;
+
+ /* Kconfig externs need actual /proc/config.gz */
+ if (str_has_pfx(ext->name, "CONFIG_")) {
+ need_config = true;
+ continue;
+ }
- if (!kver) {
- pr_warn("failed to get kernel version\n");
+ /* Virtual kcfg externs are customly handled by libbpf */
+ if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
+ value = get_kernel_version();
+ if (!value) {
+ pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
+ return -EINVAL;
+ }
+ } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
+ value = kernel_supports(obj, FEAT_BPF_COOKIE);
+ } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
+ value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
+ } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
+ /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
+ * __kconfig externs, where LINUX_ ones are virtual and filled out
+ * customly by libbpf (their values don't come from Kconfig).
+ * If LINUX_xxx variable is not recognized by libbpf, but is marked
+ * __weak, it defaults to zero value, just like for CONFIG_xxx
+ * externs.
+ */
+ pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
return -EINVAL;
}
- err = set_kcfg_value_num(ext, ext_val, kver);
+
+ err = set_kcfg_value_num(ext, ext_ptr, value);
if (err)
return err;
- pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
- } else if (ext->type == EXT_KCFG && str_has_pfx(ext->name, "CONFIG_")) {
- need_config = true;
- } else if (ext->type == EXT_KSYM) {
- if (ext->ksym.type_id)
- need_vmlinux_btf = true;
- else
- need_kallsyms = true;
+ pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
+ ext->name, (long long)value);
} else {
- pr_warn("unrecognized extern '%s'\n", ext->name);
+ pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
return -EINVAL;
}
}
@@ -7842,10 +7602,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
ext = &obj->externs[i];
if (!ext->is_set && !ext->is_weak) {
- pr_warn("extern %s (strong) not resolved\n", ext->name);
+ pr_warn("extern '%s' (strong): not resolved\n", ext->name);
return -ESRCH;
} else if (!ext->is_set) {
- pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
+ pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
ext->name);
}
}
@@ -7921,11 +7681,6 @@ out:
return libbpf_err(err);
}
-int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
-{
- return bpf_object_load(attr->obj, attr->log_level, attr->target_btf_path);
-}
-
int bpf_object__load(struct bpf_object *obj)
{
return bpf_object_load(obj, 0, NULL);
@@ -7983,11 +7738,16 @@ static int check_path(const char *path)
return err;
}
-static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, int instance)
+int bpf_program__pin(struct bpf_program *prog, const char *path)
{
char *cp, errmsg[STRERR_BUFSIZE];
int err;
+ if (prog->fd < 0) {
+ pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
+ return libbpf_err(-EINVAL);
+ }
+
err = make_parent_dir(path);
if (err)
return libbpf_err(err);
@@ -7996,170 +7756,35 @@ static int bpf_program_pin_instance(struct bpf_program *prog, const char *path,
if (err)
return libbpf_err(err);
- if (prog == NULL) {
- pr_warn("invalid program pointer\n");
- return libbpf_err(-EINVAL);
- }
-
- if (instance < 0 || instance >= prog->instances.nr) {
- pr_warn("invalid prog instance %d of prog %s (max %d)\n",
- instance, prog->name, prog->instances.nr);
- return libbpf_err(-EINVAL);
- }
-
- if (bpf_obj_pin(prog->instances.fds[instance], path)) {
+ if (bpf_obj_pin(prog->fd, path)) {
err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
- pr_warn("failed to pin program: %s\n", cp);
+ pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
return libbpf_err(err);
}
- pr_debug("pinned program '%s'\n", path);
+ pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
return 0;
}
-static int bpf_program_unpin_instance(struct bpf_program *prog, const char *path, int instance)
+int bpf_program__unpin(struct bpf_program *prog, const char *path)
{
int err;
- err = check_path(path);
- if (err)
- return libbpf_err(err);
-
- if (prog == NULL) {
- pr_warn("invalid program pointer\n");
- return libbpf_err(-EINVAL);
- }
-
- if (instance < 0 || instance >= prog->instances.nr) {
- pr_warn("invalid prog instance %d of prog %s (max %d)\n",
- instance, prog->name, prog->instances.nr);
+ if (prog->fd < 0) {
+ pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
return libbpf_err(-EINVAL);
}
- err = unlink(path);
- if (err != 0)
- return libbpf_err(-errno);
-
- pr_debug("unpinned program '%s'\n", path);
-
- return 0;
-}
-
-__attribute__((alias("bpf_program_pin_instance")))
-int bpf_object__pin_instance(struct bpf_program *prog, const char *path, int instance);
-
-__attribute__((alias("bpf_program_unpin_instance")))
-int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance);
-
-int bpf_program__pin(struct bpf_program *prog, const char *path)
-{
- int i, err;
-
- err = make_parent_dir(path);
- if (err)
- return libbpf_err(err);
-
- err = check_path(path);
- if (err)
- return libbpf_err(err);
-
- if (prog == NULL) {
- pr_warn("invalid program pointer\n");
- return libbpf_err(-EINVAL);
- }
-
- if (prog->instances.nr <= 0) {
- pr_warn("no instances of prog %s to pin\n", prog->name);
- return libbpf_err(-EINVAL);
- }
-
- if (prog->instances.nr == 1) {
- /* don't create subdirs when pinning single instance */
- return bpf_program_pin_instance(prog, path, 0);
- }
-
- for (i = 0; i < prog->instances.nr; i++) {
- char buf[PATH_MAX];
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
- if (len < 0) {
- err = -EINVAL;
- goto err_unpin;
- } else if (len >= PATH_MAX) {
- err = -ENAMETOOLONG;
- goto err_unpin;
- }
-
- err = bpf_program_pin_instance(prog, buf, i);
- if (err)
- goto err_unpin;
- }
-
- return 0;
-
-err_unpin:
- for (i = i - 1; i >= 0; i--) {
- char buf[PATH_MAX];
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
- if (len < 0)
- continue;
- else if (len >= PATH_MAX)
- continue;
-
- bpf_program_unpin_instance(prog, buf, i);
- }
-
- rmdir(path);
-
- return libbpf_err(err);
-}
-
-int bpf_program__unpin(struct bpf_program *prog, const char *path)
-{
- int i, err;
-
err = check_path(path);
if (err)
return libbpf_err(err);
- if (prog == NULL) {
- pr_warn("invalid program pointer\n");
- return libbpf_err(-EINVAL);
- }
-
- if (prog->instances.nr <= 0) {
- pr_warn("no instances of prog %s to pin\n", prog->name);
- return libbpf_err(-EINVAL);
- }
-
- if (prog->instances.nr == 1) {
- /* don't create subdirs when pinning single instance */
- return bpf_program_unpin_instance(prog, path, 0);
- }
-
- for (i = 0; i < prog->instances.nr; i++) {
- char buf[PATH_MAX];
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
- if (len < 0)
- return libbpf_err(-EINVAL);
- else if (len >= PATH_MAX)
- return libbpf_err(-ENAMETOOLONG);
-
- err = bpf_program_unpin_instance(prog, buf, i);
- if (err)
- return err;
- }
-
- err = rmdir(path);
+ err = unlink(path);
if (err)
return libbpf_err(-errno);
+ pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
return 0;
}
@@ -8406,8 +8031,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
char buf[PATH_MAX];
int len;
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
- prog->pin_name);
+ len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
if (len < 0) {
err = -EINVAL;
goto err_unpin_programs;
@@ -8428,8 +8052,7 @@ err_unpin_programs:
char buf[PATH_MAX];
int len;
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
- prog->pin_name);
+ len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
if (len < 0)
continue;
else if (len >= PATH_MAX)
@@ -8453,8 +8076,7 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
char buf[PATH_MAX];
int len;
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
- prog->pin_name);
+ len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
if (len < 0)
return libbpf_err(-EINVAL);
else if (len >= PATH_MAX)
@@ -8487,11 +8109,6 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
static void bpf_map__destroy(struct bpf_map *map)
{
- if (map->clear_priv)
- map->clear_priv(map, map->priv);
- map->priv = NULL;
- map->clear_priv = NULL;
-
if (map->inner_map) {
bpf_map__destroy(map->inner_map);
zfree(&map->inner_map);
@@ -8527,9 +8144,6 @@ void bpf_object__close(struct bpf_object *obj)
if (IS_ERR_OR_NULL(obj))
return;
- if (obj->clear_priv)
- obj->clear_priv(obj, obj->priv);
-
usdt_manager_free(obj->usdt_man);
obj->usdt_man = NULL;
@@ -8556,33 +8170,9 @@ void bpf_object__close(struct bpf_object *obj)
}
zfree(&obj->programs);
- list_del(&obj->list);
free(obj);
}
-struct bpf_object *
-bpf_object__next(struct bpf_object *prev)
-{
- struct bpf_object *next;
- bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST);
-
- if (strict)
- return NULL;
-
- if (!prev)
- next = list_first_entry(&bpf_objects_list,
- struct bpf_object,
- list);
- else
- next = list_next_entry(prev, list);
-
- /* Empty list is noticed here so don't need checking on entry. */
- if (&next->list == &bpf_objects_list)
- return NULL;
-
- return next;
-}
-
const char *bpf_object__name(const struct bpf_object *obj)
{
return obj ? obj->name : libbpf_err_ptr(-EINVAL);
@@ -8613,22 +8203,6 @@ int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
return 0;
}
-int bpf_object__set_priv(struct bpf_object *obj, void *priv,
- bpf_object_clear_priv_t clear_priv)
-{
- if (obj->priv && obj->clear_priv)
- obj->clear_priv(obj, obj->priv);
-
- obj->priv = priv;
- obj->clear_priv = clear_priv;
- return 0;
-}
-
-void *bpf_object__priv(const struct bpf_object *obj)
-{
- return obj ? obj->priv : libbpf_err_ptr(-EINVAL);
-}
-
int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
{
struct bpf_gen *gen;
@@ -8672,12 +8246,6 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
}
struct bpf_program *
-bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
-{
- return bpf_object__next_program(obj, prev);
-}
-
-struct bpf_program *
bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
{
struct bpf_program *prog = prev;
@@ -8690,12 +8258,6 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
}
struct bpf_program *
-bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
-{
- return bpf_object__prev_program(obj, next);
-}
-
-struct bpf_program *
bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
{
struct bpf_program *prog = next;
@@ -8707,22 +8269,6 @@ bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
return prog;
}
-int bpf_program__set_priv(struct bpf_program *prog, void *priv,
- bpf_program_clear_priv_t clear_priv)
-{
- if (prog->priv && prog->clear_priv)
- prog->clear_priv(prog, prog->priv);
-
- prog->priv = priv;
- prog->clear_priv = clear_priv;
- return 0;
-}
-
-void *bpf_program__priv(const struct bpf_program *prog)
-{
- return prog ? prog->priv : libbpf_err_ptr(-EINVAL);
-}
-
void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
{
prog->prog_ifindex = ifindex;
@@ -8738,22 +8284,6 @@ const char *bpf_program__section_name(const struct bpf_program *prog)
return prog->sec_name;
}
-const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
-{
- const char *title;
-
- title = prog->sec_name;
- if (needs_copy) {
- title = strdup(title);
- if (!title) {
- pr_warn("failed to strdup program title\n");
- return libbpf_err_ptr(-ENOMEM);
- }
- }
-
- return title;
-}
-
bool bpf_program__autoload(const struct bpf_program *prog)
{
return prog->autoload;
@@ -8768,18 +8298,6 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
return 0;
}
-static int bpf_program_nth_fd(const struct bpf_program *prog, int n);
-
-int bpf_program__fd(const struct bpf_program *prog)
-{
- return bpf_program_nth_fd(prog, 0);
-}
-
-size_t bpf_program__size(const struct bpf_program *prog)
-{
- return prog->insns_cnt * BPF_INSN_SZ;
-}
-
const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
{
return prog->insns;
@@ -8810,58 +8328,15 @@ int bpf_program__set_insns(struct bpf_program *prog,
return 0;
}
-int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
- bpf_program_prep_t prep)
-{
- int *instances_fds;
-
- if (nr_instances <= 0 || !prep)
- return libbpf_err(-EINVAL);
-
- if (prog->instances.nr > 0 || prog->instances.fds) {
- pr_warn("Can't set pre-processor after loading\n");
- return libbpf_err(-EINVAL);
- }
-
- instances_fds = malloc(sizeof(int) * nr_instances);
- if (!instances_fds) {
- pr_warn("alloc memory failed for fds\n");
- return libbpf_err(-ENOMEM);
- }
-
- /* fill all fd with -1 */
- memset(instances_fds, -1, sizeof(int) * nr_instances);
-
- prog->instances.nr = nr_instances;
- prog->instances.fds = instances_fds;
- prog->preprocessor = prep;
- return 0;
-}
-
-__attribute__((alias("bpf_program_nth_fd")))
-int bpf_program__nth_fd(const struct bpf_program *prog, int n);
-
-static int bpf_program_nth_fd(const struct bpf_program *prog, int n)
+int bpf_program__fd(const struct bpf_program *prog)
{
- int fd;
-
if (!prog)
return libbpf_err(-EINVAL);
- if (n >= prog->instances.nr || n < 0) {
- pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
- n, prog->name, prog->instances.nr);
- return libbpf_err(-EINVAL);
- }
-
- fd = prog->instances.fds[n];
- if (fd < 0) {
- pr_warn("%dth instance of program '%s' is invalid\n",
- n, prog->name);
+ if (prog->fd < 0)
return libbpf_err(-ENOENT);
- }
- return fd;
+ return prog->fd;
}
__alias(bpf_program__type)
@@ -8881,39 +8356,6 @@ int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
return 0;
}
-static bool bpf_program__is_type(const struct bpf_program *prog,
- enum bpf_prog_type type)
-{
- return prog ? (prog->type == type) : false;
-}
-
-#define BPF_PROG_TYPE_FNS(NAME, TYPE) \
-int bpf_program__set_##NAME(struct bpf_program *prog) \
-{ \
- if (!prog) \
- return libbpf_err(-EINVAL); \
- return bpf_program__set_type(prog, TYPE); \
-} \
- \
-bool bpf_program__is_##NAME(const struct bpf_program *prog) \
-{ \
- return bpf_program__is_type(prog, TYPE); \
-} \
-
-BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
-BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
-BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
-BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
-BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
-BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
-BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
-BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
-BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
-BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
-BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
-BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
-BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
-
__alias(bpf_program__expected_attach_type)
enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
@@ -8991,6 +8433,7 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log
static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
@@ -9000,19 +8443,23 @@ static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_li
static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static const struct bpf_sec_def section_defs[] = {
- SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX),
- SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
+ SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE),
+ SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
+ SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
+ SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
+ SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
+ SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
+ SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt),
SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE),
- SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED),
- SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX),
+ SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE),
+ SEC_DEF("action", SCHED_ACT, 0, SEC_NONE),
SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp),
SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp),
SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
@@ -9029,55 +8476,54 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace),
SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
+ SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
- SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE | SEC_DEPRECATED),
SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
- SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE | SEC_DEPRECATED),
SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS),
- SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
- SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX),
- SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX),
- SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE | SEC_SLOPPY_PFX),
- SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE | SEC_SLOPPY_PFX),
- SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
- SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
- SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
- SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
- SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX),
- SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
- SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
- SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
- SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
+ SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
+ SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE),
+ SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE),
+ SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE),
+ SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE),
+ SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE),
+ SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
+ SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
+ SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
+ SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE),
+ SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
+ SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
+ SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
+ SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
+ SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
+ SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE),
+ SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
+ SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE),
- SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
+ SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
};
static size_t custom_sec_def_cnt;
@@ -9172,8 +8618,7 @@ int libbpf_unregister_prog_handler(int handler_id)
return 0;
}
-static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name,
- bool allow_sloppy)
+static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
{
size_t len = strlen(sec_def->sec);
@@ -9198,17 +8643,6 @@ static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_n
return false;
}
- /* SEC_SLOPPY_PFX definitions are allowed to be just prefix
- * matches, unless strict section name mode
- * (LIBBPF_STRICT_SEC_NAME) is enabled, in which case the
- * match has to be exact.
- */
- if (allow_sloppy && str_has_pfx(sec_name, sec_def->sec))
- return true;
-
- /* Definitions not marked SEC_SLOPPY_PFX (e.g.,
- * SEC("syscall")) are exact matches in both modes.
- */
return strcmp(sec_name, sec_def->sec) == 0;
}
@@ -9216,20 +8650,18 @@ static const struct bpf_sec_def *find_sec_def(const char *sec_name)
{
const struct bpf_sec_def *sec_def;
int i, n;
- bool strict = libbpf_mode & LIBBPF_STRICT_SEC_NAME, allow_sloppy;
n = custom_sec_def_cnt;
for (i = 0; i < n; i++) {
sec_def = &custom_sec_defs[i];
- if (sec_def_matches(sec_def, sec_name, false))
+ if (sec_def_matches(sec_def, sec_name))
return sec_def;
}
n = ARRAY_SIZE(section_defs);
for (i = 0; i < n; i++) {
sec_def = &section_defs[i];
- allow_sloppy = (sec_def->cookie & SEC_SLOPPY_PFX) && !strict;
- if (sec_def_matches(sec_def, sec_name, allow_sloppy))
+ if (sec_def_matches(sec_def, sec_name))
return sec_def;
}
@@ -9300,6 +8732,38 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
return libbpf_err(-ESRCH);
}
+const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
+{
+ if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
+ return NULL;
+
+ return attach_type_name[t];
+}
+
+const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
+{
+ if (t < 0 || t >= ARRAY_SIZE(link_type_name))
+ return NULL;
+
+ return link_type_name[t];
+}
+
+const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
+{
+ if (t < 0 || t >= ARRAY_SIZE(map_type_name))
+ return NULL;
+
+ return map_type_name[t];
+}
+
+const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
+{
+ if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
+ return NULL;
+
+ return prog_type_name[t];
+}
+
static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
size_t offset)
{
@@ -9450,6 +8914,7 @@ void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
*kind = BTF_KIND_TYPEDEF;
break;
case BPF_LSM_MAC:
+ case BPF_LSM_CGROUP:
*prefix = BTF_LSM_PREFIX;
*kind = BTF_KIND_FUNC;
break;
@@ -9653,11 +9118,6 @@ int bpf_map__fd(const struct bpf_map *map)
return map ? map->fd : libbpf_err(-EINVAL);
}
-const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
-{
- return map ? &map->def : libbpf_err_ptr(-EINVAL);
-}
-
static bool map_uses_real_name(const struct bpf_map *map)
{
/* Since libbpf started to support custom .data.* and .rodata.* maps,
@@ -9772,27 +9232,6 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
return map ? map->btf_value_type_id : 0;
}
-int bpf_map__set_priv(struct bpf_map *map, void *priv,
- bpf_map_clear_priv_t clear_priv)
-{
- if (!map)
- return libbpf_err(-EINVAL);
-
- if (map->priv) {
- if (map->clear_priv)
- map->clear_priv(map, map->priv);
- }
-
- map->priv = priv;
- map->clear_priv = clear_priv;
- return 0;
-}
-
-void *bpf_map__priv(const struct bpf_map *map)
-{
- return map ? map->priv : libbpf_err_ptr(-EINVAL);
-}
-
int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size)
{
@@ -9812,11 +9251,6 @@ const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
return map->mmaped;
}
-bool bpf_map__is_offload_neutral(const struct bpf_map *map)
-{
- return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
-}
-
bool bpf_map__is_internal(const struct bpf_map *map)
{
return map->libbpf_type != LIBBPF_MAP_UNSPEC;
@@ -9878,12 +9312,6 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
}
struct bpf_map *
-bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
-{
- return bpf_object__next_map(obj, prev);
-}
-
-struct bpf_map *
bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
{
if (prev == NULL)
@@ -9893,12 +9321,6 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
}
struct bpf_map *
-bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
-{
- return bpf_object__prev_map(obj, next);
-}
-
-struct bpf_map *
bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
{
if (next == NULL) {
@@ -9943,12 +9365,6 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
}
-struct bpf_map *
-bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
-{
- return libbpf_err_ptr(-ENOTSUP);
-}
-
static int validate_map_op(const struct bpf_map *map, size_t key_sz,
size_t value_sz, bool check_value_sz)
{
@@ -10069,95 +9485,6 @@ long libbpf_get_error(const void *ptr)
return -errno;
}
-__attribute__((alias("bpf_prog_load_xattr2")))
-int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
- struct bpf_object **pobj, int *prog_fd);
-
-static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr,
- struct bpf_object **pobj, int *prog_fd)
-{
- struct bpf_object_open_attr open_attr = {};
- struct bpf_program *prog, *first_prog = NULL;
- struct bpf_object *obj;
- struct bpf_map *map;
- int err;
-
- if (!attr)
- return libbpf_err(-EINVAL);
- if (!attr->file)
- return libbpf_err(-EINVAL);
-
- open_attr.file = attr->file;
- open_attr.prog_type = attr->prog_type;
-
- obj = __bpf_object__open_xattr(&open_attr, 0);
- err = libbpf_get_error(obj);
- if (err)
- return libbpf_err(-ENOENT);
-
- bpf_object__for_each_program(prog, obj) {
- enum bpf_attach_type attach_type = attr->expected_attach_type;
- /*
- * to preserve backwards compatibility, bpf_prog_load treats
- * attr->prog_type, if specified, as an override to whatever
- * bpf_object__open guessed
- */
- if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
- prog->type = attr->prog_type;
- prog->expected_attach_type = attach_type;
- }
- if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) {
- /*
- * we haven't guessed from section name and user
- * didn't provide a fallback type, too bad...
- */
- bpf_object__close(obj);
- return libbpf_err(-EINVAL);
- }
-
- prog->prog_ifindex = attr->ifindex;
- prog->log_level = attr->log_level;
- prog->prog_flags |= attr->prog_flags;
- if (!first_prog)
- first_prog = prog;
- }
-
- bpf_object__for_each_map(map, obj) {
- if (map->def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
- map->map_ifindex = attr->ifindex;
- }
-
- if (!first_prog) {
- pr_warn("object file doesn't contain bpf program\n");
- bpf_object__close(obj);
- return libbpf_err(-ENOENT);
- }
-
- err = bpf_object__load(obj);
- if (err) {
- bpf_object__close(obj);
- return libbpf_err(err);
- }
-
- *pobj = obj;
- *prog_fd = bpf_program__fd(first_prog);
- return 0;
-}
-
-COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1)
-int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type,
- struct bpf_object **pobj, int *prog_fd)
-{
- struct bpf_prog_load_attr attr;
-
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = file;
- attr.prog_type = type;
- attr.expected_attach_type = 0;
-
- return bpf_prog_load_xattr2(&attr, pobj, prog_fd);
-}
-
/* Replace link's underlying BPF program with the new one */
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
@@ -10485,7 +9812,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
{
struct perf_event_attr attr = {};
char errmsg[STRERR_BUFSIZE];
- int type, pfd, err;
+ int type, pfd;
if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
return -EINVAL;
@@ -10521,14 +9848,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
pid < 0 ? -1 : pid /* pid */,
pid == -1 ? 0 : -1 /* cpu */,
-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
- if (pfd < 0) {
- err = -errno;
- pr_warn("%s perf_event_open() failed: %s\n",
- uprobe ? "uprobe" : "kprobe",
- libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return err;
- }
- return pfd;
+ return pfd >= 0 ? pfd : -errno;
}
static int append_to_file(const char *file, const char *fmt, ...)
@@ -10551,6 +9871,34 @@ static int append_to_file(const char *file, const char *fmt, ...)
return err;
}
+#define DEBUGFS "/sys/kernel/debug/tracing"
+#define TRACEFS "/sys/kernel/tracing"
+
+static bool use_debugfs(void)
+{
+ static int has_debugfs = -1;
+
+ if (has_debugfs < 0)
+ has_debugfs = access(DEBUGFS, F_OK) == 0;
+
+ return has_debugfs == 1;
+}
+
+static const char *tracefs_path(void)
+{
+ return use_debugfs() ? DEBUGFS : TRACEFS;
+}
+
+static const char *tracefs_kprobe_events(void)
+{
+ return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
+}
+
+static const char *tracefs_uprobe_events(void)
+{
+ return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
+}
+
static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
const char *kfunc_name, size_t offset)
{
@@ -10563,9 +9911,7 @@ static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
const char *kfunc_name, size_t offset)
{
- const char *file = "/sys/kernel/debug/tracing/kprobe_events";
-
- return append_to_file(file, "%c:%s/%s %s+0x%zx",
+ return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
retprobe ? 'r' : 'p',
retprobe ? "kretprobes" : "kprobes",
probe_name, kfunc_name, offset);
@@ -10573,18 +9919,16 @@ static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
{
- const char *file = "/sys/kernel/debug/tracing/kprobe_events";
-
- return append_to_file(file, "-:%s/%s", retprobe ? "kretprobes" : "kprobes", probe_name);
+ return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
+ retprobe ? "kretprobes" : "kprobes", probe_name);
}
static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
{
char file[256];
- snprintf(file, sizeof(file),
- "/sys/kernel/debug/tracing/events/%s/%s/id",
- retprobe ? "kretprobes" : "kprobes", probe_name);
+ snprintf(file, sizeof(file), "%s/events/%s/%s/id",
+ tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
return parse_uint_from_file(file, "%d\n");
}
@@ -10605,10 +9949,11 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
}
type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
if (type < 0) {
+ err = type;
pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
kfunc_name, offset,
- libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
- return type;
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ goto err_clean_legacy;
}
attr.size = sizeof(attr);
attr.config = type;
@@ -10622,9 +9967,72 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
err = -errno;
pr_warn("legacy kprobe perf_event_open() failed: %s\n",
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return err;
+ goto err_clean_legacy;
}
return pfd;
+
+err_clean_legacy:
+ /* Clear the newly added legacy kprobe_event */
+ remove_kprobe_event_legacy(probe_name, retprobe);
+ return err;
+}
+
+static const char *arch_specific_syscall_pfx(void)
+{
+#if defined(__x86_64__)
+ return "x64";
+#elif defined(__i386__)
+ return "ia32";
+#elif defined(__s390x__)
+ return "s390x";
+#elif defined(__s390__)
+ return "s390";
+#elif defined(__arm__)
+ return "arm";
+#elif defined(__aarch64__)
+ return "arm64";
+#elif defined(__mips__)
+ return "mips";
+#elif defined(__riscv)
+ return "riscv";
+#elif defined(__powerpc__)
+ return "powerpc";
+#elif defined(__powerpc64__)
+ return "powerpc64";
+#else
+ return NULL;
+#endif
+}
+
+static int probe_kern_syscall_wrapper(void)
+{
+ char syscall_name[64];
+ const char *ksys_pfx;
+
+ ksys_pfx = arch_specific_syscall_pfx();
+ if (!ksys_pfx)
+ return 0;
+
+ snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
+
+ if (determine_kprobe_perf_type() >= 0) {
+ int pfd;
+
+ pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
+ if (pfd >= 0)
+ close(pfd);
+
+ return pfd >= 0 ? 1 : 0;
+ } else { /* legacy mode */
+ char probe_name[128];
+
+ gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
+ if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
+ return 0;
+
+ (void)remove_kprobe_event_legacy(probe_name, false);
+ return 1;
+ }
}
struct bpf_link *
@@ -10681,7 +10089,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
prog->name, retprobe ? "kretprobe" : "kprobe",
func_name, offset,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- goto err_out;
+ goto err_clean_legacy;
}
if (legacy) {
struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
@@ -10692,6 +10100,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
}
return link;
+
+err_clean_legacy:
+ if (legacy)
+ remove_kprobe_event_legacy(legacy_probe, retprobe);
err_out:
free(legacy_probe);
return libbpf_err_ptr(err);
@@ -10708,6 +10120,34 @@ struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
}
+struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
+ const char *syscall_name,
+ const struct bpf_ksyscall_opts *opts)
+{
+ LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
+ char func_name[128];
+
+ if (!OPTS_VALID(opts, bpf_ksyscall_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
+ /* arch_specific_syscall_pfx() should never return NULL here
+ * because it is guarded by kernel_supports(). However, since
+ * compiler does not know that we have an explicit conditional
+ * as well.
+ */
+ snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
+ arch_specific_syscall_pfx() ? : "", syscall_name);
+ } else {
+ snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
+ }
+
+ kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
+ kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
+ return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
+}
+
/* Adapted from perf/util/string.c */
static bool glob_match(const char *str, const char *pat)
{
@@ -10878,6 +10318,27 @@ static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf
return libbpf_get_error(*link);
}
+static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+ LIBBPF_OPTS(bpf_ksyscall_opts, opts);
+ const char *syscall_name;
+
+ *link = NULL;
+
+ /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
+ if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
+ return 0;
+
+ opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
+ if (opts.retprobe)
+ syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
+ else
+ syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
+
+ *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
+ return *link ? 0 : -errno;
+}
+
static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
@@ -10926,9 +10387,7 @@ static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
const char *binary_path, size_t offset)
{
- const char *file = "/sys/kernel/debug/tracing/uprobe_events";
-
- return append_to_file(file, "%c:%s/%s %s:0x%zx",
+ return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
retprobe ? 'r' : 'p',
retprobe ? "uretprobes" : "uprobes",
probe_name, binary_path, offset);
@@ -10936,18 +10395,16 @@ static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
{
- const char *file = "/sys/kernel/debug/tracing/uprobe_events";
-
- return append_to_file(file, "-:%s/%s", retprobe ? "uretprobes" : "uprobes", probe_name);
+ return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
+ retprobe ? "uretprobes" : "uprobes", probe_name);
}
static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
{
char file[512];
- snprintf(file, sizeof(file),
- "/sys/kernel/debug/tracing/events/%s/%s/id",
- retprobe ? "uretprobes" : "uprobes", probe_name);
+ snprintf(file, sizeof(file), "%s/events/%s/%s/id",
+ tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
return parse_uint_from_file(file, "%d\n");
}
@@ -10966,9 +10423,10 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
}
type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
if (type < 0) {
+ err = type;
pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
binary_path, offset, err);
- return type;
+ goto err_clean_legacy;
}
memset(&attr, 0, sizeof(attr));
@@ -10983,46 +10441,14 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
if (pfd < 0) {
err = -errno;
pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
- return err;
+ goto err_clean_legacy;
}
return pfd;
-}
-/* uprobes deal in relative offsets; subtract the base address associated with
- * the mapped binary. See Documentation/trace/uprobetracer.rst for more
- * details.
- */
-static long elf_find_relative_offset(const char *filename, Elf *elf, long addr)
-{
- size_t n;
- int i;
-
- if (elf_getphdrnum(elf, &n)) {
- pr_warn("elf: failed to find program headers for '%s': %s\n", filename,
- elf_errmsg(-1));
- return -ENOENT;
- }
-
- for (i = 0; i < n; i++) {
- int seg_start, seg_end, seg_offset;
- GElf_Phdr phdr;
-
- if (!gelf_getphdr(elf, i, &phdr)) {
- pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename,
- elf_errmsg(-1));
- return -ENOENT;
- }
- if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X))
- continue;
-
- seg_start = phdr.p_vaddr;
- seg_end = seg_start + phdr.p_memsz;
- seg_offset = phdr.p_offset;
- if (addr >= seg_start && addr < seg_end)
- return addr - seg_start + seg_offset;
- }
- pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename);
- return -ENOENT;
+err_clean_legacy:
+ /* Clear the newly added legacy uprobe_event */
+ remove_uprobe_event_legacy(probe_name, retprobe);
+ return err;
}
/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
@@ -11111,6 +10537,8 @@ static long elf_find_func_offset(const char *binary_path, const char *name)
for (idx = 0; idx < nr_syms; idx++) {
int curr_bind;
GElf_Sym sym;
+ Elf_Scn *sym_scn;
+ GElf_Shdr sym_sh;
if (!gelf_getsym(symbols, idx, &sym))
continue;
@@ -11148,12 +10576,28 @@ static long elf_find_func_offset(const char *binary_path, const char *name)
continue;
}
}
- ret = sym.st_value;
+
+ /* Transform symbol's virtual address (absolute for
+ * binaries and relative for shared libs) into file
+ * offset, which is what kernel is expecting for
+ * uprobe/uretprobe attachment.
+ * See Documentation/trace/uprobetracer.rst for more
+ * details.
+ * This is done by looking up symbol's containing
+ * section's header and using it's virtual address
+ * (sh_addr) and corresponding file offset (sh_offset)
+ * to transform sym.st_value (virtual address) into
+ * desired final file offset.
+ */
+ sym_scn = elf_getscn(elf, sym.st_shndx);
+ if (!sym_scn)
+ continue;
+ if (!gelf_getshdr(sym_scn, &sym_sh))
+ continue;
+
+ ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset;
last_bind = curr_bind;
}
- /* For binaries that are not shared libraries, we need relative offset */
- if (ret > 0 && !is_shared_lib)
- ret = elf_find_relative_offset(binary_path, elf, ret);
if (ret > 0)
break;
}
@@ -11276,7 +10720,10 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
- if (binary_path && !strchr(binary_path, '/')) {
+ if (!binary_path)
+ return libbpf_err_ptr(-EINVAL);
+
+ if (!strchr(binary_path, '/')) {
err = resolve_full_path(binary_path, full_binary_path,
sizeof(full_binary_path));
if (err) {
@@ -11290,11 +10737,6 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
if (func_name) {
long sym_off;
- if (!binary_path) {
- pr_warn("prog '%s': name-based attach requires binary_path\n",
- prog->name);
- return libbpf_err_ptr(-EINVAL);
- }
sym_off = elf_find_func_offset(binary_path, func_name);
if (sym_off < 0)
return libbpf_err_ptr(sym_off);
@@ -11338,7 +10780,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- goto err_out;
+ goto err_clean_legacy;
}
if (legacy) {
struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
@@ -11348,10 +10790,13 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
perf_link->legacy_is_retprobe = retprobe;
}
return link;
+
+err_clean_legacy:
+ if (legacy)
+ remove_uprobe_event_legacy(legacy_probe, retprobe);
err_out:
free(legacy_probe);
return libbpf_err_ptr(err);
-
}
/* Format of u[ret]probe section definition supporting auto-attach:
@@ -11386,7 +10831,8 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf
break;
case 3:
case 4:
- opts.retprobe = strcmp(probe_type, "uretprobe") == 0;
+ opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
+ strcmp(probe_type, "uretprobe.s") == 0;
if (opts.retprobe && offset != 0) {
pr_warn("prog '%s': uretprobes do not support offset specification\n",
prog->name);
@@ -11438,6 +10884,9 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
return libbpf_err_ptr(-EINVAL);
}
+ if (!binary_path)
+ return libbpf_err_ptr(-EINVAL);
+
if (!strchr(binary_path, '/')) {
err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
if (err) {
@@ -11503,9 +10952,8 @@ static int determine_tracepoint_id(const char *tp_category,
char file[PATH_MAX];
int ret;
- ret = snprintf(file, sizeof(file),
- "/sys/kernel/debug/tracing/events/%s/%s/id",
- tp_category, tp_name);
+ ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
+ tracefs_path(), tp_category, tp_name);
if (ret < 0)
return -errno;
if (ret >= sizeof(file)) {
@@ -11962,6 +11410,9 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
return link;
}
+typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
+ void *private_data);
+
static enum bpf_perf_event_ret
perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
void **copy_mem, size_t *copy_size,
@@ -12010,12 +11461,6 @@ perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
return libbpf_err(ret);
}
-__attribute__((alias("perf_event_read_simple")))
-enum bpf_perf_event_ret
-bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
- void **copy_mem, size_t *copy_size,
- bpf_perf_event_print_t fn, void *private_data);
-
struct perf_buffer;
struct perf_buffer_params {
@@ -12149,12 +11594,11 @@ error:
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
struct perf_buffer_params *p);
-DEFAULT_VERSION(perf_buffer__new_v0_6_0, perf_buffer__new, LIBBPF_0.6.0)
-struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt,
- perf_buffer_sample_fn sample_cb,
- perf_buffer_lost_fn lost_cb,
- void *ctx,
- const struct perf_buffer_opts *opts)
+struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
+ perf_buffer_sample_fn sample_cb,
+ perf_buffer_lost_fn lost_cb,
+ void *ctx,
+ const struct perf_buffer_opts *opts)
{
struct perf_buffer_params p = {};
struct perf_event_attr attr = {};
@@ -12176,22 +11620,10 @@ struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt,
return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
-COMPAT_VERSION(perf_buffer__new_deprecated, perf_buffer__new, LIBBPF_0.0.4)
-struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt,
- const struct perf_buffer_opts *opts)
-{
- return perf_buffer__new_v0_6_0(map_fd, page_cnt,
- opts ? opts->sample_cb : NULL,
- opts ? opts->lost_cb : NULL,
- opts ? opts->ctx : NULL,
- NULL);
-}
-
-DEFAULT_VERSION(perf_buffer__new_raw_v0_6_0, perf_buffer__new_raw, LIBBPF_0.6.0)
-struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt,
- struct perf_event_attr *attr,
- perf_buffer_event_fn event_cb, void *ctx,
- const struct perf_buffer_raw_opts *opts)
+struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
+ struct perf_event_attr *attr,
+ perf_buffer_event_fn event_cb, void *ctx,
+ const struct perf_buffer_raw_opts *opts)
{
struct perf_buffer_params p = {};
@@ -12211,20 +11643,6 @@ struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt,
return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
-COMPAT_VERSION(perf_buffer__new_raw_deprecated, perf_buffer__new_raw, LIBBPF_0.0.4)
-struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt,
- const struct perf_buffer_raw_opts *opts)
-{
- LIBBPF_OPTS(perf_buffer_raw_opts, inner_opts,
- .cpu_cnt = opts->cpu_cnt,
- .cpus = opts->cpus,
- .map_keys = opts->map_keys,
- );
-
- return perf_buffer__new_raw_v0_6_0(map_fd, page_cnt, opts->attr,
- opts->event_cb, opts->ctx, &inner_opts);
-}
-
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
struct perf_buffer_params *p)
{
@@ -12485,6 +11903,22 @@ int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
return cpu_buf->fd;
}
+int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
+{
+ struct perf_cpu_buf *cpu_buf;
+
+ if (buf_idx >= pb->cpu_cnt)
+ return libbpf_err(-EINVAL);
+
+ cpu_buf = pb->cpu_bufs[buf_idx];
+ if (!cpu_buf)
+ return libbpf_err(-ENOENT);
+
+ *buf = cpu_buf->base;
+ *buf_size = pb->mmap_size;
+ return 0;
+}
+
/*
* Consume data from perf ring buffer corresponding to slot *buf_idx* in
* PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
@@ -12526,254 +11960,6 @@ int perf_buffer__consume(struct perf_buffer *pb)
return 0;
}
-struct bpf_prog_info_array_desc {
- int array_offset; /* e.g. offset of jited_prog_insns */
- int count_offset; /* e.g. offset of jited_prog_len */
- int size_offset; /* > 0: offset of rec size,
- * < 0: fix size of -size_offset
- */
-};
-
-static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
- [BPF_PROG_INFO_JITED_INSNS] = {
- offsetof(struct bpf_prog_info, jited_prog_insns),
- offsetof(struct bpf_prog_info, jited_prog_len),
- -1,
- },
- [BPF_PROG_INFO_XLATED_INSNS] = {
- offsetof(struct bpf_prog_info, xlated_prog_insns),
- offsetof(struct bpf_prog_info, xlated_prog_len),
- -1,
- },
- [BPF_PROG_INFO_MAP_IDS] = {
- offsetof(struct bpf_prog_info, map_ids),
- offsetof(struct bpf_prog_info, nr_map_ids),
- -(int)sizeof(__u32),
- },
- [BPF_PROG_INFO_JITED_KSYMS] = {
- offsetof(struct bpf_prog_info, jited_ksyms),
- offsetof(struct bpf_prog_info, nr_jited_ksyms),
- -(int)sizeof(__u64),
- },
- [BPF_PROG_INFO_JITED_FUNC_LENS] = {
- offsetof(struct bpf_prog_info, jited_func_lens),
- offsetof(struct bpf_prog_info, nr_jited_func_lens),
- -(int)sizeof(__u32),
- },
- [BPF_PROG_INFO_FUNC_INFO] = {
- offsetof(struct bpf_prog_info, func_info),
- offsetof(struct bpf_prog_info, nr_func_info),
- offsetof(struct bpf_prog_info, func_info_rec_size),
- },
- [BPF_PROG_INFO_LINE_INFO] = {
- offsetof(struct bpf_prog_info, line_info),
- offsetof(struct bpf_prog_info, nr_line_info),
- offsetof(struct bpf_prog_info, line_info_rec_size),
- },
- [BPF_PROG_INFO_JITED_LINE_INFO] = {
- offsetof(struct bpf_prog_info, jited_line_info),
- offsetof(struct bpf_prog_info, nr_jited_line_info),
- offsetof(struct bpf_prog_info, jited_line_info_rec_size),
- },
- [BPF_PROG_INFO_PROG_TAGS] = {
- offsetof(struct bpf_prog_info, prog_tags),
- offsetof(struct bpf_prog_info, nr_prog_tags),
- -(int)sizeof(__u8) * BPF_TAG_SIZE,
- },
-
-};
-
-static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
- int offset)
-{
- __u32 *array = (__u32 *)info;
-
- if (offset >= 0)
- return array[offset / sizeof(__u32)];
- return -(int)offset;
-}
-
-static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
- int offset)
-{
- __u64 *array = (__u64 *)info;
-
- if (offset >= 0)
- return array[offset / sizeof(__u64)];
- return -(int)offset;
-}
-
-static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
- __u32 val)
-{
- __u32 *array = (__u32 *)info;
-
- if (offset >= 0)
- array[offset / sizeof(__u32)] = val;
-}
-
-static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
- __u64 val)
-{
- __u64 *array = (__u64 *)info;
-
- if (offset >= 0)
- array[offset / sizeof(__u64)] = val;
-}
-
-struct bpf_prog_info_linear *
-bpf_program__get_prog_info_linear(int fd, __u64 arrays)
-{
- struct bpf_prog_info_linear *info_linear;
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- __u32 data_len = 0;
- int i, err;
- void *ptr;
-
- if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
- return libbpf_err_ptr(-EINVAL);
-
- /* step 1: get array dimensions */
- err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
- if (err) {
- pr_debug("can't get prog info: %s", strerror(errno));
- return libbpf_err_ptr(-EFAULT);
- }
-
- /* step 2: calculate total size of all arrays */
- for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
- bool include_array = (arrays & (1UL << i)) > 0;
- struct bpf_prog_info_array_desc *desc;
- __u32 count, size;
-
- desc = bpf_prog_info_array_desc + i;
-
- /* kernel is too old to support this field */
- if (info_len < desc->array_offset + sizeof(__u32) ||
- info_len < desc->count_offset + sizeof(__u32) ||
- (desc->size_offset > 0 && info_len < desc->size_offset))
- include_array = false;
-
- if (!include_array) {
- arrays &= ~(1UL << i); /* clear the bit */
- continue;
- }
-
- count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
- size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
-
- data_len += count * size;
- }
-
- /* step 3: allocate continuous memory */
- data_len = roundup(data_len, sizeof(__u64));
- info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
- if (!info_linear)
- return libbpf_err_ptr(-ENOMEM);
-
- /* step 4: fill data to info_linear->info */
- info_linear->arrays = arrays;
- memset(&info_linear->info, 0, sizeof(info));
- ptr = info_linear->data;
-
- for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
- struct bpf_prog_info_array_desc *desc;
- __u32 count, size;
-
- if ((arrays & (1UL << i)) == 0)
- continue;
-
- desc = bpf_prog_info_array_desc + i;
- count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
- size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
- bpf_prog_info_set_offset_u32(&info_linear->info,
- desc->count_offset, count);
- bpf_prog_info_set_offset_u32(&info_linear->info,
- desc->size_offset, size);
- bpf_prog_info_set_offset_u64(&info_linear->info,
- desc->array_offset,
- ptr_to_u64(ptr));
- ptr += count * size;
- }
-
- /* step 5: call syscall again to get required arrays */
- err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
- if (err) {
- pr_debug("can't get prog info: %s", strerror(errno));
- free(info_linear);
- return libbpf_err_ptr(-EFAULT);
- }
-
- /* step 6: verify the data */
- for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
- struct bpf_prog_info_array_desc *desc;
- __u32 v1, v2;
-
- if ((arrays & (1UL << i)) == 0)
- continue;
-
- desc = bpf_prog_info_array_desc + i;
- v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
- v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
- desc->count_offset);
- if (v1 != v2)
- pr_warn("%s: mismatch in element count\n", __func__);
-
- v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
- v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
- desc->size_offset);
- if (v1 != v2)
- pr_warn("%s: mismatch in rec size\n", __func__);
- }
-
- /* step 7: update info_len and data_len */
- info_linear->info_len = sizeof(struct bpf_prog_info);
- info_linear->data_len = data_len;
-
- return info_linear;
-}
-
-void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
-{
- int i;
-
- for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
- struct bpf_prog_info_array_desc *desc;
- __u64 addr, offs;
-
- if ((info_linear->arrays & (1UL << i)) == 0)
- continue;
-
- desc = bpf_prog_info_array_desc + i;
- addr = bpf_prog_info_read_offset_u64(&info_linear->info,
- desc->array_offset);
- offs = addr - ptr_to_u64(info_linear->data);
- bpf_prog_info_set_offset_u64(&info_linear->info,
- desc->array_offset, offs);
- }
-}
-
-void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
-{
- int i;
-
- for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
- struct bpf_prog_info_array_desc *desc;
- __u64 addr, offs;
-
- if ((info_linear->arrays & (1UL << i)) == 0)
- continue;
-
- desc = bpf_prog_info_array_desc + i;
- offs = bpf_prog_info_read_offset_u64(&info_linear->info,
- desc->array_offset);
- addr = offs + ptr_to_u64(info_linear->data);
- bpf_prog_info_set_offset_u64(&info_linear->info,
- desc->array_offset, addr);
- }
-}
-
int bpf_program__set_attach_target(struct bpf_program *prog,
int attach_prog_fd,
const char *attach_func_name)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 9e9a3fd3edd8..61493c4cddac 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -51,6 +51,42 @@ enum libbpf_errno {
LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
+/**
+ * @brief **libbpf_bpf_attach_type_str()** converts the provided attach type
+ * value into a textual representation.
+ * @param t The attach type.
+ * @return Pointer to a static string identifying the attach type. NULL is
+ * returned for unknown **bpf_attach_type** values.
+ */
+LIBBPF_API const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t);
+
+/**
+ * @brief **libbpf_bpf_link_type_str()** converts the provided link type value
+ * into a textual representation.
+ * @param t The link type.
+ * @return Pointer to a static string identifying the link type. NULL is
+ * returned for unknown **bpf_link_type** values.
+ */
+LIBBPF_API const char *libbpf_bpf_link_type_str(enum bpf_link_type t);
+
+/**
+ * @brief **libbpf_bpf_map_type_str()** converts the provided map type value
+ * into a textual representation.
+ * @param t The map type.
+ * @return Pointer to a static string identifying the map type. NULL is
+ * returned for unknown **bpf_map_type** values.
+ */
+LIBBPF_API const char *libbpf_bpf_map_type_str(enum bpf_map_type t);
+
+/**
+ * @brief **libbpf_bpf_prog_type_str()** converts the provided program type
+ * value into a textual representation.
+ * @param t The program type.
+ * @return Pointer to a static string identifying the program type. NULL is
+ * returned for unknown **bpf_prog_type** values.
+ */
+LIBBPF_API const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t);
+
enum libbpf_print_level {
LIBBPF_WARN,
LIBBPF_INFO,
@@ -65,13 +101,8 @@ LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn);
/* Hide internal to user */
struct bpf_object;
-struct bpf_object_open_attr {
- const char *file;
- enum bpf_prog_type prog_type;
-};
-
struct bpf_object_open_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* object name override, if provided:
* - for object open from file, this will override setting object
@@ -82,21 +113,12 @@ struct bpf_object_open_opts {
const char *object_name;
/* parse map definitions non-strictly, allowing extra attributes/data */
bool relaxed_maps;
- /* DEPRECATED: handle CO-RE relocations non-strictly, allowing failures.
- * Value is ignored. Relocations always are processed non-strictly.
- * Non-relocatable instructions are replaced with invalid ones to
- * prevent accidental errors.
- * */
- LIBBPF_DEPRECATED_SINCE(0, 6, "field has no effect")
- bool relaxed_core_relocs;
/* maps that set the 'pinning' attribute in their definition will have
* their pin_path attribute set to a file in this directory, and be
* auto-pinned to that path on load; defaults to "/sys/fs/bpf".
*/
const char *pin_root_path;
-
- LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__set_attach_target() on each individual bpf_program")
- __u32 attach_prog_fd;
+ long :0;
/* Additional kernel config content that augments and overrides
* system Kconfig for CONFIG_xxx externs.
*/
@@ -179,20 +201,10 @@ LIBBPF_API struct bpf_object *
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts);
-/* deprecated bpf_object__open variants */
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open_mem() instead")
-LIBBPF_API struct bpf_object *
-bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
- const char *name);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open_file() instead")
-LIBBPF_API struct bpf_object *
-bpf_object__open_xattr(struct bpf_object_open_attr *attr);
+/* Load/unload object into/from kernel */
+LIBBPF_API int bpf_object__load(struct bpf_object *obj);
-enum libbpf_pin_type {
- LIBBPF_PIN_NONE,
- /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
- LIBBPF_PIN_BY_NAME,
-};
+LIBBPF_API void bpf_object__close(struct bpf_object *object);
/* pin_maps and unpin_maps can both be called with a NULL path, in which case
* they will use the pin_path attribute of each map (and ignore all maps that
@@ -206,20 +218,6 @@ LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj,
LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
const char *path);
LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
-LIBBPF_API void bpf_object__close(struct bpf_object *object);
-
-struct bpf_object_load_attr {
- struct bpf_object *obj;
- int log_level;
- const char *target_btf_path;
-};
-
-/* Load/unload object into/from kernel */
-LIBBPF_API int bpf_object__load(struct bpf_object *obj);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__load() instead")
-LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
-LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead")
-LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
@@ -229,29 +227,10 @@ struct btf;
LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__find_program_by_name() instead")
-LIBBPF_API struct bpf_program *
-bpf_object__find_program_by_title(const struct bpf_object *obj,
- const char *title);
LIBBPF_API struct bpf_program *
bpf_object__find_program_by_name(const struct bpf_object *obj,
const char *name);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "track bpf_objects in application code instead")
-struct bpf_object *bpf_object__next(struct bpf_object *prev);
-#define bpf_object__for_each_safe(pos, tmp) \
- for ((pos) = bpf_object__next(NULL), \
- (tmp) = bpf_object__next(pos); \
- (pos) != NULL; \
- (pos) = (tmp), (tmp) = bpf_object__next(tmp))
-
-typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
- bpf_object_clear_priv_t clear_priv);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);
-
LIBBPF_API int
libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
enum bpf_attach_type *expected_attach_type);
@@ -262,9 +241,7 @@ LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name,
/* Accessors of bpf_program */
struct bpf_program;
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_program() instead")
-struct bpf_program *bpf_program__next(struct bpf_program *prog,
- const struct bpf_object *obj);
+
LIBBPF_API struct bpf_program *
bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog);
@@ -273,33 +250,17 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog)
(pos) != NULL; \
(pos) = bpf_object__next_program((obj), (pos)))
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_program() instead")
-struct bpf_program *bpf_program__prev(struct bpf_program *prog,
- const struct bpf_object *obj);
LIBBPF_API struct bpf_program *
bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog);
-typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
- bpf_program_clear_priv_t clear_priv);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
__u32 ifindex);
LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog);
-LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead")
-const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy);
LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
-/* returns program size in bytes */
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insn_cnt() instead")
-LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
-
struct bpf_insn;
/**
@@ -352,17 +313,7 @@ LIBBPF_API int bpf_program__set_insns(struct bpf_program *prog,
*/
LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 6, "use bpf_object__load() instead")
-LIBBPF_API int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_version);
LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated")
-LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
- const char *path,
- int instance);
-LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated")
-LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog,
- const char *path,
- int instance);
/**
* @brief **bpf_program__pin()** pins the BPF program to a file
@@ -506,6 +457,52 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
const char *pattern,
const struct bpf_kprobe_multi_opts *opts);
+struct bpf_ksyscall_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+ /* attach as return probe? */
+ bool retprobe;
+ size_t :0;
+};
+#define bpf_ksyscall_opts__last_field retprobe
+
+/**
+ * @brief **bpf_program__attach_ksyscall()** attaches a BPF program
+ * to kernel syscall handler of a specified syscall. Optionally it's possible
+ * to request to install retprobe that will be triggered at syscall exit. It's
+ * also possible to associate BPF cookie (though options).
+ *
+ * Libbpf automatically will determine correct full kernel function name,
+ * which depending on system architecture and kernel version/configuration
+ * could be of the form __<arch>_sys_<syscall> or __se_sys_<syscall>, and will
+ * attach specified program using kprobe/kretprobe mechanism.
+ *
+ * **bpf_program__attach_ksyscall()** is an API counterpart of declarative
+ * **SEC("ksyscall/<syscall>")** annotation of BPF programs.
+ *
+ * At the moment **SEC("ksyscall")** and **bpf_program__attach_ksyscall()** do
+ * not handle all the calling convention quirks for mmap(), clone() and compat
+ * syscalls. It also only attaches to "native" syscall interfaces. If host
+ * system supports compat syscalls or defines 32-bit syscalls in 64-bit
+ * kernel, such syscall interfaces won't be attached to by libbpf.
+ *
+ * These limitations may or may not change in the future. Therefore it is
+ * recommended to use SEC("kprobe") for these syscalls or if working with
+ * compat and 32-bit interfaces is required.
+ *
+ * @param prog BPF program to attach
+ * @param syscall_name Symbolic name of the syscall (e.g., "bpf")
+ * @param opts Additional options (see **struct bpf_ksyscall_opts**)
+ * @return Reference to the newly created BPF link; or NULL is returned on
+ * error, error code is stored in errno
+ */
+LIBBPF_API struct bpf_link *
+bpf_program__attach_ksyscall(const struct bpf_program *prog,
+ const char *syscall_name,
+ const struct bpf_ksyscall_opts *opts);
+
struct bpf_uprobe_opts {
/* size of this struct, for forward/backward compatiblity */
size_t sz;
@@ -662,99 +659,6 @@ LIBBPF_API struct bpf_link *
bpf_program__attach_iter(const struct bpf_program *prog,
const struct bpf_iter_attach_opts *opts);
-/*
- * Libbpf allows callers to adjust BPF programs before being loaded
- * into kernel. One program in an object file can be transformed into
- * multiple variants to be attached to different hooks.
- *
- * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
- * form an API for this purpose.
- *
- * - bpf_program_prep_t:
- * Defines a 'preprocessor', which is a caller defined function
- * passed to libbpf through bpf_program__set_prep(), and will be
- * called before program is loaded. The processor should adjust
- * the program one time for each instance according to the instance id
- * passed to it.
- *
- * - bpf_program__set_prep:
- * Attaches a preprocessor to a BPF program. The number of instances
- * that should be created is also passed through this function.
- *
- * - bpf_program__nth_fd:
- * After the program is loaded, get resulting FD of a given instance
- * of the BPF program.
- *
- * If bpf_program__set_prep() is not used, the program would be loaded
- * without adjustment during bpf_object__load(). The program has only
- * one instance. In this case bpf_program__fd(prog) is equal to
- * bpf_program__nth_fd(prog, 0).
- */
-struct bpf_prog_prep_result {
- /*
- * If not NULL, load new instruction array.
- * If set to NULL, don't load this instance.
- */
- struct bpf_insn *new_insn_ptr;
- int new_insn_cnt;
-
- /* If not NULL, result FD is written to it. */
- int *pfd;
-};
-
-/*
- * Parameters of bpf_program_prep_t:
- * - prog: The bpf_program being loaded.
- * - n: Index of instance being generated.
- * - insns: BPF instructions array.
- * - insns_cnt:Number of instructions in insns.
- * - res: Output parameter, result of transformation.
- *
- * Return value:
- * - Zero: pre-processing success.
- * - Non-zero: pre-processing error, stop loading.
- */
-typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
- struct bpf_insn *insns, int insns_cnt,
- struct bpf_prog_prep_result *res);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions")
-LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
- bpf_program_prep_t prep);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated")
-LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n);
-
-/*
- * Adjust type of BPF program. Default is kprobe.
- */
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
-
LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog);
/**
@@ -817,47 +721,6 @@ LIBBPF_API int
bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
const char *attach_func_name);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
-
-/*
- * No need for __attribute__((packed)), all members of 'bpf_map_def'
- * are all aligned. In addition, using __attribute__((packed))
- * would trigger a -Wpacked warning message, and lead to an error
- * if -Werror is set.
- */
-struct bpf_map_def {
- unsigned int type;
- unsigned int key_size;
- unsigned int value_size;
- unsigned int max_entries;
- unsigned int map_flags;
-};
-
/**
* @brief **bpf_object__find_map_by_name()** returns BPF map of
* the given name, if it exists within the passed BPF object
@@ -872,16 +735,6 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);
LIBBPF_API int
bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
-/*
- * Get bpf_map through the offset of corresponding struct bpf_map_def
- * in the BPF object file.
- */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead")
-struct bpf_map *
-bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead")
-struct bpf_map *bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
LIBBPF_API struct bpf_map *
bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map);
@@ -891,8 +744,6 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map);
(pos) = bpf_object__next_map((obj), (pos)))
#define bpf_map__for_each bpf_object__for_each_map
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_map() instead")
-struct bpf_map *bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
LIBBPF_API struct bpf_map *
bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map);
@@ -926,9 +777,6 @@ LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map);
*/
LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
-/* get map definition */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead")
-const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
/* get map name */
LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
/* get/set map type */
@@ -937,8 +785,6 @@ LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type);
/* get/set map size (max_entries) */
LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__set_max_entries() instead")
-LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
/* get/set map flags */
LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags);
@@ -961,17 +807,9 @@ LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra);
-typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
- bpf_map_clear_priv_t clear_priv);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead")
-LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
/**
* @brief **bpf_map__is_internal()** tells the caller whether or not the
@@ -1094,65 +932,6 @@ LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map,
const void *cur_key, void *next_key, size_t key_sz);
-/**
- * @brief **libbpf_get_error()** extracts the error code from the passed
- * pointer
- * @param ptr pointer returned from libbpf API function
- * @return error code; or 0 if no error occured
- *
- * Many libbpf API functions which return pointers have logic to encode error
- * codes as pointers, and do not return NULL. Meaning **libbpf_get_error()**
- * should be used on the return value from these functions immediately after
- * calling the API function, with no intervening calls that could clobber the
- * `errno` variable. Consult the individual functions documentation to verify
- * if this logic applies should be used.
- *
- * For these API functions, if `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)`
- * is enabled, NULL is returned on error instead.
- *
- * If ptr is NULL, then errno should be already set by the failing
- * API, because libbpf never returns NULL on success and it now always
- * sets errno on error.
- *
- * Example usage:
- *
- * struct perf_buffer *pb;
- *
- * pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES, &opts);
- * err = libbpf_get_error(pb);
- * if (err) {
- * pb = NULL;
- * fprintf(stderr, "failed to open perf buffer: %d\n", err);
- * goto cleanup;
- * }
- */
-LIBBPF_API long libbpf_get_error(const void *ptr);
-
-struct bpf_prog_load_attr {
- const char *file;
- enum bpf_prog_type prog_type;
- enum bpf_attach_type expected_attach_type;
- int ifindex;
- int log_level;
- int prog_flags;
-};
-
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open() and bpf_object__load() instead")
-LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
- struct bpf_object **pobj, int *prog_fd);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open() and bpf_object__load() instead")
-LIBBPF_API int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type,
- struct bpf_object **pobj, int *prog_fd);
-
-/* XDP related API */
-struct xdp_link_info {
- __u32 prog_id;
- __u32 drv_prog_id;
- __u32 hw_prog_id;
- __u32 skb_prog_id;
- __u8 attach_mode;
-};
-
struct bpf_xdp_set_link_opts {
size_t sz;
int old_fd;
@@ -1160,17 +939,6 @@ struct bpf_xdp_set_link_opts {
};
#define bpf_xdp_set_link_opts__last_field old_fd
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
-LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
-LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
- const struct bpf_xdp_set_link_opts *opts);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead")
-LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead")
-LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
- size_t info_size, __u32 flags);
-
struct bpf_xdp_attach_opts {
size_t sz;
int old_prog_fd;
@@ -1269,17 +1037,7 @@ typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);
/* common use perf buffer options */
struct perf_buffer_opts {
- union {
- size_t sz;
- struct { /* DEPRECATED: will be removed in v1.0 */
- /* if specified, sample_cb is called for each sample */
- perf_buffer_sample_fn sample_cb;
- /* if specified, lost_cb is called for each batch of lost samples */
- perf_buffer_lost_fn lost_cb;
- /* ctx is provided to sample_cb and lost_cb */
- void *ctx;
- };
- };
+ size_t sz;
};
#define perf_buffer_opts__last_field sz
@@ -1300,21 +1058,6 @@ perf_buffer__new(int map_fd, size_t page_cnt,
perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx,
const struct perf_buffer_opts *opts);
-LIBBPF_API struct perf_buffer *
-perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt,
- perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx,
- const struct perf_buffer_opts *opts);
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new() instead")
-struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt,
- const struct perf_buffer_opts *opts);
-
-#define perf_buffer__new(...) ___libbpf_overload(___perf_buffer_new, __VA_ARGS__)
-#define ___perf_buffer_new6(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) \
- perf_buffer__new(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts)
-#define ___perf_buffer_new3(map_fd, page_cnt, opts) \
- perf_buffer__new_deprecated(map_fd, page_cnt, opts)
-
enum bpf_perf_event_ret {
LIBBPF_PERF_EVENT_DONE = 0,
LIBBPF_PERF_EVENT_ERROR = -1,
@@ -1328,21 +1071,9 @@ typedef enum bpf_perf_event_ret
/* raw perf buffer options, giving most power and control */
struct perf_buffer_raw_opts {
- union {
- struct {
- size_t sz;
- long :0;
- long :0;
- };
- struct { /* DEPRECATED: will be removed in v1.0 */
- /* perf event attrs passed directly into perf_event_open() */
- struct perf_event_attr *attr;
- /* raw event callback */
- perf_buffer_event_fn event_cb;
- /* ctx is provided to event_cb */
- void *ctx;
- };
- };
+ size_t sz;
+ long :0;
+ long :0;
/* if cpu_cnt == 0, open all on all possible CPUs (up to the number of
* max_entries of given PERF_EVENT_ARRAY map)
*/
@@ -1354,26 +1085,13 @@ struct perf_buffer_raw_opts {
};
#define perf_buffer_raw_opts__last_field map_keys
+struct perf_event_attr;
+
LIBBPF_API struct perf_buffer *
perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr,
perf_buffer_event_fn event_cb, void *ctx,
const struct perf_buffer_raw_opts *opts);
-LIBBPF_API struct perf_buffer *
-perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, struct perf_event_attr *attr,
- perf_buffer_event_fn event_cb, void *ctx,
- const struct perf_buffer_raw_opts *opts);
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new_raw() instead")
-struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt,
- const struct perf_buffer_raw_opts *opts);
-
-#define perf_buffer__new_raw(...) ___libbpf_overload(___perf_buffer_new_raw, __VA_ARGS__)
-#define ___perf_buffer_new_raw6(map_fd, page_cnt, attr, event_cb, ctx, opts) \
- perf_buffer__new_raw(map_fd, page_cnt, attr, event_cb, ctx, opts)
-#define ___perf_buffer_new_raw3(map_fd, page_cnt, opts) \
- perf_buffer__new_raw_deprecated(map_fd, page_cnt, opts)
-
LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb);
LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
@@ -1381,15 +1099,22 @@ LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb);
LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx);
LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb);
LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx);
-
-typedef enum bpf_perf_event_ret
- (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
- void *private_data);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or perf_buffer__consume() instead")
-LIBBPF_API enum bpf_perf_event_ret
-bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
- void **copy_mem, size_t *copy_size,
- bpf_perf_event_print_t fn, void *private_data);
+/**
+ * @brief **perf_buffer__buffer()** returns the per-cpu raw mmap()'ed underlying
+ * memory region of the ring buffer.
+ * This ring buffer can be used to implement a custom events consumer.
+ * The ring buffer starts with the *struct perf_event_mmap_page*, which
+ * holds the ring buffer managment fields, when accessing the header
+ * structure it's important to be SMP aware.
+ * You can refer to *perf_event_read_simple* for a simple example.
+ * @param pb the perf buffer structure
+ * @param buf_idx the buffer index to retreive
+ * @param buf (out) gets the base pointer of the mmap()'ed memory
+ * @param buf_size (out) gets the size of the mmap()'ed region
+ * @return 0 on success, negative error code for failure
+ */
+LIBBPF_API int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf,
+ size_t *buf_size);
struct bpf_prog_linfo;
struct bpf_prog_info;
@@ -1412,14 +1137,6 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
* user, causing subsequent probes to fail. In this case, the caller may want
* to adjust that limit with setrlimit().
*/
-LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_prog_type() instead")
-LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_map_type() instead")
-LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_helper() instead")
-LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex);
-LIBBPF_DEPRECATED_SINCE(0, 8, "implement your own or use bpftool for feature detection")
-LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex);
/**
* @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports
@@ -1463,72 +1180,6 @@ LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void
LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type,
enum bpf_func_id helper_id, const void *opts);
-/*
- * Get bpf_prog_info in continuous memory
- *
- * struct bpf_prog_info has multiple arrays. The user has option to choose
- * arrays to fetch from kernel. The following APIs provide an uniform way to
- * fetch these data. All arrays in bpf_prog_info are stored in a single
- * continuous memory region. This makes it easy to store the info in a
- * file.
- *
- * Before writing bpf_prog_info_linear to files, it is necessary to
- * translate pointers in bpf_prog_info to offsets. Helper functions
- * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr()
- * are introduced to switch between pointers and offsets.
- *
- * Examples:
- * # To fetch map_ids and prog_tags:
- * __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) |
- * (1UL << BPF_PROG_INFO_PROG_TAGS);
- * struct bpf_prog_info_linear *info_linear =
- * bpf_program__get_prog_info_linear(fd, arrays);
- *
- * # To save data in file
- * bpf_program__bpil_addr_to_offs(info_linear);
- * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len);
- *
- * # To read data from file
- * read(f, info_linear, <proper_size>);
- * bpf_program__bpil_offs_to_addr(info_linear);
- */
-enum bpf_prog_info_array {
- BPF_PROG_INFO_FIRST_ARRAY = 0,
- BPF_PROG_INFO_JITED_INSNS = 0,
- BPF_PROG_INFO_XLATED_INSNS,
- BPF_PROG_INFO_MAP_IDS,
- BPF_PROG_INFO_JITED_KSYMS,
- BPF_PROG_INFO_JITED_FUNC_LENS,
- BPF_PROG_INFO_FUNC_INFO,
- BPF_PROG_INFO_LINE_INFO,
- BPF_PROG_INFO_JITED_LINE_INFO,
- BPF_PROG_INFO_PROG_TAGS,
- BPF_PROG_INFO_LAST_ARRAY,
-};
-
-struct bpf_prog_info_linear {
- /* size of struct bpf_prog_info, when the tool is compiled */
- __u32 info_len;
- /* total bytes allocated for data, round up to 8 bytes */
- __u32 data_len;
- /* which arrays are included in data */
- __u64 arrays;
- struct bpf_prog_info info;
- __u8 data[];
-};
-
-LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")
-LIBBPF_API struct bpf_prog_info_linear *
-bpf_program__get_prog_info_linear(int fd, __u64 arrays);
-
-LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")
-LIBBPF_API void
-bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
-
-LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")
-LIBBPF_API void
-bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
-
/**
* @brief **libbpf_num_possible_cpus()** is a helper function to get the
* number of possible CPUs that the host kernel supports and expects.
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 52973cffc20c..119e6e1ea7f1 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -1,29 +1,14 @@
LIBBPF_0.0.1 {
global:
bpf_btf_get_fd_by_id;
- bpf_create_map;
- bpf_create_map_in_map;
- bpf_create_map_in_map_node;
- bpf_create_map_name;
- bpf_create_map_node;
- bpf_create_map_xattr;
- bpf_load_btf;
- bpf_load_program;
- bpf_load_program_xattr;
bpf_map__btf_key_type_id;
bpf_map__btf_value_type_id;
- bpf_map__def;
bpf_map__fd;
- bpf_map__is_offload_neutral;
bpf_map__name;
- bpf_map__next;
bpf_map__pin;
- bpf_map__prev;
- bpf_map__priv;
bpf_map__reuse_fd;
bpf_map__set_ifindex;
bpf_map__set_inner_map_fd;
- bpf_map__set_priv;
bpf_map__unpin;
bpf_map_delete_elem;
bpf_map_get_fd_by_id;
@@ -38,79 +23,37 @@ LIBBPF_0.0.1 {
bpf_object__btf_fd;
bpf_object__close;
bpf_object__find_map_by_name;
- bpf_object__find_map_by_offset;
- bpf_object__find_program_by_title;
bpf_object__kversion;
bpf_object__load;
bpf_object__name;
- bpf_object__next;
bpf_object__open;
- bpf_object__open_buffer;
- bpf_object__open_xattr;
bpf_object__pin;
bpf_object__pin_maps;
bpf_object__pin_programs;
- bpf_object__priv;
- bpf_object__set_priv;
- bpf_object__unload;
bpf_object__unpin_maps;
bpf_object__unpin_programs;
- bpf_perf_event_read_simple;
bpf_prog_attach;
bpf_prog_detach;
bpf_prog_detach2;
bpf_prog_get_fd_by_id;
bpf_prog_get_next_id;
- bpf_prog_load;
- bpf_prog_load_xattr;
bpf_prog_query;
- bpf_prog_test_run;
- bpf_prog_test_run_xattr;
bpf_program__fd;
- bpf_program__is_kprobe;
- bpf_program__is_perf_event;
- bpf_program__is_raw_tracepoint;
- bpf_program__is_sched_act;
- bpf_program__is_sched_cls;
- bpf_program__is_socket_filter;
- bpf_program__is_tracepoint;
- bpf_program__is_xdp;
- bpf_program__load;
- bpf_program__next;
- bpf_program__nth_fd;
bpf_program__pin;
- bpf_program__pin_instance;
- bpf_program__prev;
- bpf_program__priv;
bpf_program__set_expected_attach_type;
bpf_program__set_ifindex;
- bpf_program__set_kprobe;
- bpf_program__set_perf_event;
- bpf_program__set_prep;
- bpf_program__set_priv;
- bpf_program__set_raw_tracepoint;
- bpf_program__set_sched_act;
- bpf_program__set_sched_cls;
- bpf_program__set_socket_filter;
- bpf_program__set_tracepoint;
bpf_program__set_type;
- bpf_program__set_xdp;
- bpf_program__title;
bpf_program__unload;
bpf_program__unpin;
- bpf_program__unpin_instance;
bpf_prog_linfo__free;
bpf_prog_linfo__new;
bpf_prog_linfo__lfind_addr_func;
bpf_prog_linfo__lfind;
bpf_raw_tracepoint_open;
- bpf_set_link_xdp_fd;
bpf_task_fd_query;
- bpf_verify_program;
btf__fd;
btf__find_by_name;
btf__free;
- btf__get_from_id;
btf__name_by_offset;
btf__new;
btf__resolve_size;
@@ -127,48 +70,24 @@ LIBBPF_0.0.1 {
LIBBPF_0.0.2 {
global:
- bpf_probe_helper;
- bpf_probe_map_type;
- bpf_probe_prog_type;
- bpf_map__resize;
bpf_map_lookup_elem_flags;
bpf_object__btf;
bpf_object__find_map_fd_by_name;
- bpf_get_link_xdp_id;
- btf__dedup;
- btf__get_map_kv_tids;
- btf__get_nr_types;
btf__get_raw_data;
- btf__load;
btf_ext__free;
- btf_ext__func_info_rec_size;
btf_ext__get_raw_data;
- btf_ext__line_info_rec_size;
btf_ext__new;
- btf_ext__reloc_func_info;
- btf_ext__reloc_line_info;
- xsk_umem__create;
- xsk_socket__create;
- xsk_umem__delete;
- xsk_socket__delete;
- xsk_umem__fd;
- xsk_socket__fd;
- bpf_program__get_prog_info_linear;
- bpf_program__bpil_addr_to_offs;
- bpf_program__bpil_offs_to_addr;
} LIBBPF_0.0.1;
LIBBPF_0.0.3 {
global:
bpf_map__is_internal;
bpf_map_freeze;
- btf__finalize_data;
} LIBBPF_0.0.2;
LIBBPF_0.0.4 {
global:
bpf_link__destroy;
- bpf_object__load_xattr;
bpf_program__attach_kprobe;
bpf_program__attach_perf_event;
bpf_program__attach_raw_tracepoint;
@@ -176,14 +95,10 @@ LIBBPF_0.0.4 {
bpf_program__attach_uprobe;
btf_dump__dump_type;
btf_dump__free;
- btf_dump__new;
btf__parse_elf;
libbpf_num_possible_cpus;
perf_buffer__free;
- perf_buffer__new;
- perf_buffer__new_raw;
perf_buffer__poll;
- xsk_umem__create;
} LIBBPF_0.0.3;
LIBBPF_0.0.5 {
@@ -193,7 +108,6 @@ LIBBPF_0.0.5 {
LIBBPF_0.0.6 {
global:
- bpf_get_link_xdp_info;
bpf_map__get_pin_path;
bpf_map__is_pinned;
bpf_map__set_pin_path;
@@ -202,9 +116,6 @@ LIBBPF_0.0.6 {
bpf_program__attach_trace;
bpf_program__get_expected_attach_type;
bpf_program__get_type;
- bpf_program__is_tracing;
- bpf_program__set_tracing;
- bpf_program__size;
btf__find_by_name_kind;
libbpf_find_vmlinux_btf_id;
} LIBBPF_0.0.5;
@@ -224,14 +135,8 @@ LIBBPF_0.0.7 {
bpf_object__detach_skeleton;
bpf_object__load_skeleton;
bpf_object__open_skeleton;
- bpf_probe_large_insn_limit;
- bpf_prog_attach_xattr;
bpf_program__attach;
bpf_program__name;
- bpf_program__is_extension;
- bpf_program__is_struct_ops;
- bpf_program__set_extension;
- bpf_program__set_struct_ops;
btf__align_of;
libbpf_find_kernel_btf;
} LIBBPF_0.0.6;
@@ -250,10 +155,7 @@ LIBBPF_0.0.8 {
bpf_prog_attach_opts;
bpf_program__attach_cgroup;
bpf_program__attach_lsm;
- bpf_program__is_lsm;
bpf_program__set_attach_target;
- bpf_program__set_lsm;
- bpf_set_link_xdp_fd_opts;
} LIBBPF_0.0.7;
LIBBPF_0.0.9 {
@@ -291,9 +193,7 @@ LIBBPF_0.1.0 {
bpf_map__value_size;
bpf_program__attach_xdp;
bpf_program__autoload;
- bpf_program__is_sk_lookup;
bpf_program__set_autoload;
- bpf_program__set_sk_lookup;
btf__parse;
btf__parse_raw;
btf__pointer_size;
@@ -336,7 +236,6 @@ LIBBPF_0.2.0 {
perf_buffer__buffer_fd;
perf_buffer__epoll_fd;
perf_buffer__consume_buffer;
- xsk_socket__create_shared;
} LIBBPF_0.1.0;
LIBBPF_0.3.0 {
@@ -348,8 +247,6 @@ LIBBPF_0.3.0 {
btf__new_empty_split;
btf__new_split;
ring_buffer__epoll_fd;
- xsk_setup_xdp_prog;
- xsk_socket__update_xskmap;
} LIBBPF_0.2.0;
LIBBPF_0.4.0 {
@@ -397,7 +294,6 @@ LIBBPF_0.6.0 {
bpf_object__next_program;
bpf_object__prev_map;
bpf_object__prev_program;
- bpf_prog_load_deprecated;
bpf_prog_load;
bpf_program__flags;
bpf_program__insn_cnt;
@@ -407,18 +303,14 @@ LIBBPF_0.6.0 {
btf__add_decl_tag;
btf__add_type_tag;
btf__dedup;
- btf__dedup_deprecated;
btf__raw_data;
btf__type_cnt;
btf_dump__new;
- btf_dump__new_deprecated;
libbpf_major_version;
libbpf_minor_version;
libbpf_version_string;
perf_buffer__new;
- perf_buffer__new_deprecated;
perf_buffer__new_raw;
- perf_buffer__new_raw_deprecated;
} LIBBPF_0.5.0;
LIBBPF_0.7.0 {
@@ -434,10 +326,11 @@ LIBBPF_0.7.0 {
bpf_xdp_detach;
bpf_xdp_query;
bpf_xdp_query_id;
+ btf_ext__raw_data;
libbpf_probe_bpf_helper;
libbpf_probe_bpf_map_type;
libbpf_probe_bpf_prog_type;
- libbpf_set_memlock_rlim_max;
+ libbpf_set_memlock_rlim;
} LIBBPF_0.6.0;
LIBBPF_0.8.0 {
@@ -461,5 +354,15 @@ LIBBPF_0.8.0 {
} LIBBPF_0.7.0;
LIBBPF_1.0.0 {
- local: *;
+ global:
+ bpf_obj_get_opts;
+ bpf_prog_query_opts;
+ bpf_program__attach_ksyscall;
+ btf__add_enum64;
+ btf__add_enum64_value;
+ libbpf_bpf_attach_type_str;
+ libbpf_bpf_link_type_str;
+ libbpf_bpf_map_type_str;
+ libbpf_bpf_prog_type_str;
+ perf_buffer__buffer;
};
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
index 000e37798ff2..9a7937f339df 100644
--- a/tools/lib/bpf/libbpf_common.h
+++ b/tools/lib/bpf/libbpf_common.h
@@ -30,20 +30,10 @@
/* Add checks for other versions below when planning deprecation of API symbols
* with the LIBBPF_DEPRECATED_SINCE macro.
*/
-#if __LIBBPF_CURRENT_VERSION_GEQ(0, 6)
-#define __LIBBPF_MARK_DEPRECATED_0_6(X) X
+#if __LIBBPF_CURRENT_VERSION_GEQ(1, 0)
+#define __LIBBPF_MARK_DEPRECATED_1_0(X) X
#else
-#define __LIBBPF_MARK_DEPRECATED_0_6(X)
-#endif
-#if __LIBBPF_CURRENT_VERSION_GEQ(0, 7)
-#define __LIBBPF_MARK_DEPRECATED_0_7(X) X
-#else
-#define __LIBBPF_MARK_DEPRECATED_0_7(X)
-#endif
-#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8)
-#define __LIBBPF_MARK_DEPRECATED_0_8(X) X
-#else
-#define __LIBBPF_MARK_DEPRECATED_0_8(X)
+#define __LIBBPF_MARK_DEPRECATED_1_0(X)
#endif
/* This set of internal macros allows to do "function overloading" based on
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 4abdbe2fea9d..4135ae0a2bc3 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -15,7 +15,6 @@
#include <linux/err.h>
#include <fcntl.h>
#include <unistd.h>
-#include "libbpf_legacy.h"
#include "relo_core.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
@@ -109,9 +108,9 @@ static inline bool str_has_sfx(const char *str, const char *sfx)
size_t str_len = strlen(str);
size_t sfx_len = strlen(sfx);
- if (sfx_len <= str_len)
- return strcmp(str + str_len - sfx_len, sfx);
- return false;
+ if (sfx_len > str_len)
+ return false;
+ return strcmp(str + str_len - sfx_len, sfx) == 0;
}
/* Symbol versioning is different between static and shared library.
@@ -351,6 +350,10 @@ enum kern_feature_id {
FEAT_MEMCG_ACCOUNT,
/* BPF cookie (bpf_get_attach_cookie() BPF helper) support */
FEAT_BPF_COOKIE,
+ /* BTF_KIND_ENUM64 support and BTF_KIND_ENUM kflag support */
+ FEAT_BTF_ENUM64,
+ /* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */
+ FEAT_SYSCALL_WRAPPER,
__FEAT_CNT,
};
@@ -476,8 +479,6 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
__u32 kind);
-extern enum libbpf_strict_mode libbpf_mode;
-
typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type,
const char *sym_name, void *ctx);
@@ -496,12 +497,8 @@ static inline int libbpf_err(int ret)
*/
static inline int libbpf_err_errno(int ret)
{
- if (libbpf_mode & LIBBPF_STRICT_DIRECT_ERRS)
- /* errno is already assumed to be set on error */
- return ret < 0 ? -errno : ret;
-
- /* legacy: on error return -1 directly and don't touch errno */
- return ret;
+ /* errno is already assumed to be set on error */
+ return ret < 0 ? -errno : ret;
}
/* handle error for pointer-returning APIs, err is assumed to be < 0 always */
@@ -509,12 +506,7 @@ static inline void *libbpf_err_ptr(int err)
{
/* set errno on error, this doesn't break anything */
errno = -err;
-
- if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
- return NULL;
-
- /* legacy: encode err as ptr */
- return ERR_PTR(err);
+ return NULL;
}
/* handle pointer-returning APIs' error handling */
@@ -524,11 +516,7 @@ static inline void *libbpf_ptr(void *ret)
if (IS_ERR(ret))
errno = -PTR_ERR(ret);
- if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
- return IS_ERR(ret) ? NULL : ret;
-
- /* legacy: pass-through original pointer */
- return ret;
+ return IS_ERR(ret) ? NULL : ret;
}
static inline bool str_is_empty(const char *s)
@@ -580,4 +568,9 @@ struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man,
const char *usdt_provider, const char *usdt_name,
__u64 usdt_cookie);
+static inline bool is_pow_of_2(size_t x)
+{
+ return x && (x & (x - 1)) == 0;
+}
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
index d7bcbd01f66f..5b7e0155db6a 100644
--- a/tools/lib/bpf/libbpf_legacy.h
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -20,6 +20,11 @@
extern "C" {
#endif
+/* As of libbpf 1.0 libbpf_set_strict_mode() and enum libbpf_struct_mode have
+ * no effect. But they are left in libbpf_legacy.h so that applications that
+ * prepared for libbpf 1.0 before final release by using
+ * libbpf_set_strict_mode() still work with libbpf 1.0+ without any changes.
+ */
enum libbpf_strict_mode {
/* Turn on all supported strict features of libbpf to simulate libbpf
* v1.0 behavior.
@@ -71,8 +76,8 @@ enum libbpf_strict_mode {
* first BPF program or map creation operation. This is done only if
* kernel is too old to support memcg-based memory accounting for BPF
* subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY,
- * but it can be overriden with libbpf_set_memlock_rlim_max() API.
- * Note that libbpf_set_memlock_rlim_max() needs to be called before
+ * but it can be overriden with libbpf_set_memlock_rlim() API.
+ * Note that libbpf_set_memlock_rlim() needs to be called before
* the very first bpf_prog_load(), bpf_map_create() or bpf_object__load()
* operation.
*/
@@ -88,6 +93,25 @@ enum libbpf_strict_mode {
LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
+/**
+ * @brief **libbpf_get_error()** extracts the error code from the passed
+ * pointer
+ * @param ptr pointer returned from libbpf API function
+ * @return error code; or 0 if no error occured
+ *
+ * Note, as of libbpf 1.0 this function is not necessary and not recommended
+ * to be used. Libbpf doesn't return error code embedded into the pointer
+ * itself. Instead, NULL is returned on error and error code is passed through
+ * thread-local errno variable. **libbpf_get_error()** is just returning -errno
+ * value if it receives NULL, which is correct only if errno hasn't been
+ * modified between libbpf API call and corresponding **libbpf_get_error()**
+ * call. Prefer to check return for NULL and use errno directly.
+ *
+ * This API is left in libbpf 1.0 to allow applications that were 1.0-ready
+ * before final libbpf 1.0 without needing to change them.
+ */
+LIBBPF_API long libbpf_get_error(const void *ptr);
+
#define DECLARE_LIBBPF_OPTS LIBBPF_OPTS
/* "Discouraged" APIs which don't follow consistent libbpf naming patterns.
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 97b06cede56f..0b5398786bf3 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -17,47 +17,14 @@
#include "libbpf.h"
#include "libbpf_internal.h"
-static bool grep(const char *buffer, const char *pattern)
-{
- return !!strstr(buffer, pattern);
-}
-
-static int get_vendor_id(int ifindex)
-{
- char ifname[IF_NAMESIZE], path[64], buf[8];
- ssize_t len;
- int fd;
-
- if (!if_indextoname(ifindex, ifname))
- return -1;
-
- snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname);
-
- fd = open(path, O_RDONLY | O_CLOEXEC);
- if (fd < 0)
- return -1;
-
- len = read(fd, buf, sizeof(buf));
- close(fd);
- if (len < 0)
- return -1;
- if (len >= (ssize_t)sizeof(buf))
- return -1;
- buf[len] = '\0';
-
- return strtol(buf, NULL, 0);
-}
-
static int probe_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, size_t insns_cnt,
- char *log_buf, size_t log_buf_sz,
- __u32 ifindex)
+ char *log_buf, size_t log_buf_sz)
{
LIBBPF_OPTS(bpf_prog_load_opts, opts,
.log_buf = log_buf,
.log_size = log_buf_sz,
.log_level = log_buf ? 1 : 0,
- .prog_ifindex = ifindex,
);
int fd, err, exp_err = 0;
const char *exp_msg = NULL;
@@ -161,31 +128,10 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
if (opts)
return libbpf_err(-EINVAL);
- ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0);
+ ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0);
return libbpf_err(ret);
}
-bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
-{
- struct bpf_insn insns[2] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN()
- };
-
- /* prefer libbpf_probe_bpf_prog_type() unless offload is requested */
- if (ifindex == 0)
- return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1;
-
- if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS)
- /* nfp returns -EINVAL on exit(0) with TC offload */
- insns[0].imm = 2;
-
- errno = 0;
- probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);
-
- return errno != EINVAL && errno != EOPNOTSUPP;
-}
-
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
const char *str_sec, size_t str_len)
{
@@ -242,15 +188,13 @@ static int load_local_storage_btf(void)
strs, sizeof(strs));
}
-static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)
+static int probe_map_create(enum bpf_map_type map_type)
{
LIBBPF_OPTS(bpf_map_create_opts, opts);
int key_size, value_size, max_entries;
__u32 btf_key_type_id = 0, btf_value_type_id = 0;
int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err;
- opts.map_ifindex = ifindex;
-
key_size = sizeof(__u32);
value_size = sizeof(__u32);
max_entries = 1;
@@ -326,12 +270,6 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)
if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
- /* TODO: probe for device, once libbpf has a function to create
- * map-in-map for offload
- */
- if (ifindex)
- goto cleanup;
-
fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
sizeof(__u32), sizeof(__u32), 1, NULL);
if (fd_inner < 0)
@@ -370,15 +308,10 @@ int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts)
if (opts)
return libbpf_err(-EINVAL);
- ret = probe_map_create(map_type, 0);
+ ret = probe_map_create(map_type);
return libbpf_err(ret);
}
-bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
-{
- return probe_map_create(map_type, ifindex) == 1;
-}
-
int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id,
const void *opts)
{
@@ -407,7 +340,7 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe
}
buf[0] = '\0';
- ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0);
+ ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf));
if (ret < 0)
return libbpf_err(ret);
@@ -427,51 +360,3 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe
return 0;
return 1; /* assume supported */
}
-
-bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
- __u32 ifindex)
-{
- struct bpf_insn insns[2] = {
- BPF_EMIT_CALL(id),
- BPF_EXIT_INSN()
- };
- char buf[4096] = {};
- bool res;
-
- probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex);
- res = !grep(buf, "invalid func ") && !grep(buf, "unknown func ");
-
- if (ifindex) {
- switch (get_vendor_id(ifindex)) {
- case 0x19ee: /* Netronome specific */
- res = res && !grep(buf, "not supported by FW") &&
- !grep(buf, "unsupported function id");
- break;
- default:
- break;
- }
- }
-
- return res;
-}
-
-/*
- * Probe for availability of kernel commit (5.3):
- *
- * c04c0d2b968a ("bpf: increase complexity limit and maximum program size")
- */
-bool bpf_probe_large_insn_limit(__u32 ifindex)
-{
- struct bpf_insn insns[BPF_MAXINSNS + 1];
- int i;
-
- for (i = 0; i < BPF_MAXINSNS; i++)
- insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1);
- insns[BPF_MAXINSNS] = BPF_EXIT_INSN();
-
- errno = 0;
- probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
- ifindex);
-
- return errno != E2BIG && errno != EINVAL;
-}
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 9aa016fb55aa..4ac02c28e152 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -697,11 +697,6 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
return err;
}
-static bool is_pow_of_2(size_t x)
-{
- return x && (x & (x - 1)) == 0;
-}
-
static int linker_sanity_check_elf(struct src_obj *obj)
{
struct src_sec *sec;
@@ -1340,6 +1335,7 @@ recur:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FWD:
case BTF_KIND_FUNC:
case BTF_KIND_VAR:
@@ -1362,6 +1358,7 @@ recur:
case BTF_KIND_INT:
case BTF_KIND_FLOAT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
/* ignore encoding for int and enum values for enum */
if (t1->size != t2->size) {
pr_warn("global '%s': incompatible %s '%s' size %u and %u\n",
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index cbc8967d5402..6c013168032d 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -27,6 +27,14 @@ typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
void *cookie);
+struct xdp_link_info {
+ __u32 prog_id;
+ __u32 drv_prog_id;
+ __u32 hw_prog_id;
+ __u32 skb_prog_id;
+ __u8 attach_mode;
+};
+
struct xdp_id_md {
int ifindex;
__u32 flags;
@@ -288,31 +296,6 @@ int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *o
return bpf_xdp_attach(ifindex, -1, flags, opts);
}
-int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
- const struct bpf_xdp_set_link_opts *opts)
-{
- int old_fd = -1, ret;
-
- if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
- return libbpf_err(-EINVAL);
-
- if (OPTS_HAS(opts, old_fd)) {
- old_fd = OPTS_GET(opts, old_fd, -1);
- flags |= XDP_FLAGS_REPLACE;
- }
-
- ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
- return libbpf_err(ret);
-}
-
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
-{
- int ret;
-
- ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
- return libbpf_err(ret);
-}
-
static int __dump_link_nlmsg(struct nlmsghdr *nlh,
libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
{
@@ -413,30 +396,6 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
return 0;
}
-int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
- size_t info_size, __u32 flags)
-{
- LIBBPF_OPTS(bpf_xdp_query_opts, opts);
- size_t sz;
- int err;
-
- if (!info_size)
- return libbpf_err(-EINVAL);
-
- err = bpf_xdp_query(ifindex, flags, &opts);
- if (err)
- return libbpf_err(err);
-
- /* struct xdp_link_info field layout matches struct bpf_xdp_query_opts
- * layout after sz field
- */
- sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode));
- memcpy(info, &opts.prog_id, sz);
- memset((void *)info + sz, 0, info_size - sz);
-
- return 0;
-}
-
int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
{
LIBBPF_OPTS(bpf_xdp_query_opts, opts);
@@ -463,11 +422,6 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
}
-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
-{
- return bpf_xdp_query_id(ifindex, flags, prog_id);
-}
-
typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
static int clsact_config(struct libbpf_nla_req *req)
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index ba4453dfd1ed..c4b0e81ae293 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -95,6 +95,7 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id";
case BPF_CORE_TYPE_ID_TARGET: return "target_type_id";
case BPF_CORE_TYPE_EXISTS: return "type_exists";
+ case BPF_CORE_TYPE_MATCHES: return "type_matches";
case BPF_CORE_TYPE_SIZE: return "type_size";
case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists";
case BPF_CORE_ENUMVAL_VALUE: return "enumval_value";
@@ -123,6 +124,7 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
case BPF_CORE_TYPE_ID_LOCAL:
case BPF_CORE_TYPE_ID_TARGET:
case BPF_CORE_TYPE_EXISTS:
+ case BPF_CORE_TYPE_MATCHES:
case BPF_CORE_TYPE_SIZE:
return true;
default:
@@ -141,6 +143,86 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
}
}
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id, int level)
+{
+ const struct btf_type *local_type, *targ_type;
+ int depth = 32; /* max recursion depth */
+
+ /* caller made sure that names match (ignoring flavor suffix) */
+ local_type = btf_type_by_id(local_btf, local_id);
+ targ_type = btf_type_by_id(targ_btf, targ_id);
+ if (!btf_kind_core_compat(local_type, targ_type))
+ return 0;
+
+recur:
+ depth--;
+ if (depth < 0)
+ return -EINVAL;
+
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (!btf_kind_core_compat(local_type, targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ case BTF_KIND_ENUM64:
+ return 1;
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible between each other
+ */
+ return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+ case BTF_KIND_PTR:
+ local_id = local_type->type;
+ targ_id = targ_type->type;
+ goto recur;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *local_p = btf_params(local_type);
+ struct btf_param *targ_p = btf_params(targ_type);
+ __u16 local_vlen = btf_vlen(local_type);
+ __u16 targ_vlen = btf_vlen(targ_type);
+ int i, err;
+
+ if (local_vlen != targ_vlen)
+ return 0;
+
+ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+ if (level <= 0)
+ return -EINVAL;
+
+ skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
+ skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
+ err = __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
+ level - 1);
+ if (err <= 0)
+ return err;
+ }
+
+ /* tail recurse for return type check */
+ skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
+ skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
+ goto recur;
+ }
+ default:
+ pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+ btf_kind_str(local_type), local_id, targ_id);
+ return 0;
+ }
+}
+
/*
* Turn bpf_core_relo into a low- and high-level spec representation,
* validating correctness along the way, as well as calculating resulting
@@ -167,11 +249,11 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
* just a parsed access string representation): [0, 1, 2, 3].
*
* High-level spec will capture only 3 points:
- * - intial zero-index access by pointer (&s->... is the same as &s[0]...);
+ * - initial zero-index access by pointer (&s->... is the same as &s[0]...);
* - field 'a' access (corresponds to '2' in low-level spec);
* - array element #3 access (corresponds to '3' in low-level spec).
*
- * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * Type-based relocations (TYPE_EXISTS/TYPE_MATCHES/TYPE_SIZE,
* TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
* spec and raw_spec are kept empty.
*
@@ -186,7 +268,7 @@ int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
struct bpf_core_accessor *acc;
const struct btf_type *t;
const char *name, *spec_str;
- __u32 id;
+ __u32 id, name_off;
__s64 sz;
spec_str = btf__name_by_offset(btf, relo->access_str_off);
@@ -231,11 +313,13 @@ int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
spec->len++;
if (core_relo_is_enumval_based(relo->kind)) {
- if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+ if (!btf_is_any_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
return -EINVAL;
/* record enumerator name in a first accessor */
- acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+ name_off = btf_is_enum(t) ? btf_enum(t)[access_idx].name_off
+ : btf_enum64(t)[access_idx].name_off;
+ acc->name = btf__name_by_offset(btf, name_off);
return 0;
}
@@ -340,7 +424,7 @@ recur:
if (btf_is_composite(local_type) && btf_is_composite(targ_type))
return 1;
- if (btf_kind(local_type) != btf_kind(targ_type))
+ if (!btf_kind_core_compat(local_type, targ_type))
return 0;
switch (btf_kind(local_type)) {
@@ -348,6 +432,7 @@ recur:
case BTF_KIND_FLOAT:
return 1;
case BTF_KIND_FWD:
+ case BTF_KIND_ENUM64:
case BTF_KIND_ENUM: {
const char *local_name, *targ_name;
size_t local_len, targ_len;
@@ -477,6 +562,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
const struct bpf_core_accessor *local_acc;
struct bpf_core_accessor *targ_acc;
int i, sz, matched;
+ __u32 name_off;
memset(targ_spec, 0, sizeof(*targ_spec));
targ_spec->btf = targ_btf;
@@ -484,9 +570,14 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
targ_spec->relo_kind = local_spec->relo_kind;
if (core_relo_is_type_based(local_spec->relo_kind)) {
- return bpf_core_types_are_compat(local_spec->btf,
- local_spec->root_type_id,
- targ_btf, targ_id);
+ if (local_spec->relo_kind == BPF_CORE_TYPE_MATCHES)
+ return bpf_core_types_match(local_spec->btf,
+ local_spec->root_type_id,
+ targ_btf, targ_id);
+ else
+ return bpf_core_types_are_compat(local_spec->btf,
+ local_spec->root_type_id,
+ targ_btf, targ_id);
}
local_acc = &local_spec->spec[0];
@@ -494,18 +585,22 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
if (core_relo_is_enumval_based(local_spec->relo_kind)) {
size_t local_essent_len, targ_essent_len;
- const struct btf_enum *e;
const char *targ_name;
/* has to resolve to an enum */
targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
- if (!btf_is_enum(targ_type))
+ if (!btf_is_any_enum(targ_type))
return 0;
local_essent_len = bpf_core_essential_name_len(local_acc->name);
- for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
- targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+ for (i = 0; i < btf_vlen(targ_type); i++) {
+ if (btf_is_enum(targ_type))
+ name_off = btf_enum(targ_type)[i].name_off;
+ else
+ name_off = btf_enum64(targ_type)[i].name_off;
+
+ targ_name = btf__name_by_offset(targ_spec->btf, name_off);
targ_essent_len = bpf_core_essential_name_len(targ_name);
if (targ_essent_len != local_essent_len)
continue;
@@ -583,7 +678,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
static int bpf_core_calc_field_relo(const char *prog_name,
const struct bpf_core_relo *relo,
const struct bpf_core_spec *spec,
- __u32 *val, __u32 *field_sz, __u32 *type_id,
+ __u64 *val, __u32 *field_sz, __u32 *type_id,
bool *validate)
{
const struct bpf_core_accessor *acc;
@@ -680,8 +775,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
*val = byte_sz;
break;
case BPF_CORE_FIELD_SIGNED:
- /* enums will be assumed unsigned */
- *val = btf_is_enum(mt) ||
+ *val = (btf_is_any_enum(mt) && BTF_INFO_KFLAG(mt->info)) ||
(btf_int_encoding(mt) & BTF_INT_SIGNED);
if (validate)
*validate = true; /* signedness is never ambiguous */
@@ -708,7 +802,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
const struct bpf_core_spec *spec,
- __u32 *val, bool *validate)
+ __u64 *val, bool *validate)
{
__s64 sz;
@@ -732,6 +826,7 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
*validate = false;
break;
case BPF_CORE_TYPE_EXISTS:
+ case BPF_CORE_TYPE_MATCHES:
*val = 1;
break;
case BPF_CORE_TYPE_SIZE:
@@ -751,10 +846,9 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
const struct bpf_core_spec *spec,
- __u32 *val)
+ __u64 *val)
{
const struct btf_type *t;
- const struct btf_enum *e;
switch (relo->kind) {
case BPF_CORE_ENUMVAL_EXISTS:
@@ -764,8 +858,10 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
if (!spec)
return -EUCLEAN; /* request instruction poisoning */
t = btf_type_by_id(spec->btf, spec->spec[0].type_id);
- e = btf_enum(t) + spec->spec[0].idx;
- *val = e->val;
+ if (btf_is_enum(t))
+ *val = btf_enum(t)[spec->spec[0].idx].val;
+ else
+ *val = btf_enum64_value(btf_enum64(t) + spec->spec[0].idx);
break;
default:
return -EOPNOTSUPP;
@@ -929,7 +1025,7 @@ int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
int insn_idx, const struct bpf_core_relo *relo,
int relo_idx, const struct bpf_core_relo_res *res)
{
- __u32 orig_val, new_val;
+ __u64 orig_val, new_val;
__u8 class;
class = BPF_CLASS(insn->code);
@@ -954,28 +1050,30 @@ poison:
if (BPF_SRC(insn->code) != BPF_K)
return -EINVAL;
if (res->validate && insn->imm != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %llu -> %llu\n",
prog_name, relo_idx,
- insn_idx, insn->imm, orig_val, new_val);
+ insn_idx, insn->imm, (unsigned long long)orig_val,
+ (unsigned long long)new_val);
return -EINVAL;
}
orig_val = insn->imm;
insn->imm = new_val;
- pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
+ pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %llu -> %llu\n",
prog_name, relo_idx, insn_idx,
- orig_val, new_val);
+ (unsigned long long)orig_val, (unsigned long long)new_val);
break;
case BPF_LDX:
case BPF_ST:
case BPF_STX:
if (res->validate && insn->off != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
- prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val);
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %llu -> %llu\n",
+ prog_name, relo_idx, insn_idx, insn->off, (unsigned long long)orig_val,
+ (unsigned long long)new_val);
return -EINVAL;
}
if (new_val > SHRT_MAX) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
- prog_name, relo_idx, insn_idx, new_val);
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %llu\n",
+ prog_name, relo_idx, insn_idx, (unsigned long long)new_val);
return -ERANGE;
}
if (res->fail_memsz_adjust) {
@@ -987,8 +1085,9 @@ poison:
orig_val = insn->off;
insn->off = new_val;
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
- prog_name, relo_idx, insn_idx, orig_val, new_val);
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %llu -> %llu\n",
+ prog_name, relo_idx, insn_idx, (unsigned long long)orig_val,
+ (unsigned long long)new_val);
if (res->new_sz != res->orig_sz) {
int insn_bytes_sz, insn_bpf_sz;
@@ -1024,20 +1123,20 @@ poison:
return -EINVAL;
}
- imm = insn[0].imm + ((__u64)insn[1].imm << 32);
+ imm = (__u32)insn[0].imm | ((__u64)insn[1].imm << 32);
if (res->validate && imm != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %llu -> %llu\n",
prog_name, relo_idx,
insn_idx, (unsigned long long)imm,
- orig_val, new_val);
+ (unsigned long long)orig_val, (unsigned long long)new_val);
return -EINVAL;
}
insn[0].imm = new_val;
- insn[1].imm = 0; /* currently only 32-bit values are supported */
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+ insn[1].imm = new_val >> 32;
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %llu\n",
prog_name, relo_idx, insn_idx,
- (unsigned long long)imm, new_val);
+ (unsigned long long)imm, (unsigned long long)new_val);
break;
}
default:
@@ -1057,7 +1156,6 @@ poison:
int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec)
{
const struct btf_type *t;
- const struct btf_enum *e;
const char *s;
__u32 type_id;
int i, len = 0;
@@ -1086,10 +1184,23 @@ int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *s
if (core_relo_is_enumval_based(spec->relo_kind)) {
t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
- e = btf_enum(t) + spec->raw_spec[0];
- s = btf__name_by_offset(spec->btf, e->name_off);
+ if (btf_is_enum(t)) {
+ const struct btf_enum *e;
+ const char *fmt_str;
+
+ e = btf_enum(t) + spec->raw_spec[0];
+ s = btf__name_by_offset(spec->btf, e->name_off);
+ fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %d" : "::%s = %u";
+ append_buf(fmt_str, s, e->val);
+ } else {
+ const struct btf_enum64 *e;
+ const char *fmt_str;
- append_buf("::%s = %u", s, e->val);
+ e = btf_enum64(t) + spec->raw_spec[0];
+ s = btf__name_by_offset(spec->btf, e->name_off);
+ fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %lld" : "::%s = %llu";
+ append_buf(fmt_str, s, (unsigned long long)btf_enum64_value(e));
+ }
return len;
}
@@ -1148,11 +1259,11 @@ int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *s
* 3. It is supported and expected that there might be multiple flavors
* matching the spec. As long as all the specs resolve to the same set of
* offsets across all candidates, there is no error. If there is any
- * ambiguity, CO-RE relocation will fail. This is necessary to accomodate
- * imprefection of BTF deduplication, which can cause slight duplication of
+ * ambiguity, CO-RE relocation will fail. This is necessary to accommodate
+ * imperfection of BTF deduplication, which can cause slight duplication of
* the same BTF type, if some directly or indirectly referenced (by
* pointer) type gets resolved to different actual types in different
- * object files. If such situation occurs, deduplicated BTF will end up
+ * object files. If such a situation occurs, deduplicated BTF will end up
* with two (or more) structurally identical types, which differ only in
* types they refer to through pointer. This should be OK in most cases and
* is not an error.
@@ -1261,10 +1372,12 @@ int bpf_core_calc_relo_insn(const char *prog_name,
* decision and value, otherwise it's dangerous to
* proceed due to ambiguity
*/
- pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+ pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %llu != %s %llu\n",
prog_name, relo_idx,
- cand_res.poison ? "failure" : "success", cand_res.new_val,
- targ_res->poison ? "failure" : "success", targ_res->new_val);
+ cand_res.poison ? "failure" : "success",
+ (unsigned long long)cand_res.new_val,
+ targ_res->poison ? "failure" : "success",
+ (unsigned long long)targ_res->new_val);
return -EINVAL;
}
@@ -1305,3 +1418,273 @@ int bpf_core_calc_relo_insn(const char *prog_name,
return 0;
}
+
+static bool bpf_core_names_match(const struct btf *local_btf, size_t local_name_off,
+ const struct btf *targ_btf, size_t targ_name_off)
+{
+ const char *local_n, *targ_n;
+ size_t local_len, targ_len;
+
+ local_n = btf__name_by_offset(local_btf, local_name_off);
+ targ_n = btf__name_by_offset(targ_btf, targ_name_off);
+
+ if (str_is_empty(targ_n))
+ return str_is_empty(local_n);
+
+ targ_len = bpf_core_essential_name_len(targ_n);
+ local_len = bpf_core_essential_name_len(local_n);
+
+ return targ_len == local_len && strncmp(local_n, targ_n, local_len) == 0;
+}
+
+static int bpf_core_enums_match(const struct btf *local_btf, const struct btf_type *local_t,
+ const struct btf *targ_btf, const struct btf_type *targ_t)
+{
+ __u16 local_vlen = btf_vlen(local_t);
+ __u16 targ_vlen = btf_vlen(targ_t);
+ int i, j;
+
+ if (local_t->size != targ_t->size)
+ return 0;
+
+ if (local_vlen > targ_vlen)
+ return 0;
+
+ /* iterate over the local enum's variants and make sure each has
+ * a symbolic name correspondent in the target
+ */
+ for (i = 0; i < local_vlen; i++) {
+ bool matched = false;
+ __u32 local_n_off, targ_n_off;
+
+ local_n_off = btf_is_enum(local_t) ? btf_enum(local_t)[i].name_off :
+ btf_enum64(local_t)[i].name_off;
+
+ for (j = 0; j < targ_vlen; j++) {
+ targ_n_off = btf_is_enum(targ_t) ? btf_enum(targ_t)[j].name_off :
+ btf_enum64(targ_t)[j].name_off;
+
+ if (bpf_core_names_match(local_btf, local_n_off, targ_btf, targ_n_off)) {
+ matched = true;
+ break;
+ }
+ }
+
+ if (!matched)
+ return 0;
+ }
+ return 1;
+}
+
+static int bpf_core_composites_match(const struct btf *local_btf, const struct btf_type *local_t,
+ const struct btf *targ_btf, const struct btf_type *targ_t,
+ bool behind_ptr, int level)
+{
+ const struct btf_member *local_m = btf_members(local_t);
+ __u16 local_vlen = btf_vlen(local_t);
+ __u16 targ_vlen = btf_vlen(targ_t);
+ int i, j, err;
+
+ if (local_vlen > targ_vlen)
+ return 0;
+
+ /* check that all local members have a match in the target */
+ for (i = 0; i < local_vlen; i++, local_m++) {
+ const struct btf_member *targ_m = btf_members(targ_t);
+ bool matched = false;
+
+ for (j = 0; j < targ_vlen; j++, targ_m++) {
+ if (!bpf_core_names_match(local_btf, local_m->name_off,
+ targ_btf, targ_m->name_off))
+ continue;
+
+ err = __bpf_core_types_match(local_btf, local_m->type, targ_btf,
+ targ_m->type, behind_ptr, level - 1);
+ if (err < 0)
+ return err;
+ if (err > 0) {
+ matched = true;
+ break;
+ }
+ }
+
+ if (!matched)
+ return 0;
+ }
+ return 1;
+}
+
+/* Check that two types "match". This function assumes that root types were
+ * already checked for name match.
+ *
+ * The matching relation is defined as follows:
+ * - modifiers and typedefs are stripped (and, hence, effectively ignored)
+ * - generally speaking types need to be of same kind (struct vs. struct, union
+ * vs. union, etc.)
+ * - exceptions are struct/union behind a pointer which could also match a
+ * forward declaration of a struct or union, respectively, and enum vs.
+ * enum64 (see below)
+ * Then, depending on type:
+ * - integers:
+ * - match if size and signedness match
+ * - arrays & pointers:
+ * - target types are recursively matched
+ * - structs & unions:
+ * - local members need to exist in target with the same name
+ * - for each member we recursively check match unless it is already behind a
+ * pointer, in which case we only check matching names and compatible kind
+ * - enums:
+ * - local variants have to have a match in target by symbolic name (but not
+ * numeric value)
+ * - size has to match (but enum may match enum64 and vice versa)
+ * - function pointers:
+ * - number and position of arguments in local type has to match target
+ * - for each argument and the return value we recursively check match
+ */
+int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+ __u32 targ_id, bool behind_ptr, int level)
+{
+ const struct btf_type *local_t, *targ_t;
+ int depth = 32; /* max recursion depth */
+ __u16 local_k, targ_k;
+
+ if (level <= 0)
+ return -EINVAL;
+
+ local_t = btf_type_by_id(local_btf, local_id);
+ targ_t = btf_type_by_id(targ_btf, targ_id);
+
+recur:
+ depth--;
+ if (depth < 0)
+ return -EINVAL;
+
+ local_t = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_t = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_t || !targ_t)
+ return -EINVAL;
+
+ /* While the name check happens after typedefs are skipped, root-level
+ * typedefs would still be name-matched as that's the contract with
+ * callers.
+ */
+ if (!bpf_core_names_match(local_btf, local_t->name_off, targ_btf, targ_t->name_off))
+ return 0;
+
+ local_k = btf_kind(local_t);
+ targ_k = btf_kind(targ_t);
+
+ switch (local_k) {
+ case BTF_KIND_UNKN:
+ return local_k == targ_k;
+ case BTF_KIND_FWD: {
+ bool local_f = BTF_INFO_KFLAG(local_t->info);
+
+ if (behind_ptr) {
+ if (local_k == targ_k)
+ return local_f == BTF_INFO_KFLAG(targ_t->info);
+
+ /* for forward declarations kflag dictates whether the
+ * target is a struct (0) or union (1)
+ */
+ return (targ_k == BTF_KIND_STRUCT && !local_f) ||
+ (targ_k == BTF_KIND_UNION && local_f);
+ } else {
+ if (local_k != targ_k)
+ return 0;
+
+ /* match if the forward declaration is for the same kind */
+ return local_f == BTF_INFO_KFLAG(targ_t->info);
+ }
+ }
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ if (!btf_is_any_enum(targ_t))
+ return 0;
+
+ return bpf_core_enums_match(local_btf, local_t, targ_btf, targ_t);
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ if (behind_ptr) {
+ bool targ_f = BTF_INFO_KFLAG(targ_t->info);
+
+ if (local_k == targ_k)
+ return 1;
+
+ if (targ_k != BTF_KIND_FWD)
+ return 0;
+
+ return (local_k == BTF_KIND_UNION) == targ_f;
+ } else {
+ if (local_k != targ_k)
+ return 0;
+
+ return bpf_core_composites_match(local_btf, local_t, targ_btf, targ_t,
+ behind_ptr, level);
+ }
+ case BTF_KIND_INT: {
+ __u8 local_sgn;
+ __u8 targ_sgn;
+
+ if (local_k != targ_k)
+ return 0;
+
+ local_sgn = btf_int_encoding(local_t) & BTF_INT_SIGNED;
+ targ_sgn = btf_int_encoding(targ_t) & BTF_INT_SIGNED;
+
+ return local_t->size == targ_t->size && local_sgn == targ_sgn;
+ }
+ case BTF_KIND_PTR:
+ if (local_k != targ_k)
+ return 0;
+
+ behind_ptr = true;
+
+ local_id = local_t->type;
+ targ_id = targ_t->type;
+ goto recur;
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *local_array = btf_array(local_t);
+ const struct btf_array *targ_array = btf_array(targ_t);
+
+ if (local_k != targ_k)
+ return 0;
+
+ if (local_array->nelems != targ_array->nelems)
+ return 0;
+
+ local_id = local_array->type;
+ targ_id = targ_array->type;
+ goto recur;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *local_p = btf_params(local_t);
+ struct btf_param *targ_p = btf_params(targ_t);
+ __u16 local_vlen = btf_vlen(local_t);
+ __u16 targ_vlen = btf_vlen(targ_t);
+ int i, err;
+
+ if (local_k != targ_k)
+ return 0;
+
+ if (local_vlen != targ_vlen)
+ return 0;
+
+ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+ err = __bpf_core_types_match(local_btf, local_p->type, targ_btf,
+ targ_p->type, behind_ptr, level - 1);
+ if (err <= 0)
+ return err;
+ }
+
+ /* tail recurse for return type check */
+ local_id = local_t->type;
+ targ_id = targ_t->type;
+ goto recur;
+ }
+ default:
+ pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+ btf_kind_str(local_t), local_id, targ_id);
+ return 0;
+ }
+}
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
index 073039d8ca4f..1c0566daf8e8 100644
--- a/tools/lib/bpf/relo_core.h
+++ b/tools/lib/bpf/relo_core.h
@@ -46,9 +46,9 @@ struct bpf_core_spec {
struct bpf_core_relo_res {
/* expected value in the instruction, unless validate == false */
- __u32 orig_val;
+ __u64 orig_val;
/* new value that needs to be patched up to */
- __u32 new_val;
+ __u64 new_val;
/* relocation unsuccessful, poison instruction, but don't fail load */
bool poison;
/* some relocations can't be validated against orig_val */
@@ -68,8 +68,14 @@ struct bpf_core_relo_res {
__u32 new_type_id;
};
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id, int level);
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
const struct btf *targ_btf, __u32 targ_id);
+int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+ __u32 targ_id, bool behind_ptr, int level);
+int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+ __u32 targ_id);
size_t bpf_core_essential_name_len(const char *name);
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
index 4181fddb3687..4f2adc0bd6ca 100644
--- a/tools/lib/bpf/usdt.bpf.h
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -6,7 +6,6 @@
#include <linux/errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include <bpf/bpf_core_read.h>
/* Below types and maps are internal implementation details of libbpf's USDT
* support and are subjects to change. Also, bpf_usdt_xxx() API helpers should
@@ -30,14 +29,6 @@
#ifndef BPF_USDT_MAX_IP_CNT
#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT)
#endif
-/* We use BPF CO-RE to detect support for BPF cookie from BPF side. This is
- * the only dependency on CO-RE, so if it's undesirable, user can override
- * BPF_USDT_HAS_BPF_COOKIE to specify whether to BPF cookie is supported or not.
- */
-#ifndef BPF_USDT_HAS_BPF_COOKIE
-#define BPF_USDT_HAS_BPF_COOKIE \
- bpf_core_enum_value_exists(enum bpf_func_id___usdt, BPF_FUNC_get_attach_cookie___usdt)
-#endif
enum __bpf_usdt_arg_type {
BPF_USDT_ARG_CONST,
@@ -83,15 +74,12 @@ struct {
__type(value, __u32);
} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak;
-/* don't rely on user's BPF code to have latest definition of bpf_func_id */
-enum bpf_func_id___usdt {
- BPF_FUNC_get_attach_cookie___usdt = 0xBAD, /* value doesn't matter */
-};
+extern const _Bool LINUX_HAS_BPF_COOKIE __kconfig;
static __always_inline
int __bpf_usdt_spec_id(struct pt_regs *ctx)
{
- if (!BPF_USDT_HAS_BPF_COOKIE) {
+ if (!LINUX_HAS_BPF_COOKIE) {
long ip = PT_REGS_IP(ctx);
int *spec_id_ptr;
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index f1c9339cfbbc..d18e37982344 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -441,7 +441,7 @@ static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, siz
return 0;
}
-static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt)
+static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt)
{
char path[PATH_MAX], line[PATH_MAX], mode[16];
size_t seg_start, seg_end, seg_off;
@@ -531,35 +531,40 @@ err_out:
return err;
}
-static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative)
+static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long virtaddr)
{
struct elf_seg *seg;
int i;
- if (relative) {
- /* for shared libraries, address is relative offset and thus
- * should be fall within logical offset-based range of
- * [offset_start, offset_end)
- */
- for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
- if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start))
- return seg;
- }
- } else {
- /* for binaries, address is absolute and thus should be within
- * absolute address range of [seg_start, seg_end)
- */
- for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
- if (seg->start <= addr && addr < seg->end)
- return seg;
- }
+ /* for ELF binaries (both executables and shared libraries), we are
+ * given virtual address (absolute for executables, relative for
+ * libraries) which should match address range of [seg_start, seg_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->start <= virtaddr && virtaddr < seg->end)
+ return seg;
}
+ return NULL;
+}
+static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long offset)
+{
+ struct elf_seg *seg;
+ int i;
+
+ /* for VMA segments from /proc/<pid>/maps file, provided "address" is
+ * actually a file offset, so should be fall within logical
+ * offset-based range of [offset_start, offset_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->offset <= offset && offset < seg->offset + (seg->end - seg->start))
+ return seg;
+ }
return NULL;
}
-static int parse_usdt_note(Elf *elf, const char *path, long base_addr,
- GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
+static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
+ const char *data, size_t name_off, size_t desc_off,
struct usdt_note *usdt_note);
static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
@@ -568,8 +573,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie,
struct usdt_target **out_targets, size_t *out_target_cnt)
{
- size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0;
- struct elf_seg *segs = NULL, *lib_segs = NULL;
+ size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0;
+ struct elf_seg *segs = NULL, *vma_segs = NULL;
struct usdt_target *targets = NULL, *target;
long base_addr = 0;
Elf_Scn *notes_scn, *base_scn;
@@ -613,8 +618,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
struct elf_seg *seg = NULL;
void *tmp;
- err = parse_usdt_note(elf, path, base_addr, &nhdr,
- data->d_buf, name_off, desc_off, &note);
+ err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, &note);
if (err)
goto err_out;
@@ -648,36 +652,33 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
*
* [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
*/
- usdt_rel_ip = usdt_abs_ip = note.loc_addr;
- if (base_addr) {
+ usdt_abs_ip = note.loc_addr;
+ if (base_addr)
usdt_abs_ip += base_addr - note.base_addr;
- usdt_rel_ip += base_addr - note.base_addr;
- }
- if (ehdr.e_type == ET_EXEC) {
- /* When attaching uprobes (which what USDTs basically
- * are) kernel expects a relative IP to be specified,
- * so if we are attaching to an executable ELF binary
- * (i.e., not a shared library), we need to calculate
- * proper relative IP based on ELF's load address
- */
- seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */);
- if (!seg) {
- err = -ESRCH;
- pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n",
- usdt_provider, usdt_name, path, usdt_abs_ip);
- goto err_out;
- }
- if (!seg->is_exec) {
- err = -ESRCH;
- pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n",
- path, seg->start, seg->end, usdt_provider, usdt_name,
- usdt_abs_ip);
- goto err_out;
- }
+ /* When attaching uprobes (which is what USDTs basically are)
+ * kernel expects file offset to be specified, not a relative
+ * virtual address, so we need to translate virtual address to
+ * file offset, for both ET_EXEC and ET_DYN binaries.
+ */
+ seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n",
+ usdt_provider, usdt_name, path, usdt_abs_ip);
+ goto err_out;
+ }
+ if (!seg->is_exec) {
+ err = -ESRCH;
+ pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n",
+ path, seg->start, seg->end, usdt_provider, usdt_name,
+ usdt_abs_ip);
+ goto err_out;
+ }
+ /* translate from virtual address to file offset */
+ usdt_rel_ip = usdt_abs_ip - seg->start + seg->offset;
- usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset);
- } else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */
+ if (ehdr.e_type == ET_DYN && !man->has_bpf_cookie) {
/* If we don't have BPF cookie support but need to
* attach to a shared library, we'll need to know and
* record absolute addresses of attach points due to
@@ -697,9 +698,9 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
goto err_out;
}
- /* lib_segs are lazily initialized only if necessary */
- if (lib_seg_cnt == 0) {
- err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt);
+ /* vma_segs are lazily initialized only if necessary */
+ if (vma_seg_cnt == 0) {
+ err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt);
if (err) {
pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n",
pid, path, err);
@@ -707,7 +708,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
}
}
- seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */);
+ seg = find_vma_seg(vma_segs, vma_seg_cnt, usdt_rel_ip);
if (!seg) {
err = -ESRCH;
pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n",
@@ -715,7 +716,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
goto err_out;
}
- usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset);
+ usdt_abs_ip = seg->start - seg->offset + usdt_rel_ip;
}
pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n",
@@ -723,7 +724,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args,
seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0);
- /* Adjust semaphore address to be a relative offset */
+ /* Adjust semaphore address to be a file offset */
if (note.sema_addr) {
if (!man->has_sema_refcnt) {
pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n",
@@ -732,7 +733,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
goto err_out;
}
- seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */);
+ seg = find_elf_seg(segs, seg_cnt, note.sema_addr);
if (!seg) {
err = -ESRCH;
pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n",
@@ -747,7 +748,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
goto err_out;
}
- usdt_sema_off = note.sema_addr - (seg->start - seg->offset);
+ usdt_sema_off = note.sema_addr - seg->start + seg->offset;
pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n",
usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ",
@@ -770,7 +771,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
target->rel_ip = usdt_rel_ip;
target->sema_off = usdt_sema_off;
- /* notes->args references strings from Elf itself, so they can
+ /* notes.args references strings from Elf itself, so they can
* be referenced safely until elf_end() call
*/
target->spec_str = note.args;
@@ -788,7 +789,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
err_out:
free(segs);
- free(lib_segs);
+ free(vma_segs);
if (err < 0)
free(targets);
return err;
@@ -1089,8 +1090,8 @@ err_out:
/* Parse out USDT ELF note from '.note.stapsdt' section.
* Logic inspired by perf's code.
*/
-static int parse_usdt_note(Elf *elf, const char *path, long base_addr,
- GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
+static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
+ const char *data, size_t name_off, size_t desc_off,
struct usdt_note *note)
{
const char *provider, *name, *args;
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 8b990a52aada..c260006106be 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -787,3 +787,8 @@ bool arch_is_retpoline(struct symbol *sym)
{
return !strncmp(sym->name, "__x86_indirect_", 15);
}
+
+bool arch_is_rethunk(struct symbol *sym)
+{
+ return !strcmp(sym->name, "__x86_return_thunk");
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index f4c3a5091737..24fbe803a0d3 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -68,6 +68,8 @@ const struct option check_options[] = {
OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"),
OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
+ OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
+ OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"),
OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
@@ -123,6 +125,7 @@ static bool opts_valid(void)
opts.noinstr ||
opts.orc ||
opts.retpoline ||
+ opts.rethunk ||
opts.sls ||
opts.stackval ||
opts.static_call ||
@@ -135,6 +138,11 @@ static bool opts_valid(void)
return true;
}
+ if (opts.unret && !opts.rethunk) {
+ ERROR("--unret requires --rethunk");
+ return false;
+ }
+
if (opts.dump_orc)
return true;
@@ -163,6 +171,11 @@ static bool link_opts_valid(struct objtool_file *file)
return false;
}
+ if (opts.unret) {
+ ERROR("--unret requires --link");
+ return false;
+ }
+
return true;
}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 864bb9dd3584..0cec74da7ffe 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -376,7 +376,8 @@ static int decode_instructions(struct objtool_file *file)
sec->text = true;
if (!strcmp(sec->name, ".noinstr.text") ||
- !strcmp(sec->name, ".entry.text"))
+ !strcmp(sec->name, ".entry.text") ||
+ !strncmp(sec->name, ".text.__x86.", 12))
sec->noinstr = true;
for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
@@ -749,6 +750,52 @@ static int create_retpoline_sites_sections(struct objtool_file *file)
return 0;
}
+static int create_return_sites_sections(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct section *sec;
+ int idx;
+
+ sec = find_section_by_name(file->elf, ".return_sites");
+ if (sec) {
+ WARN("file already has .return_sites, skipping");
+ return 0;
+ }
+
+ idx = 0;
+ list_for_each_entry(insn, &file->return_thunk_list, call_node)
+ idx++;
+
+ if (!idx)
+ return 0;
+
+ sec = elf_create_section(file->elf, ".return_sites", 0,
+ sizeof(int), idx);
+ if (!sec) {
+ WARN("elf_create_section: .return_sites");
+ return -1;
+ }
+
+ idx = 0;
+ list_for_each_entry(insn, &file->return_thunk_list, call_node) {
+
+ int *site = (int *)sec->data->d_buf + idx;
+ *site = 0;
+
+ if (elf_add_reloc_to_insn(file->elf, sec,
+ idx * sizeof(int),
+ R_X86_64_PC32,
+ insn->sec, insn->offset)) {
+ WARN("elf_add_reloc_to_insn: .return_sites");
+ return -1;
+ }
+
+ idx++;
+ }
+
+ return 0;
+}
+
static int create_ibt_endbr_seal_sections(struct objtool_file *file)
{
struct instruction *insn;
@@ -1083,6 +1130,11 @@ __weak bool arch_is_retpoline(struct symbol *sym)
return false;
}
+__weak bool arch_is_rethunk(struct symbol *sym)
+{
+ return false;
+}
+
#define NEGATIVE_RELOC ((void *)-1L)
static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
@@ -1250,6 +1302,19 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in
annotate_call_site(file, insn, false);
}
+static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
+{
+ /*
+ * Return thunk tail calls are really just returns in disguise,
+ * so convert them accordingly.
+ */
+ insn->type = INSN_RETURN;
+ insn->retpoline_safe = true;
+
+ if (add)
+ list_add_tail(&insn->call_node, &file->return_thunk_list);
+}
+
static bool same_function(struct instruction *insn1, struct instruction *insn2)
{
return insn1->func->pfunc == insn2->func->pfunc;
@@ -1302,6 +1367,9 @@ static int add_jump_destinations(struct objtool_file *file)
} else if (reloc->sym->retpoline_thunk) {
add_retpoline_call(file, insn);
continue;
+ } else if (reloc->sym->return_thunk) {
+ add_return_call(file, insn, true);
+ continue;
} else if (insn->func) {
/*
* External sibling call or internal sibling call with
@@ -1320,6 +1388,21 @@ static int add_jump_destinations(struct objtool_file *file)
jump_dest = find_insn(file, dest_sec, dest_off);
if (!jump_dest) {
+ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
+
+ /*
+ * This is a special case for zen_untrain_ret().
+ * It jumps to __x86_return_thunk(), but objtool
+ * can't find the thunk's starting RET
+ * instruction, because the RET is also in the
+ * middle of another instruction. Objtool only
+ * knows about the outer instruction.
+ */
+ if (sym && sym->return_thunk) {
+ add_return_call(file, insn, false);
+ continue;
+ }
+
WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
insn->sec, insn->offset, dest_sec->name,
dest_off);
@@ -1949,16 +2032,35 @@ static int read_unwind_hints(struct objtool_file *file)
insn->hint = true;
- if (opts.ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
+ if (hint->type == UNWIND_HINT_TYPE_SAVE) {
+ insn->hint = false;
+ insn->save = true;
+ continue;
+ }
+
+ if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
+ insn->restore = true;
+ continue;
+ }
+
+ if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
- if (sym && sym->bind == STB_GLOBAL &&
- insn->type != INSN_ENDBR && !insn->noendbr) {
- WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR",
- insn->sec, insn->offset);
+ if (sym && sym->bind == STB_GLOBAL) {
+ if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) {
+ WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR",
+ insn->sec, insn->offset);
+ }
+
+ insn->entry = 1;
}
}
+ if (hint->type == UNWIND_HINT_TYPE_ENTRY) {
+ hint->type = UNWIND_HINT_TYPE_CALL;
+ insn->entry = 1;
+ }
+
if (hint->type == UNWIND_HINT_TYPE_FUNC) {
insn->cfi = &func_cfi;
continue;
@@ -2032,8 +2134,10 @@ static int read_retpoline_hints(struct objtool_file *file)
}
if (insn->type != INSN_JUMP_DYNAMIC &&
- insn->type != INSN_CALL_DYNAMIC) {
- WARN_FUNC("retpoline_safe hint not an indirect jump/call",
+ insn->type != INSN_CALL_DYNAMIC &&
+ insn->type != INSN_RETURN &&
+ insn->type != INSN_NOP) {
+ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop",
insn->sec, insn->offset);
return -1;
}
@@ -2184,6 +2288,9 @@ static int classify_symbols(struct objtool_file *file)
if (arch_is_retpoline(func))
func->retpoline_thunk = true;
+ if (arch_is_rethunk(func))
+ func->return_thunk = true;
+
if (!strcmp(func->name, "__fentry__"))
func->fentry = true;
@@ -3190,7 +3297,7 @@ static struct instruction *next_insn_to_validate(struct objtool_file *file,
* Follow the branch starting at the given instruction, and recursively follow
* any other branches (jumps). Meanwhile, track the frame pointer state at
* each instruction and validate all the rules described in
- * tools/objtool/Documentation/stack-validation.txt.
+ * tools/objtool/Documentation/objtool.txt.
*/
static int validate_branch(struct objtool_file *file, struct symbol *func,
struct instruction *insn, struct insn_state state)
@@ -3218,8 +3325,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 1;
}
- visited = 1 << state.uaccess;
- if (insn->visited) {
+ visited = VISITED_BRANCH << state.uaccess;
+ if (insn->visited & VISITED_BRANCH_MASK) {
if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
return 1;
@@ -3233,6 +3340,35 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
state.instr += insn->instr;
if (insn->hint) {
+ if (insn->restore) {
+ struct instruction *save_insn, *i;
+
+ i = insn;
+ save_insn = NULL;
+
+ sym_for_each_insn_continue_reverse(file, func, i) {
+ if (i->save) {
+ save_insn = i;
+ break;
+ }
+ }
+
+ if (!save_insn) {
+ WARN_FUNC("no corresponding CFI save for CFI restore",
+ sec, insn->offset);
+ return 1;
+ }
+
+ if (!save_insn->visited) {
+ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
+ sec, insn->offset);
+ return 1;
+ }
+
+ insn->cfi = save_insn->cfi;
+ nr_cfi_reused++;
+ }
+
state.cfi = *insn->cfi;
} else {
/* XXX track if we actually changed state.cfi */
@@ -3433,6 +3569,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
return warnings;
}
+/*
+ * Validate rethunk entry constraint: must untrain RET before the first RET.
+ *
+ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes
+ * before an actual RET instruction.
+ */
+static int validate_entry(struct objtool_file *file, struct instruction *insn)
+{
+ struct instruction *next, *dest;
+ int ret, warnings = 0;
+
+ for (;;) {
+ next = next_insn_to_validate(file, insn);
+
+ if (insn->visited & VISITED_ENTRY)
+ return 0;
+
+ insn->visited |= VISITED_ENTRY;
+
+ if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+ struct alternative *alt;
+ bool skip_orig = false;
+
+ list_for_each_entry(alt, &insn->alts, list) {
+ if (alt->skip_orig)
+ skip_orig = true;
+
+ ret = validate_entry(file, alt->insn);
+ if (ret) {
+ if (opts.backtrace)
+ BT_FUNC("(alt)", insn);
+ return ret;
+ }
+ }
+
+ if (skip_orig)
+ return 0;
+ }
+
+ switch (insn->type) {
+
+ case INSN_CALL_DYNAMIC:
+ case INSN_JUMP_DYNAMIC:
+ case INSN_JUMP_DYNAMIC_CONDITIONAL:
+ WARN_FUNC("early indirect call", insn->sec, insn->offset);
+ return 1;
+
+ case INSN_JUMP_UNCONDITIONAL:
+ case INSN_JUMP_CONDITIONAL:
+ if (!is_sibling_call(insn)) {
+ if (!insn->jump_dest) {
+ WARN_FUNC("unresolved jump target after linking?!?",
+ insn->sec, insn->offset);
+ return -1;
+ }
+ ret = validate_entry(file, insn->jump_dest);
+ if (ret) {
+ if (opts.backtrace) {
+ BT_FUNC("(branch%s)", insn,
+ insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : "");
+ }
+ return ret;
+ }
+
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
+ return 0;
+
+ break;
+ }
+
+ /* fallthrough */
+ case INSN_CALL:
+ dest = find_insn(file, insn->call_dest->sec,
+ insn->call_dest->offset);
+ if (!dest) {
+ WARN("Unresolved function after linking!?: %s",
+ insn->call_dest->name);
+ return -1;
+ }
+
+ ret = validate_entry(file, dest);
+ if (ret) {
+ if (opts.backtrace)
+ BT_FUNC("(call)", insn);
+ return ret;
+ }
+ /*
+ * If a call returns without error, it must have seen UNTRAIN_RET.
+ * Therefore any non-error return is a success.
+ */
+ return 0;
+
+ case INSN_RETURN:
+ WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset);
+ return 1;
+
+ case INSN_NOP:
+ if (insn->retpoline_safe)
+ return 0;
+ break;
+
+ default:
+ break;
+ }
+
+ if (!next) {
+ WARN_FUNC("teh end!", insn->sec, insn->offset);
+ return -1;
+ }
+ insn = next;
+ }
+
+ return warnings;
+}
+
+/*
+ * Validate that all branches starting at 'insn->entry' encounter UNRET_END
+ * before RET.
+ */
+static int validate_unret(struct objtool_file *file)
+{
+ struct instruction *insn;
+ int ret, warnings = 0;
+
+ for_each_insn(file, insn) {
+ if (!insn->entry)
+ continue;
+
+ ret = validate_entry(file, insn);
+ if (ret < 0) {
+ WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset);
+ return ret;
+ }
+ warnings += ret;
+ }
+
+ return warnings;
+}
+
static int validate_retpoline(struct objtool_file *file)
{
struct instruction *insn;
@@ -3440,7 +3715,8 @@ static int validate_retpoline(struct objtool_file *file)
for_each_insn(file, insn) {
if (insn->type != INSN_JUMP_DYNAMIC &&
- insn->type != INSN_CALL_DYNAMIC)
+ insn->type != INSN_CALL_DYNAMIC &&
+ insn->type != INSN_RETURN)
continue;
if (insn->retpoline_safe)
@@ -3455,9 +3731,17 @@ static int validate_retpoline(struct objtool_file *file)
if (!strcmp(insn->sec->name, ".init.text") && !opts.module)
continue;
- WARN_FUNC("indirect %s found in RETPOLINE build",
- insn->sec, insn->offset,
- insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+ if (insn->type == INSN_RETURN) {
+ if (opts.rethunk) {
+ WARN_FUNC("'naked' return found in RETHUNK build",
+ insn->sec, insn->offset);
+ } else
+ continue;
+ } else {
+ WARN_FUNC("indirect %s found in RETPOLINE build",
+ insn->sec, insn->offset,
+ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+ }
warnings++;
}
@@ -3826,8 +4110,7 @@ static int validate_ibt(struct objtool_file *file)
!strcmp(sec->name, "__bug_table") ||
!strcmp(sec->name, "__ex_table") ||
!strcmp(sec->name, "__jump_table") ||
- !strcmp(sec->name, "__mcount_loc") ||
- !strcmp(sec->name, "__tracepoints"))
+ !strcmp(sec->name, "__mcount_loc"))
continue;
list_for_each_entry(reloc, &sec->reloc->reloc_list, list)
@@ -3946,6 +4229,17 @@ int check(struct objtool_file *file)
warnings += ret;
}
+ if (opts.unret) {
+ /*
+ * Must be after validate_branch() and friends, it plays
+ * further games with insn->visited.
+ */
+ ret = validate_unret(file);
+ if (ret < 0)
+ return ret;
+ warnings += ret;
+ }
+
if (opts.ibt) {
ret = validate_ibt(file);
if (ret < 0)
@@ -3974,6 +4268,13 @@ int check(struct objtool_file *file)
warnings += ret;
}
+ if (opts.rethunk) {
+ ret = create_return_sites_sections(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
+
if (opts.mcount) {
ret = create_mcount_loc_sections(file);
if (ret < 0)
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 9b19cc304195..beb2f3aa94ff 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -89,6 +89,7 @@ const char *arch_ret_insn(int len);
int arch_decode_hint_reg(u8 sp_reg, int *base);
bool arch_is_retpoline(struct symbol *sym);
+bool arch_is_rethunk(struct symbol *sym);
int arch_rewrite_retpolines(struct objtool_file *file);
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 280ea18b7f2b..42a52f1a0add 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -19,6 +19,8 @@ struct opts {
bool noinstr;
bool orc;
bool retpoline;
+ bool rethunk;
+ bool unret;
bool sls;
bool stackval;
bool static_call;
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index f10d7374f388..036129cebeee 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -46,16 +46,19 @@ struct instruction {
enum insn_type type;
unsigned long immediate;
- u8 dead_end : 1,
- ignore : 1,
- ignore_alts : 1,
- hint : 1,
- retpoline_safe : 1,
- noendbr : 1;
- /* 2 bit hole */
+ u16 dead_end : 1,
+ ignore : 1,
+ ignore_alts : 1,
+ hint : 1,
+ save : 1,
+ restore : 1,
+ retpoline_safe : 1,
+ noendbr : 1,
+ entry : 1;
+ /* 7 bit hole */
+
s8 instr;
u8 visited;
- /* u8 hole */
struct alt_group *alt_group;
struct symbol *call_dest;
@@ -69,6 +72,11 @@ struct instruction {
struct cfi_state *cfi;
};
+#define VISITED_BRANCH 0x01
+#define VISITED_BRANCH_UACCESS 0x02
+#define VISITED_BRANCH_MASK 0x03
+#define VISITED_ENTRY 0x04
+
static inline bool is_static_jump(struct instruction *insn)
{
return insn->type == INSN_JUMP_CONDITIONAL ||
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index adebfbc2b518..16f4067b82ae 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -57,6 +57,7 @@ struct symbol {
u8 uaccess_safe : 1;
u8 static_call_tramp : 1;
u8 retpoline_thunk : 1;
+ u8 return_thunk : 1;
u8 fentry : 1;
u8 profiling_func : 1;
struct list_head pv_target;
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index a6e72d916807..7f2d1b095333 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -24,6 +24,7 @@ struct objtool_file {
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 20);
struct list_head retpoline_call_list;
+ struct list_head return_thunk_list;
struct list_head static_call_list;
struct list_head mcount_loc_list;
struct list_head endbr_list;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 512669ce064c..a7ecc32e3512 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -102,6 +102,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
INIT_LIST_HEAD(&file.retpoline_call_list);
+ INIT_LIST_HEAD(&file.return_thunk_list);
INIT_LIST_HEAD(&file.static_call_list);
INIT_LIST_HEAD(&file.mcount_loc_list);
INIT_LIST_HEAD(&file.endbr_list);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 897fc504918b..f075cf37a65e 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4280,6 +4280,7 @@ static int trace__replay(struct trace *trace)
goto out;
evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter");
+ trace->syscalls.events.sys_enter = evsel;
/* older kernels have syscalls tp versus raw_syscalls */
if (evsel == NULL)
evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter");
@@ -4292,6 +4293,7 @@ static int trace__replay(struct trace *trace)
}
evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit");
+ trace->syscalls.events.sys_exit = evsel;
if (evsel == NULL)
evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit");
if (evsel &&
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
index 5f57d9829956..4339692a8d0b 100755
--- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
+++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
@@ -61,7 +61,7 @@ def get_optional(perf_dict, field):
def get_offset(perf_dict, field):
if field in perf_dict:
- return f"+0x{perf_dict[field]:x}"
+ return "+%#x" % perf_dict[field]
return ""
def get_dso_file_path(dso_name, dso_build_id):
@@ -76,7 +76,7 @@ def get_dso_file_path(dso_name, dso_build_id):
else:
append = "/elf"
- dso_path = f"{os.environ['PERF_BUILDID_DIR']}/{dso_name}/{dso_build_id}{append}"
+ dso_path = os.environ['PERF_BUILDID_DIR'] + "/" + dso_name + "/" + dso_build_id + append;
# Replace duplicate slash chars to single slash char
dso_path = dso_path.replace('//', '/', 1)
return dso_path
@@ -94,8 +94,8 @@ def read_disam(dso_fname, dso_start, start_addr, stop_addr):
start_addr = start_addr - dso_start;
stop_addr = stop_addr - dso_start;
disasm = [ options.objdump_name, "-d", "-z",
- f"--start-address=0x{start_addr:x}",
- f"--stop-address=0x{stop_addr:x}" ]
+ "--start-address="+format(start_addr,"#x"),
+ "--stop-address="+format(stop_addr,"#x") ]
disasm += [ dso_fname ]
disasm_output = check_output(disasm).decode('utf-8').split('\n')
disasm_cache[addr_range] = disasm_output
@@ -109,12 +109,14 @@ def print_disam(dso_fname, dso_start, start_addr, stop_addr):
m = disasm_re.search(line)
if m is None:
continue
- print(f"\t{line}")
+ print("\t" + line)
def print_sample(sample):
- print(f"Sample = {{ cpu: {sample['cpu']:04} addr: 0x{sample['addr']:016x} " \
- f"phys_addr: 0x{sample['phys_addr']:016x} ip: 0x{sample['ip']:016x} " \
- f"pid: {sample['pid']} tid: {sample['tid']} period: {sample['period']} time: {sample['time']} }}")
+ print("Sample = { cpu: %04d addr: 0x%016x phys_addr: 0x%016x ip: 0x%016x " \
+ "pid: %d tid: %d period: %d time: %d }" % \
+ (sample['cpu'], sample['addr'], sample['phys_addr'], \
+ sample['ip'], sample['pid'], sample['tid'], \
+ sample['period'], sample['time']))
def trace_begin():
print('ARM CoreSight Trace Data Assembler Dump')
@@ -131,7 +133,7 @@ def common_start_str(comm, sample):
cpu = sample["cpu"]
pid = sample["pid"]
tid = sample["tid"]
- return f"{comm:>16} {pid:>5}/{tid:<5} [{cpu:04}] {sec:9}.{ns:09} "
+ return "%16s %5u/%-5u [%04u] %9u.%09u " % (comm, pid, tid, cpu, sec, ns)
# This code is copied from intel-pt-events.py for printing source code
# line and symbols.
@@ -171,7 +173,7 @@ def print_srccode(comm, param_dict, sample, symbol, dso):
glb_line_number = line_number
glb_source_file_name = source_file_name
- print(f"{start_str}{src_str}")
+ print(start_str, src_str)
def process_event(param_dict):
global cache_size
@@ -188,7 +190,7 @@ def process_event(param_dict):
symbol = get_optional(param_dict, "symbol")
if (options.verbose == True):
- print(f"Event type: {name}")
+ print("Event type: %s" % name)
print_sample(sample)
# If cannot find dso so cannot dump assembler, bail out
@@ -197,7 +199,7 @@ def process_event(param_dict):
# Validate dso start and end addresses
if ((dso_start == '[unknown]') or (dso_end == '[unknown]')):
- print(f"Failed to find valid dso map for dso {dso}")
+ print("Failed to find valid dso map for dso %s" % dso)
return
if (name[0:12] == "instructions"):
@@ -244,15 +246,15 @@ def process_event(param_dict):
# Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
if (start_addr == 0 and stop_addr == 4):
- print(f"CPU{cpu}: CS_ETM_TRACE_ON packet is inserted")
+ print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
return
if (start_addr < int(dso_start) or start_addr > int(dso_end)):
- print(f"Start address 0x{start_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
+ print("Start address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (start_addr, int(dso_start), int(dso_end), dso))
return
if (stop_addr < int(dso_start) or stop_addr > int(dso_end)):
- print(f"Stop address 0x{stop_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
+ print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso))
return
if (options.objdump_name != None):
@@ -267,6 +269,6 @@ def process_event(param_dict):
if path.exists(dso_fname):
print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
else:
- print(f"Failed to find dso {dso} for address range [ 0x{start_addr:x} .. 0x{stop_addr:x} ]")
+ print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr))
print_srccode(comm, param_dict, sample, symbol, dso)
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index 4ad0dfbc8b21..7c7d20fc503a 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -20,8 +20,6 @@
#include "tsc.h"
#include "mmap.h"
#include "tests.h"
-#include "pmu.h"
-#include "pmu-hybrid.h"
/*
* Except x86_64/i386 and Arm64, other archs don't support TSC in perf. Just
@@ -106,28 +104,21 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
evlist__config(evlist, &opts, NULL);
- evsel = evlist__first(evlist);
-
- evsel->core.attr.comm = 1;
- evsel->core.attr.disabled = 1;
- evsel->core.attr.enable_on_exec = 0;
-
- /*
- * For hybrid "cycles:u", it creates two events.
- * Init the second evsel here.
- */
- if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) {
- evsel = evsel__next(evsel);
+ /* For hybrid "cycles:u", it creates two events */
+ evlist__for_each_entry(evlist, evsel) {
evsel->core.attr.comm = 1;
evsel->core.attr.disabled = 1;
evsel->core.attr.enable_on_exec = 0;
}
- if (evlist__open(evlist) == -ENOENT) {
- err = TEST_SKIP;
+ ret = evlist__open(evlist);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ err = TEST_SKIP;
+ else
+ pr_debug("evlist__open() failed\n");
goto out_err;
}
- CHECK__(evlist__open(evlist));
CHECK__(evlist__mmap(evlist, UINT_MAX));
@@ -167,10 +158,12 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
goto next_event;
if (strcmp(event->comm.comm, comm1) == 0) {
+ CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event));
CHECK__(evsel__parse_sample(evsel, event, &sample));
comm1_time = sample.time;
}
if (strcmp(event->comm.comm, comm2) == 0) {
+ CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event));
CHECK__(evsel__parse_sample(evsel, event, &sample));
comm2_time = sample.time;
}
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f8ad581ea247..d2c9b09ddb48 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -9,6 +9,7 @@
#include <linux/bpf.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
+#include <linux/filter.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/string.h>
@@ -49,6 +50,7 @@ struct bpf_prog_priv {
struct bpf_insn *insns_buf;
int nr_types;
int *type_mapping;
+ int *prologue_fds;
};
struct bpf_perf_object {
@@ -56,6 +58,11 @@ struct bpf_perf_object {
struct bpf_object *obj;
};
+struct bpf_preproc_result {
+ struct bpf_insn *new_insn_ptr;
+ int new_insn_cnt;
+};
+
static LIST_HEAD(bpf_objects_list);
static struct hashmap *bpf_program_hash;
static struct hashmap *bpf_map_hash;
@@ -63,20 +70,16 @@ static struct hashmap *bpf_map_hash;
static struct bpf_perf_object *
bpf_perf_object__next(struct bpf_perf_object *prev)
{
- struct bpf_perf_object *next;
-
- if (!prev)
- next = list_first_entry(&bpf_objects_list,
- struct bpf_perf_object,
- list);
- else
- next = list_next_entry(prev, list);
+ if (!prev) {
+ if (list_empty(&bpf_objects_list))
+ return NULL;
- /* Empty list is noticed here so don't need checking on entry. */
- if (&next->list == &bpf_objects_list)
+ return list_first_entry(&bpf_objects_list, struct bpf_perf_object, list);
+ }
+ if (list_is_last(&prev->list, &bpf_objects_list))
return NULL;
- return next;
+ return list_next_entry(prev, list);
}
#define bpf_perf_object__for_each(perf_obj, tmp) \
@@ -86,6 +89,7 @@ bpf_perf_object__next(struct bpf_perf_object *prev)
(perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp))
static bool libbpf_initialized;
+static int libbpf_sec_handler;
static int bpf_perf_object__add(struct bpf_object *obj)
{
@@ -99,12 +103,76 @@ static int bpf_perf_object__add(struct bpf_object *obj)
return perf_obj ? 0 : -ENOMEM;
}
+static void *program_priv(const struct bpf_program *prog)
+{
+ void *priv;
+
+ if (IS_ERR_OR_NULL(bpf_program_hash))
+ return NULL;
+ if (!hashmap__find(bpf_program_hash, prog, &priv))
+ return NULL;
+ return priv;
+}
+
+static struct bpf_insn prologue_init_insn[] = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+};
+
+static int libbpf_prog_prepare_load_fn(struct bpf_program *prog,
+ struct bpf_prog_load_opts *opts __maybe_unused,
+ long cookie __maybe_unused)
+{
+ size_t init_size_cnt = ARRAY_SIZE(prologue_init_insn);
+ size_t orig_insn_cnt, insn_cnt, init_size, orig_size;
+ struct bpf_prog_priv *priv = program_priv(prog);
+ const struct bpf_insn *orig_insn;
+ struct bpf_insn *insn;
+
+ if (IS_ERR_OR_NULL(priv)) {
+ pr_debug("bpf: failed to get private field\n");
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ if (!priv->need_prologue)
+ return 0;
+
+ /* prepend initialization code to program instructions */
+ orig_insn = bpf_program__insns(prog);
+ orig_insn_cnt = bpf_program__insn_cnt(prog);
+ init_size = init_size_cnt * sizeof(*insn);
+ orig_size = orig_insn_cnt * sizeof(*insn);
+
+ insn_cnt = orig_insn_cnt + init_size_cnt;
+ insn = malloc(insn_cnt * sizeof(*insn));
+ if (!insn)
+ return -ENOMEM;
+
+ memcpy(insn, prologue_init_insn, init_size);
+ memcpy((char *) insn + init_size, orig_insn, orig_size);
+ bpf_program__set_insns(prog, insn, insn_cnt);
+ return 0;
+}
+
static int libbpf_init(void)
{
+ LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts,
+ .prog_prepare_load_fn = libbpf_prog_prepare_load_fn,
+ );
+
if (libbpf_initialized)
return 0;
libbpf_set_print(libbpf_perf_print);
+ libbpf_sec_handler = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE,
+ 0, &handler_opts);
+ if (libbpf_sec_handler < 0) {
+ pr_debug("bpf: failed to register libbpf section handler: %d\n",
+ libbpf_sec_handler);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
libbpf_initialized = true;
return 0;
}
@@ -188,14 +256,31 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
return obj;
}
+static void close_prologue_programs(struct bpf_prog_priv *priv)
+{
+ struct perf_probe_event *pev;
+ int i, fd;
+
+ if (!priv->need_prologue)
+ return;
+ pev = &priv->pev;
+ for (i = 0; i < pev->ntevs; i++) {
+ fd = priv->prologue_fds[i];
+ if (fd != -1)
+ close(fd);
+ }
+}
+
static void
clear_prog_priv(const struct bpf_program *prog __maybe_unused,
void *_priv)
{
struct bpf_prog_priv *priv = _priv;
+ close_prologue_programs(priv);
cleanup_perf_probe_events(&priv->pev, 1);
zfree(&priv->insns_buf);
+ zfree(&priv->prologue_fds);
zfree(&priv->type_mapping);
zfree(&priv->sys_name);
zfree(&priv->evt_name);
@@ -243,17 +328,6 @@ static bool ptr_equal(const void *key1, const void *key2,
return key1 == key2;
}
-static void *program_priv(const struct bpf_program *prog)
-{
- void *priv;
-
- if (IS_ERR_OR_NULL(bpf_program_hash))
- return NULL;
- if (!hashmap__find(bpf_program_hash, prog, &priv))
- return NULL;
- return priv;
-}
-
static int program_set_priv(struct bpf_program *prog, void *priv)
{
void *old_priv;
@@ -558,8 +632,8 @@ static int bpf__prepare_probe(void)
static int
preproc_gen_prologue(struct bpf_program *prog, int n,
- struct bpf_insn *orig_insns, int orig_insns_cnt,
- struct bpf_prog_prep_result *res)
+ const struct bpf_insn *orig_insns, int orig_insns_cnt,
+ struct bpf_preproc_result *res)
{
struct bpf_prog_priv *priv = program_priv(prog);
struct probe_trace_event *tev;
@@ -607,7 +681,6 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
res->new_insn_ptr = buf;
res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
- res->pfd = NULL;
return 0;
errout:
@@ -715,7 +788,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
struct bpf_prog_priv *priv = program_priv(prog);
struct perf_probe_event *pev;
bool need_prologue = false;
- int err, i;
+ int i;
if (IS_ERR_OR_NULL(priv)) {
pr_debug("Internal error when hook preprocessor\n");
@@ -753,6 +826,13 @@ static int hook_load_preprocessor(struct bpf_program *prog)
return -ENOMEM;
}
+ priv->prologue_fds = malloc(sizeof(int) * pev->ntevs);
+ if (!priv->prologue_fds) {
+ pr_debug("Not enough memory: alloc prologue fds failed\n");
+ return -ENOMEM;
+ }
+ memset(priv->prologue_fds, -1, sizeof(int) * pev->ntevs);
+
priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
if (!priv->type_mapping) {
pr_debug("Not enough memory: alloc type_mapping failed\n");
@@ -761,13 +841,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
memset(priv->type_mapping, -1,
sizeof(int) * pev->ntevs);
- err = map_prologue(pev, priv->type_mapping, &priv->nr_types);
- if (err)
- return err;
-
- err = bpf_program__set_prep(prog, priv->nr_types,
- preproc_gen_prologue);
- return err;
+ return map_prologue(pev, priv->type_mapping, &priv->nr_types);
}
int bpf__probe(struct bpf_object *obj)
@@ -874,6 +948,77 @@ int bpf__unprobe(struct bpf_object *obj)
return ret;
}
+static int bpf_object__load_prologue(struct bpf_object *obj)
+{
+ int init_cnt = ARRAY_SIZE(prologue_init_insn);
+ const struct bpf_insn *orig_insns;
+ struct bpf_preproc_result res;
+ struct perf_probe_event *pev;
+ struct bpf_program *prog;
+ int orig_insns_cnt;
+
+ bpf_object__for_each_program(prog, obj) {
+ struct bpf_prog_priv *priv = program_priv(prog);
+ int err, i, fd;
+
+ if (IS_ERR_OR_NULL(priv)) {
+ pr_debug("bpf: failed to get private field\n");
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ if (!priv->need_prologue)
+ continue;
+
+ /*
+ * For each program that needs prologue we do following:
+ *
+ * - take its current instructions and use them
+ * to generate the new code with prologue
+ * - load new instructions with bpf_prog_load
+ * and keep the fd in prologue_fds
+ * - new fd will be used in bpf__foreach_event
+ * to connect this program with perf evsel
+ */
+ orig_insns = bpf_program__insns(prog);
+ orig_insns_cnt = bpf_program__insn_cnt(prog);
+
+ pev = &priv->pev;
+ for (i = 0; i < pev->ntevs; i++) {
+ /*
+ * Skipping artificall prologue_init_insn instructions
+ * (init_cnt), so the prologue can be generated instead
+ * of them.
+ */
+ err = preproc_gen_prologue(prog, i,
+ orig_insns + init_cnt,
+ orig_insns_cnt - init_cnt,
+ &res);
+ if (err)
+ return err;
+
+ fd = bpf_prog_load(bpf_program__get_type(prog),
+ bpf_program__name(prog), "GPL",
+ res.new_insn_ptr,
+ res.new_insn_cnt, NULL);
+ if (fd < 0) {
+ char bf[128];
+
+ libbpf_strerror(-errno, bf, sizeof(bf));
+ pr_debug("bpf: load objects with prologue failed: err=%d: (%s)\n",
+ -errno, bf);
+ return -errno;
+ }
+ priv->prologue_fds[i] = fd;
+ }
+ /*
+ * We no longer need the original program,
+ * we can unload it.
+ */
+ bpf_program__unload(prog);
+ }
+ return 0;
+}
+
int bpf__load(struct bpf_object *obj)
{
int err;
@@ -885,7 +1030,7 @@ int bpf__load(struct bpf_object *obj)
pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf);
return err;
}
- return 0;
+ return bpf_object__load_prologue(obj);
}
int bpf__foreach_event(struct bpf_object *obj,
@@ -920,13 +1065,10 @@ int bpf__foreach_event(struct bpf_object *obj,
for (i = 0; i < pev->ntevs; i++) {
tev = &pev->tevs[i];
- if (priv->need_prologue) {
- int type = priv->type_mapping[i];
-
- fd = bpf_program__nth_fd(prog, type);
- } else {
+ if (priv->need_prologue)
+ fd = priv->prologue_fds[i];
+ else
fd = bpf_program__fd(prog);
- }
if (fd < 0) {
pr_debug("bpf: failed to get file descriptor\n");
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index ecd377938eea..b3be5b1d9dbb 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -233,6 +233,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
return NULL;
}
+static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
+{
+ size_t i, phdrnum;
+ u64 sz;
+
+ if (elf_getphdrnum(elf, &phdrnum))
+ return -1;
+
+ for (i = 0; i < phdrnum; i++) {
+ if (gelf_getphdr(elf, i, phdr) == NULL)
+ return -1;
+
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ sz = max(phdr->p_memsz, phdr->p_filesz);
+ if (!sz)
+ continue;
+
+ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
+ return 0;
+ }
+
+ /* Not found any valid program header */
+ return -1;
+}
+
static bool want_demangle(bool is_kernel_sym)
{
return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
@@ -1209,6 +1236,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
sym.st_value);
used_opd = true;
}
+
/*
* When loading symbols in a data mapping, ABS symbols (which
* has a value of SHN_ABS in its st_shndx) failed at
@@ -1227,6 +1255,17 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
gelf_getshdr(sec, &shdr);
+ /*
+ * If the attribute bit SHF_ALLOC is not set, the section
+ * doesn't occupy memory during process execution.
+ * E.g. ".gnu.warning.*" section is used by linker to generate
+ * warnings when calling deprecated functions, the symbols in
+ * the section aren't loaded to memory during process execution,
+ * so skip them.
+ */
+ if (!(shdr.sh_flags & SHF_ALLOC))
+ continue;
+
secstrs = secstrs_sym;
/*
@@ -1262,11 +1301,20 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
goto out_elf_end;
} else if ((used_opd && runtime_ss->adjust_symbols) ||
(!used_opd && syms_ss->adjust_symbols)) {
+ GElf_Phdr phdr;
+
+ if (elf_read_program_header(syms_ss->elf,
+ (u64)sym.st_value, &phdr)) {
+ pr_warning("%s: failed to find program header for "
+ "symbol: %s st_value: %#" PRIx64 "\n",
+ __func__, elf_name, (u64)sym.st_value);
+ continue;
+ }
pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
- (u64)sym.st_value, (u64)shdr.sh_addr,
- (u64)shdr.sh_offset);
- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
+ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
+ (u64)phdr.p_offset);
+ sym.st_value -= phdr.p_vaddr - phdr.p_offset;
}
demangled = demangle_sym(dso, kmodule, elf_name);
diff --git a/tools/power/cpupower/debug/i386/dump_psb.c b/tools/power/cpupower/debug/i386/dump_psb.c
index 2c768cf70128..6fb81b42ea61 100644
--- a/tools/power/cpupower/debug/i386/dump_psb.c
+++ b/tools/power/cpupower/debug/i386/dump_psb.c
@@ -1,7 +1,5 @@
-/*
- * dump_psb. (c) 2004, Dave Jones, Red Hat Inc.
- * Licensed under the GPL v2.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+// dump_psb. (c) 2004, Dave Jones, Red Hat Inc.
#include <fcntl.h>
#include <stdio.h>
diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README
index da468bd510ca..e6020c0d59ec 100644
--- a/tools/power/pm-graph/README
+++ b/tools/power/pm-graph/README
@@ -6,7 +6,7 @@
|_| |___/ |_|
pm-graph: suspend/resume/boot timing analysis tools
- Version: 5.8
+ Version: 5.9
Author: Todd Brandt <todd.e.brandt@intel.com>
Home Page: https://01.org/pm-graph
@@ -97,8 +97,8 @@
(kernel/pre-3.15/enable_trace_events_suspend_resume.patch)
(kernel/pre-3.15/enable_trace_events_device_pm_callback.patch)
- If you're using a kernel older than 3.15.0, the following
- additional kernel parameters are required:
+ If you're using bootgraph, or sleepgraph with a kernel older than 3.15.0,
+ the following additional kernel parameters are required:
(e.g. in file /etc/default/grub)
GRUB_CMDLINE_LINUX_DEFAULT="... initcall_debug log_buf_len=32M ..."
diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py
index 2823cd3122f7..f96f50e0c336 100755
--- a/tools/power/pm-graph/bootgraph.py
+++ b/tools/power/pm-graph/bootgraph.py
@@ -69,22 +69,24 @@ class SystemValues(aslib.SystemValues):
bootloader = 'grub'
blexec = []
def __init__(self):
- self.hostname = platform.node()
+ self.kernel, self.hostname = 'unknown', platform.node()
self.testtime = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
if os.path.exists('/proc/version'):
fp = open('/proc/version', 'r')
- val = fp.read().strip()
+ self.kernel = self.kernelVersion(fp.read().strip())
fp.close()
- self.kernel = self.kernelVersion(val)
- else:
- self.kernel = 'unknown'
self.testdir = datetime.now().strftime('boot-%y%m%d-%H%M%S')
def kernelVersion(self, msg):
- return msg.split()[2]
+ m = re.match('^[Ll]inux *[Vv]ersion *(?P<v>\S*) .*', msg)
+ if m:
+ return m.group('v')
+ return 'unknown'
def checkFtraceKernelVersion(self):
- val = tuple(map(int, self.kernel.split('-')[0].split('.')))
- if val >= (4, 10, 0):
- return True
+ m = re.match('^(?P<x>[0-9]*)\.(?P<y>[0-9]*)\.(?P<z>[0-9]*).*', self.kernel)
+ if m:
+ val = tuple(map(int, m.groups()))
+ if val >= (4, 10, 0):
+ return True
return False
def kernelParams(self):
cmdline = 'initcall_debug log_buf_len=32M'
diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg
index 962e5768681c..4f80ad7d7275 100644
--- a/tools/power/pm-graph/config/custom-timeline-functions.cfg
+++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg
@@ -125,7 +125,7 @@ acpi_suspend_begin:
suspend_console:
acpi_pm_prepare:
syscore_suspend:
-arch_thaw_secondary_cpus_end:
+arch_enable_nonboot_cpus_end:
syscore_resume:
acpi_pm_finish:
resume_console:
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index ffd50953a024..33981adcdd68 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -66,8 +66,13 @@ from threading import Thread
from subprocess import call, Popen, PIPE
import base64
+debugtiming = False
+mystarttime = time.time()
def pprint(msg):
- print(msg)
+ if debugtiming:
+ print('[%09.3f] %s' % (time.time()-mystarttime, msg))
+ else:
+ print(msg)
sys.stdout.flush()
def ascii(text):
@@ -81,13 +86,14 @@ def ascii(text):
# store system values and test parameters
class SystemValues:
title = 'SleepGraph'
- version = '5.8'
+ version = '5.9'
ansi = False
rs = 0
display = ''
gzip = False
sync = False
wifi = False
+ netfix = False
verbose = False
testlog = True
dmesglog = True
@@ -108,6 +114,7 @@ class SystemValues:
cpucount = 0
memtotal = 204800
memfree = 204800
+ osversion = ''
srgap = 0
cgexp = False
testdir = ''
@@ -116,6 +123,7 @@ class SystemValues:
fpdtpath = '/sys/firmware/acpi/tables/FPDT'
epath = '/sys/kernel/debug/tracing/events/power/'
pmdpath = '/sys/power/pm_debug_messages'
+ s0ixpath = '/sys/module/intel_pmc_core/parameters/warn_on_s0ix_failures'
acpipath='/sys/module/acpi/parameters/debug_level'
traceevents = [
'suspend_resume',
@@ -156,6 +164,7 @@ class SystemValues:
ftop = False
usetraceevents = False
usetracemarkers = True
+ useftrace = True
usekprobes = True
usedevsrc = False
useprocmon = False
@@ -279,10 +288,16 @@ class SystemValues:
'intel_fbdev_set_suspend': {},
}
infocmds = [
+ [0, 'sysinfo', 'uname', '-a'],
+ [0, 'cpuinfo', 'head', '-7', '/proc/cpuinfo'],
[0, 'kparams', 'cat', '/proc/cmdline'],
[0, 'mcelog', 'mcelog'],
[0, 'pcidevices', 'lspci', '-tv'],
- [0, 'usbdevices', 'lsusb', '-t'],
+ [0, 'usbdevices', 'lsusb', '-tv'],
+ [0, 'acpidevices', 'sh', '-c', 'ls -l /sys/bus/acpi/devices/*/physical_node'],
+ [0, 's0ix_require', 'cat', '/sys/kernel/debug/pmc_core/substate_requirements'],
+ [0, 's0ix_debug', 'cat', '/sys/kernel/debug/pmc_core/slp_s0_debug_status'],
+ [1, 's0ix_residency', 'cat', '/sys/kernel/debug/pmc_core/slp_s0_residency_usec'],
[1, 'interrupts', 'cat', '/proc/interrupts'],
[1, 'wakeups', 'cat', '/sys/kernel/debug/wakeup_sources'],
[2, 'gpecounts', 'sh', '-c', 'grep -v invalid /sys/firmware/acpi/interrupts/*'],
@@ -358,8 +373,19 @@ class SystemValues:
self.outputResult({'error':msg})
sys.exit(1)
return False
- def usable(self, file):
- return (os.path.exists(file) and os.path.getsize(file) > 0)
+ def usable(self, file, ishtml=False):
+ if not os.path.exists(file) or os.path.getsize(file) < 1:
+ return False
+ if ishtml:
+ try:
+ fp = open(file, 'r')
+ res = fp.read(1000)
+ fp.close()
+ except:
+ return False
+ if '<html>' not in res:
+ return False
+ return True
def getExec(self, cmd):
try:
fp = Popen(['which', cmd], stdout=PIPE, stderr=PIPE).stdout
@@ -413,12 +439,16 @@ class SystemValues:
r = info['bios-release-date'] if 'bios-release-date' in info else ''
self.sysstamp = '# sysinfo | man:%s | plat:%s | cpu:%s | bios:%s | biosdate:%s | numcpu:%d | memsz:%d | memfr:%d' % \
(m, p, c, b, r, self.cpucount, self.memtotal, self.memfree)
+ if self.osversion:
+ self.sysstamp += ' | os:%s' % self.osversion
def printSystemInfo(self, fatal=False):
self.rootCheck(True)
out = dmidecode(self.mempath, fatal)
if len(out) < 1:
return
fmt = '%-24s: %s'
+ if self.osversion:
+ print(fmt % ('os-version', self.osversion))
for name in sorted(out):
print(fmt % (name, out[name]))
print(fmt % ('cpucount', ('%d' % self.cpucount)))
@@ -426,20 +456,25 @@ class SystemValues:
print(fmt % ('memfree', ('%d kB' % self.memfree)))
def cpuInfo(self):
self.cpucount = 0
- fp = open('/proc/cpuinfo', 'r')
- for line in fp:
- if re.match('^processor[ \t]*:[ \t]*[0-9]*', line):
- self.cpucount += 1
- fp.close()
- fp = open('/proc/meminfo', 'r')
- for line in fp:
- m = re.match('^MemTotal:[ \t]*(?P<sz>[0-9]*) *kB', line)
- if m:
- self.memtotal = int(m.group('sz'))
- m = re.match('^MemFree:[ \t]*(?P<sz>[0-9]*) *kB', line)
- if m:
- self.memfree = int(m.group('sz'))
- fp.close()
+ if os.path.exists('/proc/cpuinfo'):
+ with open('/proc/cpuinfo', 'r') as fp:
+ for line in fp:
+ if re.match('^processor[ \t]*:[ \t]*[0-9]*', line):
+ self.cpucount += 1
+ if os.path.exists('/proc/meminfo'):
+ with open('/proc/meminfo', 'r') as fp:
+ for line in fp:
+ m = re.match('^MemTotal:[ \t]*(?P<sz>[0-9]*) *kB', line)
+ if m:
+ self.memtotal = int(m.group('sz'))
+ m = re.match('^MemFree:[ \t]*(?P<sz>[0-9]*) *kB', line)
+ if m:
+ self.memfree = int(m.group('sz'))
+ if os.path.exists('/etc/os-release'):
+ with open('/etc/os-release', 'r') as fp:
+ for line in fp:
+ if line.startswith('PRETTY_NAME='):
+ self.osversion = line[12:].strip().replace('"', '')
def initTestOutput(self, name):
self.prefix = self.hostname
v = open('/proc/version', 'r').read().strip()
@@ -698,6 +733,8 @@ class SystemValues:
return False
return True
def fsetVal(self, val, path):
+ if not self.useftrace:
+ return False
return self.setVal(val, self.tpath+path)
def getVal(self, file):
res = ''
@@ -711,9 +748,11 @@ class SystemValues:
pass
return res
def fgetVal(self, path):
+ if not self.useftrace:
+ return ''
return self.getVal(self.tpath+path)
def cleanupFtrace(self):
- if(self.usecallgraph or self.usetraceevents or self.usedevsrc):
+ if self.useftrace:
self.fsetVal('0', 'events/kprobes/enable')
self.fsetVal('', 'kprobe_events')
self.fsetVal('1024', 'buffer_size_kb')
@@ -734,13 +773,14 @@ class SystemValues:
return True
return False
def initFtrace(self, quiet=False):
+ if not self.useftrace:
+ return
if not quiet:
sysvals.printSystemInfo(False)
pprint('INITIALIZING FTRACE...')
# turn trace off
self.fsetVal('0', 'tracing_on')
self.cleanupFtrace()
- self.testVal(self.pmdpath, 'basic', '1')
# set the trace clock to global
self.fsetVal('global', 'trace_clock')
self.fsetVal('nop', 'current_tracer')
@@ -766,6 +806,10 @@ class SystemValues:
# set trace type
self.fsetVal('function_graph', 'current_tracer')
self.fsetVal('', 'set_ftrace_filter')
+ # temporary hack to fix https://bugzilla.kernel.org/show_bug.cgi?id=212761
+ fp = open(self.tpath+'set_ftrace_notrace', 'w')
+ fp.write('native_queued_spin_lock_slowpath\ndev_driver_string')
+ fp.close()
# set trace format options
self.fsetVal('print-parent', 'trace_options')
self.fsetVal('funcgraph-abstime', 'trace_options')
@@ -846,6 +890,8 @@ class SystemValues:
fp.write('# turbostat %s\n' % test['turbo'])
if 'wifi' in test:
fp.write('# wifi %s\n' % test['wifi'])
+ if 'netfix' in test:
+ fp.write('# netfix %s\n' % test['netfix'])
if test['error'] or len(testdata) > 1:
fp.write('# enter_sleep_error %s\n' % test['error'])
return fp
@@ -865,6 +911,8 @@ class SystemValues:
fp.write('error%s: %s\n' % (n, testdata['error']))
else:
fp.write('result%s: pass\n' % n)
+ if 'mode' in testdata:
+ fp.write('mode%s: %s\n' % (n, testdata['mode']))
for v in ['suspend', 'resume', 'boot', 'lastinit']:
if v in testdata:
fp.write('%s%s: %.3f\n' % (v, n, testdata[v]))
@@ -901,6 +949,8 @@ class SystemValues:
fp.write(text)
fp.close()
def dlog(self, text):
+ if not self.dmesgfile:
+ return
self.putlog(self.dmesgfile, '# %s\n' % text)
def flog(self, text):
self.putlog(self.ftracefile, text)
@@ -954,34 +1004,31 @@ class SystemValues:
dirname = props[dev].syspath
if not dirname or not os.path.exists(dirname):
continue
- with open(dirname+'/power/async') as fp:
- text = fp.read()
- props[dev].isasync = False
- if 'enabled' in text:
+ props[dev].isasync = False
+ if os.path.exists(dirname+'/power/async'):
+ fp = open(dirname+'/power/async')
+ if 'enabled' in fp.read():
props[dev].isasync = True
+ fp.close()
fields = os.listdir(dirname)
- if 'product' in fields:
- with open(dirname+'/product', 'rb') as fp:
- props[dev].altname = ascii(fp.read())
- elif 'name' in fields:
- with open(dirname+'/name', 'rb') as fp:
- props[dev].altname = ascii(fp.read())
- elif 'model' in fields:
- with open(dirname+'/model', 'rb') as fp:
- props[dev].altname = ascii(fp.read())
- elif 'description' in fields:
- with open(dirname+'/description', 'rb') as fp:
- props[dev].altname = ascii(fp.read())
- elif 'id' in fields:
- with open(dirname+'/id', 'rb') as fp:
- props[dev].altname = ascii(fp.read())
- elif 'idVendor' in fields and 'idProduct' in fields:
- idv, idp = '', ''
- with open(dirname+'/idVendor', 'rb') as fp:
- idv = ascii(fp.read()).strip()
- with open(dirname+'/idProduct', 'rb') as fp:
- idp = ascii(fp.read()).strip()
- props[dev].altname = '%s:%s' % (idv, idp)
+ for file in ['product', 'name', 'model', 'description', 'id', 'idVendor']:
+ if file not in fields:
+ continue
+ try:
+ with open(os.path.join(dirname, file), 'rb') as fp:
+ props[dev].altname = ascii(fp.read())
+ except:
+ continue
+ if file == 'idVendor':
+ idv, idp = props[dev].altname.strip(), ''
+ try:
+ with open(os.path.join(dirname, 'idProduct'), 'rb') as fp:
+ idp = ascii(fp.read()).strip()
+ except:
+ props[dev].altname = ''
+ break
+ props[dev].altname = '%s:%s' % (idv, idp)
+ break
if props[dev].altname:
out = props[dev].altname.strip().replace('\n', ' ')\
.replace(',', ' ').replace(';', ' ')
@@ -1047,7 +1094,7 @@ class SystemValues:
self.cmd1[name] = self.dictify(info, delta)
elif not debug and delta and name in self.cmd1:
before, after = self.cmd1[name], self.dictify(info, delta)
- dinfo = ('\t%s\n' % before['@']) if '@' in before else ''
+ dinfo = ('\t%s\n' % before['@']) if '@' in before and len(before) > 1 else ''
prefix = self.commonPrefix(list(before.keys()))
for key in sorted(before):
if key in after and before[key] != after[key]:
@@ -1128,6 +1175,22 @@ class SystemValues:
val = valline[idx]
out.append('%s=%s' % (key, val))
return '|'.join(out)
+ def netfixon(self, net='both'):
+ cmd = self.getExec('netfix')
+ if not cmd:
+ return ''
+ fp = Popen([cmd, '-s', net, 'on'], stdout=PIPE, stderr=PIPE).stdout
+ out = ascii(fp.read()).strip()
+ fp.close()
+ return out
+ def wifiRepair(self):
+ out = self.netfixon('wifi')
+ if not out or 'error' in out.lower():
+ return ''
+ m = re.match('WIFI \S* ONLINE (?P<action>\S*)', out)
+ if not m:
+ return 'dead'
+ return m.group('action')
def wifiDetails(self, dev):
try:
info = open('/sys/class/net/%s/device/uevent' % dev, 'r').read().strip()
@@ -1144,12 +1207,12 @@ class SystemValues:
except:
return ''
for line in reversed(w.split('\n')):
- m = re.match(' *(?P<dev>.*): (?P<stat>[0-9a-f]*) .*', w.split('\n')[-1])
+ m = re.match(' *(?P<dev>.*): (?P<stat>[0-9a-f]*) .*', line)
if not m or (dev and dev != m.group('dev')):
continue
return m.group('dev')
return ''
- def pollWifi(self, dev, timeout=60):
+ def pollWifi(self, dev, timeout=10):
start = time.time()
while (time.time() - start) < timeout:
w = self.checkWifi(dev)
@@ -1157,6 +1220,11 @@ class SystemValues:
return '%s reconnected %.2f' % \
(self.wifiDetails(dev), max(0, time.time() - start))
time.sleep(0.01)
+ if self.netfix:
+ res = self.wifiRepair()
+ if res:
+ timeout = max(0, time.time() - start)
+ return '%s %s %d' % (self.wifiDetails(dev), res, timeout)
return '%s timeout %d' % (self.wifiDetails(dev), timeout)
def errorSummary(self, errinfo, msg):
found = False
@@ -1283,10 +1351,10 @@ sysvals = SystemValues()
switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0']
switchoff = ['disable', 'off', 'false', '0']
suspendmodename = {
- 'freeze': 'Freeze (S0)',
- 'standby': 'Standby (S1)',
- 'mem': 'Suspend (S3)',
- 'disk': 'Hibernate (S4)'
+ 'standby': 'standby (S1)',
+ 'freeze': 'freeze (S2idle)',
+ 'mem': 'suspend (S3)',
+ 'disk': 'hibernate (S4)'
}
# Class: DevProps
@@ -1376,6 +1444,7 @@ class Data:
'INVALID' : r'(?i).*\bINVALID\b.*',
'CRASH' : r'(?i).*\bCRASHED\b.*',
'TIMEOUT' : r'(?i).*\bTIMEOUT\b.*',
+ 'ABORT' : r'(?i).*\bABORT\b.*',
'IRQ' : r'.*\bgenirq: .*',
'TASKFAIL': r'.*Freezing of tasks *.*',
'ACPI' : r'.*\bACPI *(?P<b>[A-Za-z]*) *Error[: ].*',
@@ -1724,9 +1793,9 @@ class Data:
if 'waking' in self.dmesg[lp]:
tCnt = self.dmesg[lp]['waking'][0]
if self.dmesg[lp]['waking'][1] >= 0.001:
- tTry = '-%.0f' % (round(self.dmesg[lp]['waking'][1] * 1000))
+ tTry = '%.0f' % (round(self.dmesg[lp]['waking'][1] * 1000))
else:
- tTry = '-%.3f' % (self.dmesg[lp]['waking'][1] * 1000)
+ tTry = '%.3f' % (self.dmesg[lp]['waking'][1] * 1000)
text = '%.0f (%s ms waking %d times)' % (tL * 1000, tTry, tCnt)
else:
text = '%.0f' % (tL * 1000)
@@ -2107,6 +2176,30 @@ class Data:
# set resume complete to end at end marker
if 'resume_complete' in dm:
dm['resume_complete']['end'] = time
+ def initcall_debug_call(self, line, quick=False):
+ m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\
+ 'PM: *calling .* @ (?P<n>.*), parent: (?P<p>.*)', line)
+ if not m:
+ m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\
+ 'calling .* @ (?P<n>.*), parent: (?P<p>.*)', line)
+ if not m:
+ m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) calling '+\
+ '(?P<f>.*)\+ @ (?P<n>.*), parent: (?P<p>.*)', line)
+ if m:
+ return True if quick else m.group('t', 'f', 'n', 'p')
+ return False if quick else ('', '', '', '')
+ def initcall_debug_return(self, line, quick=False):
+ m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: PM: '+\
+ '.* returned (?P<r>[0-9]*) after (?P<dt>[0-9]*) usecs', line)
+ if not m:
+ m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\
+ '.* returned (?P<r>[0-9]*) after (?P<dt>[0-9]*) usecs', line)
+ if not m:
+ m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) call '+\
+ '(?P<f>.*)\+ returned .* after (?P<dt>.*) usecs', line)
+ if m:
+ return True if quick else m.group('t', 'f', 'dt')
+ return False if quick else ('', '', '')
def debugPrint(self):
for p in self.sortedPhases():
list = self.dmesg[p]['list']
@@ -2880,10 +2973,11 @@ class TestProps:
cmdlinefmt = '^# command \| (?P<cmd>.*)'
kparamsfmt = '^# kparams \| (?P<kp>.*)'
devpropfmt = '# Device Properties: .*'
- pinfofmt = '# platform-(?P<val>[a-z,A-Z,0-9]*): (?P<info>.*)'
+ pinfofmt = '# platform-(?P<val>[a-z,A-Z,0-9,_]*): (?P<info>.*)'
tracertypefmt = '# tracer: (?P<t>.*)'
firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$'
procexecfmt = 'ps - (?P<ps>.*)$'
+ procmultifmt = '@(?P<n>[0-9]*)\|(?P<ps>.*)$'
ftrace_line_fmt_fg = \
'^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\
' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\
@@ -2893,6 +2987,9 @@ class TestProps:
'(?P<flags>\S*) *(?P<time>[0-9\.]*): *'+\
'(?P<msg>.*)'
machinesuspend = 'machine_suspend\[.*'
+ multiproclist = dict()
+ multiproctime = 0.0
+ multiproccnt = 0
def __init__(self):
self.stamp = ''
self.sysinfo = ''
@@ -3063,6 +3160,7 @@ class TestRun:
self.ttemp = dict()
class ProcessMonitor:
+ maxchars = 512
def __init__(self):
self.proclist = dict()
self.running = False
@@ -3088,19 +3186,23 @@ class ProcessMonitor:
if ujiff > 0 or kjiff > 0:
running[pid] = ujiff + kjiff
process.wait()
- out = ''
+ out = ['']
for pid in running:
jiffies = running[pid]
val = self.proclist[pid]
- if out:
- out += ','
- out += '%s-%s %d' % (val['name'], pid, jiffies)
- return 'ps - '+out
+ if len(out[-1]) > self.maxchars:
+ out.append('')
+ elif len(out[-1]) > 0:
+ out[-1] += ','
+ out[-1] += '%s-%s %d' % (val['name'], pid, jiffies)
+ if len(out) > 1:
+ for line in out:
+ sysvals.fsetVal('ps - @%d|%s' % (len(out), line), 'trace_marker')
+ else:
+ sysvals.fsetVal('ps - %s' % out[0], 'trace_marker')
def processMonitor(self, tid):
while self.running:
- out = self.procstat()
- if out:
- sysvals.fsetVal(out, 'trace_marker')
+ self.procstat()
def start(self):
self.thread = Thread(target=self.processMonitor, args=(0,))
self.running = True
@@ -3144,7 +3246,6 @@ def doesTraceLogHaveTraceEvents():
# Function: appendIncompleteTraceLog
# Description:
-# [deprecated for kernel 3.15 or newer]
# Adds callgraph data which lacks trace event data. This is only
# for timelines generated from 3.15 or older
# Arguments:
@@ -3246,6 +3347,61 @@ def appendIncompleteTraceLog(testruns):
dev['ftrace'] = cg
break
+# Function: loadTraceLog
+# Description:
+# load the ftrace file into memory and fix up any ordering issues
+# Output:
+# TestProps instance and an array of lines in proper order
+def loadTraceLog():
+ tp, data, lines, trace = TestProps(), dict(), [], []
+ tf = sysvals.openlog(sysvals.ftracefile, 'r')
+ for line in tf:
+ # remove any latent carriage returns
+ line = line.replace('\r\n', '')
+ if tp.stampInfo(line, sysvals):
+ continue
+ # ignore all other commented lines
+ if line[0] == '#':
+ continue
+ # ftrace line: parse only valid lines
+ m = re.match(tp.ftrace_line_fmt, line)
+ if(not m):
+ continue
+ dur = m.group('dur') if tp.cgformat else 'traceevent'
+ info = (m.group('time'), m.group('proc'), m.group('pid'),
+ m.group('msg'), dur)
+ # group the data by timestamp
+ t = float(info[0])
+ if t in data:
+ data[t].append(info)
+ else:
+ data[t] = [info]
+ # we only care about trace event ordering
+ if (info[3].startswith('suspend_resume:') or \
+ info[3].startswith('tracing_mark_write:')) and t not in trace:
+ trace.append(t)
+ tf.close()
+ for t in sorted(data):
+ first, last, blk = [], [], data[t]
+ if len(blk) > 1 and t in trace:
+ # move certain lines to the start or end of a timestamp block
+ for i in range(len(blk)):
+ if 'SUSPEND START' in blk[i][3]:
+ first.append(i)
+ elif re.match('.* timekeeping_freeze.*begin', blk[i][3]):
+ last.append(i)
+ elif re.match('.* timekeeping_freeze.*end', blk[i][3]):
+ first.append(i)
+ elif 'RESUME COMPLETE' in blk[i][3]:
+ last.append(i)
+ if len(first) == 1 and len(last) == 0:
+ blk.insert(0, blk.pop(first[0]))
+ elif len(last) == 1 and len(first) == 0:
+ blk.append(blk.pop(last[0]))
+ for info in blk:
+ lines.append(info)
+ return (tp, lines)
+
# Function: parseTraceLog
# Description:
# Analyze an ftrace log output file generated from this app during
@@ -3271,32 +3427,12 @@ def parseTraceLog(live=False):
# extract the callgraph and traceevent data
s2idle_enter = hwsus = False
- tp = TestProps()
testruns, testdata = [], []
testrun, data, limbo = 0, 0, True
- tf = sysvals.openlog(sysvals.ftracefile, 'r')
phase = 'suspend_prepare'
- for line in tf:
- # remove any latent carriage returns
- line = line.replace('\r\n', '')
- if tp.stampInfo(line, sysvals):
- continue
- # ignore all other commented lines
- if line[0] == '#':
- continue
- # ftrace line: parse only valid lines
- m = re.match(tp.ftrace_line_fmt, line)
- if(not m):
- continue
+ tp, tf = loadTraceLog()
+ for m_time, m_proc, m_pid, m_msg, m_param3 in tf:
# gather the basic message data from the line
- m_time = m.group('time')
- m_proc = m.group('proc')
- m_pid = m.group('pid')
- m_msg = m.group('msg')
- if(tp.cgformat):
- m_param3 = m.group('dur')
- else:
- m_param3 = 'traceevent'
if(m_time and m_pid and m_msg):
t = FTraceLine(m_time, m_msg, m_param3)
pid = int(m_pid)
@@ -3322,14 +3458,29 @@ def parseTraceLog(live=False):
if t.type == 'tracing_mark_write':
m = re.match(tp.procexecfmt, t.name)
if(m):
- proclist = dict()
- for ps in m.group('ps').split(','):
+ parts, msg = 1, m.group('ps')
+ m = re.match(tp.procmultifmt, msg)
+ if(m):
+ parts, msg = int(m.group('n')), m.group('ps')
+ if tp.multiproccnt == 0:
+ tp.multiproctime = t.time
+ tp.multiproclist = dict()
+ proclist = tp.multiproclist
+ tp.multiproccnt += 1
+ else:
+ proclist = dict()
+ tp.multiproccnt = 0
+ for ps in msg.split(','):
val = ps.split()
- if not val:
+ if not val or len(val) != 2:
continue
name = val[0].replace('--', '-')
proclist[name] = int(val[1])
- data.pstl[t.time] = proclist
+ if parts == 1:
+ data.pstl[t.time] = proclist
+ elif parts == tp.multiproccnt:
+ data.pstl[tp.multiproctime] = proclist
+ tp.multiproccnt = 0
continue
# find the end of resume
if(t.endMarker()):
@@ -3545,7 +3696,6 @@ def parseTraceLog(live=False):
testrun.ftemp[key].append(FTraceCallGraph(pid, sysvals))
if(res == -1):
testrun.ftemp[key][-1].addLine(t)
- tf.close()
if len(testdata) < 1:
sysvals.vprint('WARNING: ftrace start marker is missing')
if data and not data.devicegroups:
@@ -3667,7 +3817,13 @@ def parseTraceLog(live=False):
if p not in data.dmesg:
if not terr:
ph = p if 'machine' in p else lp
- terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, ph)
+ if p == 'suspend_machine':
+ sm = sysvals.suspendmode
+ if sm in suspendmodename:
+ sm = suspendmodename[sm]
+ terr = 'test%s did not enter %s power mode' % (tn, sm)
+ else:
+ terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, ph)
pprint('TEST%s FAILED: %s' % (tn, terr))
error.append(terr)
if data.tSuspended == 0:
@@ -3708,9 +3864,7 @@ def parseTraceLog(live=False):
# Function: loadKernelLog
# Description:
-# [deprecated for kernel 3.15.0 or newer]
# load the dmesg file into memory and fix up any ordering issues
-# The dmesg filename is taken from sysvals
# Output:
# An array of empty Data objects with only their dmesgtext attributes set
def loadKernelLog():
@@ -3736,7 +3890,8 @@ def loadKernelLog():
if(not m):
continue
msg = m.group("msg")
- if(re.match('PM: Syncing filesystems.*', msg)):
+ if re.match('PM: Syncing filesystems.*', msg) or \
+ re.match('PM: suspend entry.*', msg):
if(data):
testruns.append(data)
data = Data(len(testruns))
@@ -3747,11 +3902,17 @@ def loadKernelLog():
if(m):
sysvals.stamp['kernel'] = m.group('k')
m = re.match('PM: Preparing system for (?P<m>.*) sleep', msg)
- if(m):
+ if not m:
+ m = re.match('PM: Preparing system for sleep \((?P<m>.*)\)', msg)
+ if m:
sysvals.stamp['mode'] = sysvals.suspendmode = m.group('m')
data.dmesgtext.append(line)
lf.close()
+ if sysvals.suspendmode == 's2idle':
+ sysvals.suspendmode = 'freeze'
+ elif sysvals.suspendmode == 'deep':
+ sysvals.suspendmode = 'mem'
if data:
testruns.append(data)
if len(testruns) < 1:
@@ -3762,12 +3923,9 @@ def loadKernelLog():
for data in testruns:
last = ''
for line in data.dmesgtext:
- mc = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) calling '+\
- '(?P<f>.*)\+ @ .*, parent: .*', line)
- mr = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) call '+\
- '(?P<f>.*)\+ returned .* after (?P<dt>.*) usecs', last)
- if(mc and mr and (mc.group('t') == mr.group('t')) and
- (mc.group('f') == mr.group('f'))):
+ ct, cf, n, p = data.initcall_debug_call(line)
+ rt, rf, l = data.initcall_debug_return(last)
+ if ct and rt and ct == rt and cf == rf:
i = data.dmesgtext.index(last)
j = data.dmesgtext.index(line)
data.dmesgtext[i] = line
@@ -3777,7 +3935,6 @@ def loadKernelLog():
# Function: parseKernelLog
# Description:
-# [deprecated for kernel 3.15.0 or newer]
# Analyse a dmesg log output file generated from this app during
# the execution phase. Create a set of device structures in memory
# for subsequent formatting in the html output file
@@ -3796,30 +3953,30 @@ def parseKernelLog(data):
# dmesg phase match table
dm = {
- 'suspend_prepare': ['PM: Syncing filesystems.*'],
- 'suspend': ['PM: Entering [a-z]* sleep.*', 'Suspending console.*'],
- 'suspend_late': ['PM: suspend of devices complete after.*'],
- 'suspend_noirq': ['PM: late suspend of devices complete after.*'],
- 'suspend_machine': ['PM: noirq suspend of devices complete after.*'],
- 'resume_machine': ['ACPI: Low-level resume complete.*'],
- 'resume_noirq': ['ACPI: Waking up from system sleep state.*'],
- 'resume_early': ['PM: noirq resume of devices complete after.*'],
- 'resume': ['PM: early resume of devices complete after.*'],
- 'resume_complete': ['PM: resume of devices complete after.*'],
+ 'suspend_prepare': ['PM: Syncing filesystems.*', 'PM: suspend entry.*'],
+ 'suspend': ['PM: Entering [a-z]* sleep.*', 'Suspending console.*',
+ 'PM: Suspending system .*'],
+ 'suspend_late': ['PM: suspend of devices complete after.*',
+ 'PM: freeze of devices complete after.*'],
+ 'suspend_noirq': ['PM: late suspend of devices complete after.*',
+ 'PM: late freeze of devices complete after.*'],
+ 'suspend_machine': ['PM: suspend-to-idle',
+ 'PM: noirq suspend of devices complete after.*',
+ 'PM: noirq freeze of devices complete after.*'],
+ 'resume_machine': ['PM: Timekeeping suspended for.*',
+ 'ACPI: Low-level resume complete.*',
+ 'ACPI: resume from mwait',
+ 'Suspended for [0-9\.]* seconds'],
+ 'resume_noirq': ['PM: resume from suspend-to-idle',
+ 'ACPI: Waking up from system sleep state.*'],
+ 'resume_early': ['PM: noirq resume of devices complete after.*',
+ 'PM: noirq restore of devices complete after.*'],
+ 'resume': ['PM: early resume of devices complete after.*',
+ 'PM: early restore of devices complete after.*'],
+ 'resume_complete': ['PM: resume of devices complete after.*',
+ 'PM: restore of devices complete after.*'],
'post_resume': ['.*Restarting tasks \.\.\..*'],
}
- if(sysvals.suspendmode == 'standby'):
- dm['resume_machine'] = ['PM: Restoring platform NVS memory']
- elif(sysvals.suspendmode == 'disk'):
- dm['suspend_late'] = ['PM: freeze of devices complete after.*']
- dm['suspend_noirq'] = ['PM: late freeze of devices complete after.*']
- dm['suspend_machine'] = ['PM: noirq freeze of devices complete after.*']
- dm['resume_machine'] = ['PM: Restoring platform NVS memory']
- dm['resume_early'] = ['PM: noirq restore of devices complete after.*']
- dm['resume'] = ['PM: early restore of devices complete after.*']
- dm['resume_complete'] = ['PM: restore of devices complete after.*']
- elif(sysvals.suspendmode == 'freeze'):
- dm['resume_machine'] = ['ACPI: resume from mwait']
# action table (expected events that occur and show up in dmesg)
at = {
@@ -3867,12 +4024,13 @@ def parseKernelLog(data):
for s in dm[p]:
if(re.match(s, msg)):
phasechange, phase = True, p
+ dm[p] = [s]
break
# hack for determining resume_machine end for freeze
if(not sysvals.usetraceevents and sysvals.suspendmode == 'freeze' \
and phase == 'resume_machine' and \
- re.match('calling (?P<f>.*)\+ @ .*, parent: .*', msg)):
+ data.initcall_debug_call(line, True)):
data.setPhase(phase, ktime, False)
phase = 'resume_noirq'
data.setPhase(phase, ktime, True)
@@ -3945,26 +4103,18 @@ def parseKernelLog(data):
# -- device callbacks --
if(phase in data.sortedPhases()):
# device init call
- if(re.match('calling (?P<f>.*)\+ @ .*, parent: .*', msg)):
- sm = re.match('calling (?P<f>.*)\+ @ '+\
- '(?P<n>.*), parent: (?P<p>.*)', msg);
- f = sm.group('f')
- n = sm.group('n')
- p = sm.group('p')
- if(f and n and p):
- data.newAction(phase, f, int(n), p, ktime, -1, '')
- # device init return
- elif(re.match('call (?P<f>.*)\+ returned .* after '+\
- '(?P<t>.*) usecs', msg)):
- sm = re.match('call (?P<f>.*)\+ returned .* after '+\
- '(?P<t>.*) usecs(?P<a>.*)', msg);
- f = sm.group('f')
- t = sm.group('t')
- list = data.dmesg[phase]['list']
- if(f in list):
- dev = list[f]
- dev['length'] = int(t)
- dev['end'] = ktime
+ t, f, n, p = data.initcall_debug_call(line)
+ if t and f and n and p:
+ data.newAction(phase, f, int(n), p, ktime, -1, '')
+ else:
+ # device init return
+ t, f, l = data.initcall_debug_return(line)
+ if t and f and l:
+ list = data.dmesg[phase]['list']
+ if(f in list):
+ dev = list[f]
+ dev['length'] = int(l)
+ dev['end'] = ktime
# if trace events are not available, these are better than nothing
if(not sysvals.usetraceevents):
@@ -4006,6 +4156,8 @@ def parseKernelLog(data):
# fill in any missing phases
phasedef = data.phasedef
terr, lp = '', 'suspend_prepare'
+ if lp not in data.dmesg:
+ doError('dmesg log format has changed, could not find start of suspend')
for p in sorted(phasedef, key=lambda k:phasedef[k]['order']):
if p not in data.dmesg:
if not terr:
@@ -5302,7 +5454,7 @@ def executeSuspend(quiet=False):
sv.dlog('read dmesg')
sv.initdmesg()
# start ftrace
- if(sv.usecallgraph or sv.usetraceevents):
+ if sv.useftrace:
if not quiet:
pprint('START TRACING')
sv.dlog('start ftrace tracing')
@@ -5334,8 +5486,7 @@ def executeSuspend(quiet=False):
sv.dlog('enable RTC wake alarm')
sv.rtcWakeAlarmOn()
# start of suspend trace marker
- if(sv.usecallgraph or sv.usetraceevents):
- sv.fsetVal(datetime.now().strftime(sv.tmstart), 'trace_marker')
+ sv.fsetVal(datetime.now().strftime(sv.tmstart), 'trace_marker')
# predelay delay
if(count == 1 and sv.predelay > 0):
sv.fsetVal('WAIT %d' % sv.predelay, 'trace_marker')
@@ -5384,11 +5535,17 @@ def executeSuspend(quiet=False):
sv.fsetVal('WAIT END', 'trace_marker')
# return from suspend
pprint('RESUME COMPLETE')
- if(sv.usecallgraph or sv.usetraceevents):
- sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
+ sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
if sv.wifi and wifi:
tdata['wifi'] = sv.pollWifi(wifi)
sv.dlog('wifi check, %s' % tdata['wifi'])
+ if sv.netfix:
+ netfixout = sv.netfixon('wired')
+ elif sv.netfix:
+ netfixout = sv.netfixon()
+ if sv.netfix and netfixout:
+ tdata['netfix'] = netfixout
+ sv.dlog('netfix, %s' % tdata['netfix'])
if(sv.suspendmode == 'mem' or sv.suspendmode == 'command'):
sv.dlog('read the ACPI FPDT')
tdata['fw'] = getFPDT(False)
@@ -5396,7 +5553,7 @@ def executeSuspend(quiet=False):
sv.dlog('run the cmdinfo list after')
cmdafter = sv.cmdinfo(False)
# stop ftrace
- if(sv.usecallgraph or sv.usetraceevents):
+ if sv.useftrace:
if sv.useprocmon:
sv.dlog('stop the process monitor')
pm.stop()
@@ -5407,7 +5564,7 @@ def executeSuspend(quiet=False):
sysvals.dlog('EXECUTION TRACE END')
sv.getdmesg(testdata)
# grab a copy of the ftrace output
- if(sv.usecallgraph or sv.usetraceevents):
+ if sv.useftrace:
if not quiet:
pprint('CAPTURING TRACE')
op = sv.writeDatafileHeader(sv.ftracefile, testdata)
@@ -5838,13 +5995,19 @@ def statusCheck(probecheck=False):
pprint(' please choose one with -m')
# check if ftrace is available
- res = sysvals.colorText('NO')
- ftgood = sysvals.verifyFtrace()
- if(ftgood):
- res = 'YES'
- elif(sysvals.usecallgraph):
- status = 'ftrace is not properly supported'
- pprint(' is ftrace supported: %s' % res)
+ if sysvals.useftrace:
+ res = sysvals.colorText('NO')
+ sysvals.useftrace = sysvals.verifyFtrace()
+ efmt = '"{0}" uses ftrace, and it is not properly supported'
+ if sysvals.useftrace:
+ res = 'YES'
+ elif sysvals.usecallgraph:
+ status = efmt.format('-f')
+ elif sysvals.usedevsrc:
+ status = efmt.format('-dev')
+ elif sysvals.useprocmon:
+ status = efmt.format('-proc')
+ pprint(' is ftrace supported: %s' % res)
# check if kprobes are available
if sysvals.usekprobes:
@@ -5857,8 +6020,8 @@ def statusCheck(probecheck=False):
pprint(' are kprobes supported: %s' % res)
# what data source are we using
- res = 'DMESG'
- if(ftgood):
+ res = 'DMESG (very limited, ftrace is preferred)'
+ if sysvals.useftrace:
sysvals.usetraceevents = True
for e in sysvals.traceevents:
if not os.path.exists(sysvals.epath+e):
@@ -5879,7 +6042,7 @@ def statusCheck(probecheck=False):
pprint(' optional commands this tool may use for info:')
no = sysvals.colorText('MISSING')
yes = sysvals.colorText('FOUND', 32)
- for c in ['turbostat', 'mcelog', 'lspci', 'lsusb']:
+ for c in ['turbostat', 'mcelog', 'lspci', 'lsusb', 'netfix']:
if c == 'turbostat':
res = yes if sysvals.haveTurbostat() else no
else:
@@ -5971,7 +6134,7 @@ def processData(live=False, quiet=False):
if not sysvals.stamp:
pprint('ERROR: data does not include the expected stamp')
return (testruns, {'error': 'timeline generation failed'})
- shown = ['bios', 'biosdate', 'cpu', 'host', 'kernel', 'man', 'memfr',
+ shown = ['os', 'bios', 'biosdate', 'cpu', 'host', 'kernel', 'man', 'memfr',
'memsz', 'mode', 'numcpu', 'plat', 'time', 'wifi']
sysvals.vprint('System Info:')
for key in sorted(sysvals.stamp):
@@ -6052,6 +6215,8 @@ def runTest(n=0, quiet=False):
if sysvals.display:
ret = sysvals.displayControl('init')
sysvals.dlog('xset display init, ret = %d' % ret)
+ sysvals.testVal(sysvals.pmdpath, 'basic', '1')
+ sysvals.testVal(sysvals.s0ixpath, 'basic', 'Y')
sysvals.dlog('initialize ftrace')
sysvals.initFtrace(quiet)
@@ -6145,9 +6310,12 @@ def data_from_html(file, outpath, issues, fulldetail=False):
elist[err[0]] += 1
for i in elist:
ilist.append('%sx%d' % (i, elist[i]) if elist[i] > 1 else i)
- wifi = find_in_html(html, 'Wifi Resume: ', '</td>')
- if wifi:
- extra['wifi'] = wifi
+ line = find_in_html(log, '# wifi ', '\n')
+ if line:
+ extra['wifi'] = line
+ line = find_in_html(log, '# netfix ', '\n')
+ if line:
+ extra['netfix'] = line
low = find_in_html(html, 'freeze time: <b>', ' ms</b>')
for lowstr in ['waking', '+']:
if not low:
@@ -6243,7 +6411,7 @@ def genHtml(subdir, force=False):
sysvals.ftracefile = file
sysvals.setOutputFile()
if (sysvals.dmesgfile or sysvals.ftracefile) and sysvals.htmlfile and \
- (force or not sysvals.usable(sysvals.htmlfile)):
+ (force or not sysvals.usable(sysvals.htmlfile, True)):
pprint('FTRACE: %s' % sysvals.ftracefile)
if sysvals.dmesgfile:
pprint('DMESG : %s' % sysvals.dmesgfile)
@@ -6533,6 +6701,7 @@ def printHelp():
' -skiphtml Run the test and capture the trace logs, but skip the timeline (default: disabled)\n'\
' -result fn Export a results table to a text file for parsing.\n'\
' -wifi If a wifi connection is available, check that it reconnects after resume.\n'\
+ ' -netfix Use netfix to reset the network in the event it fails to resume.\n'\
' [testprep]\n'\
' -sync Sync the filesystems before starting the test\n'\
' -rs on/off Enable/disable runtime suspend for all devices, restore all after test\n'\
@@ -6615,6 +6784,8 @@ if __name__ == '__main__':
elif(arg == '-v'):
pprint("Version %s" % sysvals.version)
sys.exit(0)
+ elif(arg == '-debugtiming'):
+ debugtiming = True
elif(arg == '-x2'):
sysvals.execcount = 2
elif(arg == '-x2delay'):
@@ -6657,6 +6828,8 @@ if __name__ == '__main__':
sysvals.sync = True
elif(arg == '-wifi'):
sysvals.wifi = True
+ elif(arg == '-netfix'):
+ sysvals.netfix = True
elif(arg == '-gzip'):
sysvals.gzip = True
elif(arg == '-info'):
@@ -6819,7 +6992,7 @@ if __name__ == '__main__':
sysvals.outdir = val
sysvals.notestrun = True
if(os.path.isdir(val) == False):
- doError('%s is not accessible' % val)
+ doError('%s is not accesible' % val)
elif(arg == '-filter'):
try:
val = next(args)
@@ -6942,12 +7115,11 @@ if __name__ == '__main__':
time.sleep(sysvals.multitest['delay'])
fmt = 'suspend-%y%m%d-%H%M%S'
sysvals.testdir = os.path.join(sysvals.outdir, datetime.now().strftime(fmt))
- ret = runTest(i+1, True)
+ ret = runTest(i+1, not sysvals.verbose)
failcnt = 0 if not ret else failcnt + 1
if sysvals.maxfail > 0 and failcnt >= sysvals.maxfail:
pprint('Maximum fail count of %d reached, aborting multitest' % (sysvals.maxfail))
break
- time.sleep(5)
sysvals.resetlog()
sysvals.multistat(False, i, finish)
if 'time' in sysvals.multitest and datetime.now() >= finish:
diff --git a/tools/power/x86/intel-speed-select/hfi-events.c b/tools/power/x86/intel-speed-select/hfi-events.c
index 761375062505..f0ed69721308 100644
--- a/tools/power/x86/intel-speed-select/hfi-events.c
+++ b/tools/power/x86/intel-speed-select/hfi-events.c
@@ -144,7 +144,7 @@ static int family_handler(struct nl_msg *msg, void *arg)
continue;
res->id = nla_get_u32(tb2[CTRL_ATTR_MCAST_GRP_ID]);
break;
- };
+ }
return 0;
}
diff --git a/tools/power/x86/intel-speed-select/isst-daemon.c b/tools/power/x86/intel-speed-select/isst-daemon.c
index dd372924bc82..d0400c6684ba 100644
--- a/tools/power/x86/intel-speed-select/isst-daemon.c
+++ b/tools/power/x86/intel-speed-select/isst-daemon.c
@@ -41,7 +41,7 @@ void process_level_change(int cpu)
time_t tm;
int ret;
- if (pkg_id >= MAX_PACKAGE_COUNT || die_id > MAX_DIE_PER_PACKAGE) {
+ if (pkg_id >= MAX_PACKAGE_COUNT || die_id >= MAX_DIE_PER_PACKAGE) {
debug_printf("Invalid package/die info for cpu:%d\n", cpu);
return;
}
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 1e7d3de55a94..c7b26a3603af 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -92,40 +92,66 @@ displays the statistics gathered since it was forked.
.SH ROW DESCRIPTIONS
The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system.
.SH COLUMN DESCRIPTIONS
-.nf
+.PP
\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to collect the counters on all cpus.
+.PP
\fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected. This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug". On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU.
+.PP
\fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT).
+.PP
\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
+.PP
\fBPackage\fP processor package number -- not present on systems with a single processor package.
+.PP
\fBAvg_MHz\fP number of cycles executed divided by time elapsed. Note that this includes idle-time when 0 instructions are executed.
+.PP
\fBBusy%\fP percent of the measurement interval that the CPU executes instructions, aka. % of time in "C0" state.
+.PP
\fBBzy_MHz\fP average clock rate while the CPU was not idle (ie. in "c0" state).
+.PP
\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
+.PP
\fBIRQ\fP The number of interrupts serviced by that CPU during the measurement interval. The system total line is the sum of interrupts serviced across all CPUs. turbostat parses /proc/interrupts to generate this summary.
+.PP
\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs.
+.PP
\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system.
+.PP
\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved.
+.PP
\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters.
+.PP
\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
+.PP
\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
+.PP
\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+.PP
\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
+.PP
\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters.
+.PP
\fBPkgWatt\fP Watts consumed by the whole package.
+.PP
\fBCorWatt\fP Watts consumed by the core part of the package.
+.PP
\fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors.
+.PP
\fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors.
+.PP
\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary.
+.PP
\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
-.fi
+.PP
+\fBUncMHz\fP uncore MHz, instantaneous sample.
.SH TOO MUCH INFORMATION EXAMPLE
By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters.
This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug.
.PP
When you are not interested in all that information, and there are several ways to see only what you want. First the "--quiet" option will skip the configuration information, and turbostat will show only the counter columns. Second, you can reduce the columns with the "--hide" and "--show" options. If you use the "--show" option, then turbostat will show only the columns you list. If you use the "--hide" option, turbostat will show all columns, except the ones you list.
.PP
-To find out what columns are available for --show and --hide, the "--list" option is available. For convenience, the special strings "sysfs" can be used to refer to all of the sysfs C-state counters at once:
+To find out what columns are available for --show and --hide, the "--list" option is available. Usually, the CATEGORY names above are used to refer to groups of counters. Also, for convenience, the special string "sysfs" can be used to refer to all of the sysfs C-state counters at once:
+.PP
.nf
sudo ./turbostat --show sysfs --quiet sleep 10
10.003837 sec
@@ -158,32 +184,29 @@ Without a command to fork, turbostat displays statistics ever 5 seconds.
Periodic output goes to stdout, by default, unless --out is used to specify an output file.
The 5-second interval can be changed with the "-i sec" option.
.nf
-sudo ./turbostat --quiet --hide sysfs,IRQ,SMI,CoreTmp,PkgTmp,GFX%rc6,GFXMHz,PkgWatt,CorWatt,GFXWatt
- Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7
- - - 488 12.52 3900 3498 12.50 0.00 0.00 74.98
- 0 0 5 0.13 3900 3498 99.87 0.00 0.00 0.00
- 0 4 3897 99.99 3900 3498 0.01
- 1 1 0 0.00 3856 3498 0.01 0.00 0.00 99.98
- 1 5 0 0.00 3861 3498 0.01
- 2 2 1 0.02 3889 3498 0.03 0.00 0.00 99.95
- 2 6 0 0.00 3863 3498 0.05
- 3 3 0 0.01 3869 3498 0.02 0.00 0.00 99.97
- 3 7 0 0.00 3878 3498 0.03
- Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7
- - - 491 12.59 3900 3498 12.42 0.00 0.00 74.99
- 0 0 27 0.69 3900 3498 99.31 0.00 0.00 0.00
- 0 4 3898 99.99 3900 3498 0.01
- 1 1 0 0.00 3883 3498 0.01 0.00 0.00 99.99
- 1 5 0 0.00 3898 3498 0.01
- 2 2 0 0.01 3889 3498 0.02 0.00 0.00 99.98
- 2 6 0 0.00 3889 3498 0.02
- 3 3 0 0.00 3856 3498 0.01 0.00 0.00 99.99
- 3 7 0 0.00 3897 3498 0.01
+sudo turbostat --quiet --show CPU,frequency
+ Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c7 UncMhz
+ - - 524 12.48 4198 3096 74.53 3800
+ 0 0 4 0.09 4081 3096 98.88 3800
+ 0 4 1 0.02 4063 3096
+ 1 1 2 0.06 4063 3096 99.60
+ 1 5 2 0.05 4070 3096
+ 2 2 4178 99.52 4199 3096 0.00
+ 2 6 3 0.08 4159 3096
+ 3 3 1 0.04 4046 3096 99.66
+ 3 7 0 0.01 3989 3096
+ Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c7 UncMhz
+ - - 525 12.52 4198 3096 74.54 3800
+ 0 0 4 0.10 4051 3096 99.49 3800
+ 0 4 2 0.04 3993 3096
+ 1 1 3 0.07 4054 3096 99.56
+ 1 5 4 0.10 4018 3096
+ 2 2 4178 99.51 4199 3096 0.00
+ 2 6 4 0.09 4143 3096
+ 3 3 2 0.06 4026 3096 99.10
+ 3 7 7 0.17 4074 3096
.fi
-This example also shows the use of the --hide option to skip columns that are not wanted.
-Note that cpu4 in this example is 99.99% busy, while the other CPUs are all under 1% busy.
-Notice that cpu4's HT sibling is cpu0, which is under 1% busy, but can get into CPU%c1 only,
-because its cpu4's activity on shared hardware keeps it from entering a deeper C-state.
+This example also shows the use of the --show option to show only the desired columns.
.SH SYSTEM CONFIGURATION INFORMATION EXAMPLE
@@ -191,61 +214,86 @@ By default, turbostat always dumps system configuration information
before taking measurements. In the example above, "--quiet" is used
to suppress that output. Here is an example of the configuration information:
.nf
-turbostat version 2017.02.15 - Len Brown <lenb@kernel.org>
-CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3)
-CPUID(1): SSE3 MONITOR - EIST TM2 TSC MSR ACPI-TM TM
-CPUID(6): APERF, TURBO, DTS, PTM, No-HWP, No-HWPnotify, No-HWPwindow, No-HWPepp, No-HWPpkg, EPB
-cpu4: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST No-MWAIT PREFETCH TURBO)
-CPUID(7): No-SGX
-cpu4: MSR_MISC_PWR_MGMT: 0x00400000 (ENable-EIST_Coordination DISable-EPB DISable-OOB)
-RAPL: 3121 sec. Joule Counter Range, at 84 Watts
-cpu4: MSR_PLATFORM_INFO: 0x80838f3012300
+turbostat version 2022.04.16 - Len Brown <lenb@kernel.org>
+Kernel command line: BOOT_IMAGE=/boot/vmlinuz-5.18.0-rc6-00001-ge6891250e3b5 ...
+CPUID(0): GenuineIntel 0x16 CPUID levels
+CPUID(1): family:model:stepping 0x6:9e:9 (6:158:9) microcode 0xea
+CPUID(0x80000000): max_extended_levels: 0x80000008
+CPUID(1): SSE3 MONITOR - EIST TM2 TSC MSR ACPI-TM HT TM
+CPUID(6): APERF, TURBO, DTS, PTM, HWP, HWPnotify, HWPwindow, HWPepp, No-HWPpkg, EPB
+cpu7: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST MWAIT PREFETCH TURBO)
+CPUID(7): SGX
+cpu7: MSR_IA32_FEATURE_CONTROL: 0x00000005 (Locked )
+CPUID(0x15): eax_crystal: 2 ebx_tsc: 258 ecx_crystal_hz: 0
+TSC: 3096 MHz (24000000 Hz * 258 / 2 / 1000000)
+CPUID(0x16): base_mhz: 3100 max_mhz: 4200 bus_mhz: 100
+cpu7: MSR_MISC_PWR_MGMT: 0x00401cc0 (ENable-EIST_Coordination DISable-EPB DISable-OOB)
+RAPL: 5825 sec. Joule Counter Range, at 45 Watts
+cpu7: MSR_PLATFORM_INFO: 0x80839f1011f00
8 * 100.0 = 800.0 MHz max efficiency frequency
-35 * 100.0 = 3500.0 MHz base frequency
-cpu4: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled)
-cpu4: MSR_TURBO_RATIO_LIMIT: 0x25262727
-37 * 100.0 = 3700.0 MHz max turbo 4 active cores
-38 * 100.0 = 3800.0 MHz max turbo 3 active cores
-39 * 100.0 = 3900.0 MHz max turbo 2 active cores
-39 * 100.0 = 3900.0 MHz max turbo 1 active cores
-cpu4: MSR_CONFIG_TDP_NOMINAL: 0x00000023 (base_ratio=35)
-cpu4: MSR_CONFIG_TDP_LEVEL_1: 0x00000000 ()
-cpu4: MSR_CONFIG_TDP_LEVEL_2: 0x00000000 ()
-cpu4: MSR_CONFIG_TDP_CONTROL: 0x80000000 ( lock=1)
-cpu4: MSR_TURBO_ACTIVATION_RATIO: 0x00000000 (MAX_NON_TURBO_RATIO=0 lock=0)
-cpu4: MSR_PKG_CST_CONFIG_CONTROL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0)
-cpu4: POLL: CPUIDLE CORE POLL IDLE
-cpu4: C1: MWAIT 0x00
-cpu4: C1E: MWAIT 0x01
-cpu4: C3: MWAIT 0x10
-cpu4: C6: MWAIT 0x20
-cpu4: C7s: MWAIT 0x32
-cpu4: MSR_MISC_FEATURE_CONTROL: 0x00000000 (L2-Prefetch L2-Prefetch-pair L1-Prefetch L1-IP-Prefetch)
-cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced)
-cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Transitions, MultiCoreTurbo, Amps, Auto-HWP, )
-cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: )
-cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, )
+31 * 100.0 = 3100.0 MHz base frequency
+cpu7: MSR_IA32_POWER_CTL: 0x002c005d (C1E auto-promotion: DISabled)
+cpu7: MSR_TURBO_RATIO_LIMIT: 0x2728292a
+39 * 100.0 = 3900.0 MHz max turbo 4 active cores
+40 * 100.0 = 4000.0 MHz max turbo 3 active cores
+41 * 100.0 = 4100.0 MHz max turbo 2 active cores
+42 * 100.0 = 4200.0 MHz max turbo 1 active cores
+cpu7: MSR_CONFIG_TDP_NOMINAL: 0x0000001f (base_ratio=31)
+cpu7: MSR_CONFIG_TDP_LEVEL_1: 0x00000000 ()
+cpu7: MSR_CONFIG_TDP_LEVEL_2: 0x00000000 ()
+cpu7: MSR_CONFIG_TDP_CONTROL: 0x80000000 ( lock=1)
+cpu7: MSR_TURBO_ACTIVATION_RATIO: 0x00000000 (MAX_NON_TURBO_RATIO=0 lock=0)
+cpu7: MSR_PKG_CST_CONFIG_CONTROL: 0x1e008008 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked, pkg-cstate-limit=8 (unlimited))
+Uncore Frequency pkg0 die0: 800 - 3900 MHz (800 - 3900 MHz)
+/dev/cpu_dma_latency: 2000000000 usec (default)
+current_driver: intel_idle
+current_governor: menu
+current_governor_ro: menu
+cpu7: POLL: CPUIDLE CORE POLL IDLE
+cpu7: C1: MWAIT 0x00
+cpu7: C1E: MWAIT 0x01
+cpu7: C3: MWAIT 0x10
+cpu7: C6: MWAIT 0x20
+cpu7: C7s: MWAIT 0x33
+cpu7: C8: MWAIT 0x40
+cpu7: C9: MWAIT 0x50
+cpu7: C10: MWAIT 0x60
+cpu7: cpufreq driver: intel_pstate
+cpu7: cpufreq governor: performance
+cpufreq intel_pstate no_turbo: 0
+cpu7: MSR_MISC_FEATURE_CONTROL: 0x00000000 (L2-Prefetch L2-Prefetch-pair L1-Prefetch L1-IP-Prefetch)
+cpu0: MSR_PM_ENABLE: 0x00000001 (HWP)
+cpu0: MSR_HWP_CAPABILITIES: 0x01101f53 (high 83 guar 31 eff 16 low 1)
+cpu0: MSR_HWP_REQUEST: 0x00005353 (min 83 max 83 des 0 epp 0x0 window 0x0 pkg 0x0)
+cpu0: MSR_HWP_INTERRUPT: 0x00000001 (EN_Guaranteed_Perf_Change, Dis_Excursion_Min)
+cpu0: MSR_HWP_STATUS: 0x00000004 (No-Guaranteed_Perf_Change, No-Excursion_Min)
+cpu0: EPB: 6 (balanced)
cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.)
-cpu0: MSR_PKG_POWER_INFO: 0x000002a0 (84 W TDP, RAPL 0 - 0 W, 0.000000 sec.)
-cpu0: MSR_PKG_POWER_LIMIT: 0x428348001a82a0 (UNlocked)
-cpu0: PKG Limit #1: ENabled (84.000000 Watts, 8.000000 sec, clamp DISabled)
-cpu0: PKG Limit #2: ENabled (105.000000 Watts, 0.002441* sec, clamp DISabled)
+cpu0: MSR_PKG_POWER_INFO: 0x00000168 (45 W TDP, RAPL 0 - 0 W, 0.000000 sec.)
+cpu0: MSR_PKG_POWER_LIMIT: 0x42820800218208 (UNlocked)
+cpu0: PKG Limit #1: ENabled (65.000 Watts, 64.000000 sec, clamp ENabled)
+cpu0: PKG Limit #2: ENabled (65.000 Watts, 0.002441* sec, clamp DISabled)
+cpu0: MSR_VR_CURRENT_CONFIG: 0x00000000
+cpu0: PKG Limit #4: 0.000000 Watts (UNlocked)
+cpu0: MSR_DRAM_POWER_LIMIT: 0x5400de00000000 (UNlocked)
+cpu0: DRAM Limit: DISabled (0.000 Watts, 0.000977 sec, clamp DISabled)
cpu0: MSR_PP0_POLICY: 0
cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked)
-cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
+cpu0: Cores Limit: DISabled (0.000 Watts, 0.000977 sec, clamp DISabled)
cpu0: MSR_PP1_POLICY: 0
cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked)
-cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
-cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C)
-cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884c0800 (24 C)
-cpu0: MSR_IA32_THERM_STATUS: 0x884c0000 (24 C +/- 1)
-cpu1: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1)
-cpu2: MSR_IA32_THERM_STATUS: 0x884e0000 (22 C +/- 1)
-cpu3: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1)
-cpu4: MSR_PKGC3_IRTL: 0x00008842 (valid, 67584 ns)
-cpu4: MSR_PKGC6_IRTL: 0x00008873 (valid, 117760 ns)
-cpu4: MSR_PKGC7_IRTL: 0x00008891 (valid, 148480 ns)
+cpu0: GFX Limit: DISabled (0.000 Watts, 0.000977 sec, clamp DISabled)
+cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00640000 (100 C) (100 default - 0 offset)
+cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88200800 (68 C)
+cpu0: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x00000003 (100 C, 100 C)
+cpu7: MSR_PKGC3_IRTL: 0x0000884e (valid, 79872 ns)
+cpu7: MSR_PKGC6_IRTL: 0x00008876 (valid, 120832 ns)
+cpu7: MSR_PKGC7_IRTL: 0x00008894 (valid, 151552 ns)
+cpu7: MSR_PKGC8_IRTL: 0x000088fa (valid, 256000 ns)
+cpu7: MSR_PKGC9_IRTL: 0x0000894c (valid, 339968 ns)
+cpu7: MSR_PKGC10_IRTL: 0x00008bf2 (valid, 1034240 ns)
.fi
+.PP
The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
available at the minimum package voltage. The \fBTSC frequency\fP is the base
frequency of the processor -- this should match the brand string
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index ede31a4287a0..831dc32d45fa 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -126,6 +126,7 @@ struct msr_counter bic[] = {
{ 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 },
{ 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -183,10 +184,11 @@ struct msr_counter bic[] = {
#define BIC_GFXACTMHz (1ULL << 51)
#define BIC_IPC (1ULL << 52)
#define BIC_CORE_THROT_CNT (1ULL << 53)
+#define BIC_UNCORE_MHZ (1ULL << 54)
#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz )
+#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ)
#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
@@ -228,6 +230,7 @@ unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
+unsigned int is_hybrid;
unsigned int do_irtl_snb;
unsigned int do_irtl_hsw;
unsigned int units = 1000000; /* MHz etc */
@@ -393,6 +396,7 @@ struct pkg_data {
unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
unsigned int pkg_temp_c;
+ unsigned int uncore_mhz;
unsigned long long counter[MAX_ADDED_COUNTERS];
} *package_even, *package_odd;
@@ -988,6 +992,9 @@ void print_header(char *delim)
if (DO_BIC(BIC_RAM__))
outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
}
+ if (DO_BIC(BIC_UNCORE_MHZ))
+ outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : ""));
+
for (mp = sys.pp; mp; mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 64)
@@ -1370,6 +1377,9 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+ /* UncMHz */
+ if (DO_BIC(BIC_UNCORE_MHZ))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
@@ -1471,6 +1481,7 @@ int delta_package(struct pkg_data *new, struct pkg_data *old)
else
old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
+ old->uncore_mhz = new->uncore_mhz;
old->gfx_mhz = new->gfx_mhz;
old->gfx_act_mhz = new->gfx_act_mhz;
@@ -1689,6 +1700,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->pkg_temp_c = 0;
p->gfx_rc6_ms = 0;
+ p->uncore_mhz = 0;
p->gfx_mhz = 0;
p->gfx_act_mhz = 0;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
@@ -1788,6 +1800,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.packages.energy_gfx += p->energy_gfx;
average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
+ average.packages.uncore_mhz = p->uncore_mhz;
average.packages.gfx_mhz = p->gfx_mhz;
average.packages.gfx_act_mhz = p->gfx_act_mhz;
@@ -1948,6 +1961,16 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
return 0;
}
+unsigned long long get_uncore_mhz(int package, int die)
+{
+ char path[128];
+
+ sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package,
+ die);
+
+ return (snapshot_sysfs_counter(path) / 1000);
+}
+
int get_epb(int cpu)
{
char path[128 + PATH_BYTES];
@@ -2035,9 +2058,9 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt)
if (!fp)
return -1;
ret = fscanf(fp, "%lld", &tmp);
+ fclose(fp);
if (ret != 1)
return -1;
- fclose(fp);
*cnt = tmp;
return 0;
@@ -2297,6 +2320,10 @@ retry:
if (DO_BIC(BIC_GFX_rc6))
p->gfx_rc6_ms = gfx_cur_rc6_ms;
+ /* n.b. assume die0 uncore frequency applies to whole package */
+ if (DO_BIC(BIC_UNCORE_MHZ))
+ p->uncore_mhz = get_uncore_mhz(p->package_id, 0);
+
if (DO_BIC(BIC_GFXMHz))
p->gfx_mhz = gfx_cur_mhz;
@@ -2494,6 +2521,7 @@ int has_turbo_ratio_group_limits(int family, int model)
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
case INTEL_FAM6_ATOM_GOLDMONT_D:
case INTEL_FAM6_ATOM_TREMONT_D:
return 1;
@@ -2502,13 +2530,14 @@ int has_turbo_ratio_group_limits(int family, int model)
}
}
-static void dump_turbo_ratio_limits(int family, int model)
+static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
{
unsigned long long msr, core_counts;
- unsigned int ratio, group_size;
+ int shift;
- get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
- fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
+ get_msr(base_cpu, trl_msr_offset, &msr);
+ fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
+ base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY" : "", msr);
if (has_turbo_ratio_group_limits(family, model)) {
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
@@ -2517,53 +2546,16 @@ static void dump_turbo_ratio_limits(int family, int model)
core_counts = 0x0807060504030201;
}
- ratio = (msr >> 56) & 0xFF;
- group_size = (core_counts >> 56) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
+ for (shift = 56; shift >= 0; shift -= 8) {
+ unsigned int ratio, group_size;
- ratio = (msr >> 48) & 0xFF;
- group_size = (core_counts >> 48) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
-
- ratio = (msr >> 40) & 0xFF;
- group_size = (core_counts >> 40) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
-
- ratio = (msr >> 32) & 0xFF;
- group_size = (core_counts >> 32) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
-
- ratio = (msr >> 24) & 0xFF;
- group_size = (core_counts >> 24) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
-
- ratio = (msr >> 16) & 0xFF;
- group_size = (core_counts >> 16) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
-
- ratio = (msr >> 8) & 0xFF;
- group_size = (core_counts >> 8) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
+ ratio = (msr >> shift) & 0xFF;
+ group_size = (core_counts >> shift) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, group_size);
+ }
- ratio = (msr >> 0) & 0xFF;
- group_size = (core_counts >> 0) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
return;
}
@@ -2976,7 +2968,7 @@ int get_thread_siblings(struct cpu_topology *thiscpu)
}
}
}
- } while (!strncmp(&character, ",", 1));
+ } while (character == ',');
fclose(filep);
return CPU_COUNT_S(size, thiscpu->put_ids);
@@ -3742,6 +3734,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
has_misc_feature_control = 1;
break;
case INTEL_FAM6_SKYLAKE_X: /* SKX */
+ case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
pkg_cstate_limits = skx_pkg_cstate_limits;
has_misc_feature_control = 1;
break;
@@ -3871,6 +3864,22 @@ int is_icx(unsigned int family, unsigned int model)
return 0;
}
+int is_spr(unsigned int family, unsigned int model)
+{
+
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ return 1;
+ }
+ return 0;
+}
+
int is_ehl(unsigned int family, unsigned int model)
{
if (!genuine_intel)
@@ -3988,6 +3997,7 @@ int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
return 1;
default:
return 0;
@@ -4015,7 +4025,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
case INTEL_FAM6_SKYLAKE_X: /* SKX */
case INTEL_FAM6_ICELAKE_X: /* ICX */
-
+ case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */
return 1;
default:
@@ -4083,8 +4093,12 @@ static void dump_cstate_pstate_config_info(unsigned int family, unsigned int mod
if (has_ivt_turbo_ratio_limit(family, model))
dump_ivt_turbo_ratio_limits();
- if (has_turbo_ratio_limit(family, model))
- dump_turbo_ratio_limits(family, model);
+ if (has_turbo_ratio_limit(family, model)) {
+ dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT, family, model);
+
+ if (is_hybrid)
+ dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT, family, model);
+ }
if (has_atom_turbo_ratio_limit(family, model))
dump_atom_turbo_ratio_limits();
@@ -4098,6 +4112,24 @@ static void dump_cstate_pstate_config_info(unsigned int family, unsigned int mod
dump_nhm_cst_cfg();
}
+static int read_sysfs_int(char *path)
+{
+ FILE *input;
+ int retval = -1;
+
+ input = fopen(path, "r");
+ if (input == NULL) {
+ if (debug)
+ fprintf(outf, "NSFOD %s\n", path);
+ return (-1);
+ }
+ if (fscanf(input, "%d", &retval) != 1)
+ err(1, "%s: failed to read int from file", path);
+ fclose(input);
+
+ return (retval);
+}
+
static void dump_sysfs_file(char *path)
{
FILE *input;
@@ -4116,6 +4148,48 @@ static void dump_sysfs_file(char *path)
fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
}
+static void intel_uncore_frequency_probe(void)
+{
+ int i, j;
+ char path[128];
+
+ if (!genuine_intel)
+ return;
+
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
+ return;
+
+ if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+ BIC_PRESENT(BIC_UNCORE_MHZ);
+
+ if (quiet)
+ return;
+
+ for (i = 0; i < topo.num_packages; ++i) {
+ for (j = 0; j < topo.num_die; ++j) {
+ int k, l;
+
+ sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz",
+ i, j);
+ k = read_sysfs_int(path);
+ sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz",
+ i, j);
+ l = read_sysfs_int(path);
+ fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
+
+ sprintf(path,
+ "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz",
+ i, j);
+ k = read_sysfs_int(path);
+ sprintf(path,
+ "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz",
+ i, j);
+ l = read_sysfs_int(path);
+ fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000);
+ }
+ }
+}
+
static void dump_sysfs_cstate_config(void)
{
char path[64];
@@ -4486,6 +4560,7 @@ static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
case INTEL_FAM6_SKYLAKE_X: /* SKX */
case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
case INTEL_FAM6_ICELAKE_X: /* ICX */
+ case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
return (rapl_dram_energy_units = 15.3 / 1000000);
default:
return (rapl_energy_units);
@@ -4575,6 +4650,7 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
case INTEL_FAM6_BROADWELL_X: /* BDX */
case INTEL_FAM6_SKYLAKE_X: /* SKX */
case INTEL_FAM6_ICELAKE_X: /* ICX */
+ case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
do_rapl =
RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
@@ -4740,13 +4816,19 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
{
- if (is_skx(family, model) || is_bdx(family, model) || is_icx(family, model))
+ if (family != 6)
+ return;
+
+ switch (model) {
+ case INTEL_FAM6_BROADWELL_X:
+ case INTEL_FAM6_SKYLAKE_X:
has_automatic_cstate_conversion = 1;
+ }
}
void prewake_cstate_probe(unsigned int family, unsigned int model)
{
- if (is_icx(family, model))
+ if (is_icx(family, model) || is_spr(family, model))
dis_cstate_prewake = 1;
}
@@ -4975,6 +5057,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
case INTEL_FAM6_SKYLAKE_X: /* SKX */
case INTEL_FAM6_ICELAKE_X: /* ICX */
+ case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
@@ -5361,13 +5444,15 @@ unsigned int intel_model_duplicates(unsigned int model)
case INTEL_FAM6_LAKEFIELD:
case INTEL_FAM6_ALDERLAKE:
case INTEL_FAM6_ALDERLAKE_L:
+ case INTEL_FAM6_ALDERLAKE_N:
+ case INTEL_FAM6_RAPTORLAKE:
+ case INTEL_FAM6_RAPTORLAKE_P:
return INTEL_FAM6_CANNONLAKE_L;
case INTEL_FAM6_ATOM_TREMONT_L:
return INTEL_FAM6_ATOM_TREMONT;
case INTEL_FAM6_ICELAKE_D:
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
return INTEL_FAM6_ICELAKE_X;
}
return model;
@@ -5398,7 +5483,7 @@ void print_dev_latency(void)
}
/*
- * Linux-perf manages the the HW instructions-retired counter
+ * Linux-perf manages the HW instructions-retired counter
* by enabling when requested, and hiding rollover
*/
void linux_perf_init(void)
@@ -5543,7 +5628,10 @@ void process_cpuid()
__cpuid_count(0x7, 0, eax, ebx, ecx, edx);
has_sgx = ebx & (1 << 2);
- fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
+
+ is_hybrid = edx & (1 << 15);
+
+ fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-");
if (has_sgx)
decode_feature_control_msr();
@@ -5654,7 +5742,7 @@ void process_cpuid()
BIC_NOT_PRESENT(BIC_Pkgpc7);
use_c1_residency_msr = 1;
}
- if (is_skx(family, model) || is_icx(family, model)) {
+ if (is_skx(family, model) || is_icx(family, model) || is_spr(family, model)) {
BIC_NOT_PRESENT(BIC_CPU_c3);
BIC_NOT_PRESENT(BIC_Pkgpc3);
BIC_NOT_PRESENT(BIC_CPU_c7);
@@ -5699,6 +5787,7 @@ void process_cpuid()
if (!quiet)
dump_cstate_pstate_config_info(family, model);
+ intel_uncore_frequency_probe();
if (!quiet)
print_dev_latency();
@@ -6128,7 +6217,30 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2022.04.16 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2022.07.28 - Len Brown <lenb@kernel.org>\n");
+}
+
+#define COMMAND_LINE_SIZE 2048
+
+void print_bootcmd(void)
+{
+ char bootcmd[COMMAND_LINE_SIZE];
+ FILE *fp;
+ int ret;
+
+ memset(bootcmd, 0, COMMAND_LINE_SIZE);
+ fp = fopen("/proc/cmdline", "r");
+ if (!fp)
+ return;
+
+ ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp);
+ if (ret) {
+ bootcmd[ret] = '\0';
+ /* the last character is already '\n' */
+ fprintf(outf, "Kernel command line: %s", bootcmd);
+ }
+
+ fclose(fp);
}
int add_counter(unsigned int msr_num, char *path, char *name,
@@ -6602,8 +6714,10 @@ int main(int argc, char **argv)
outf = stderr;
cmdline(argc, argv);
- if (!quiet)
+ if (!quiet) {
print_version();
+ print_bootcmd();
+ }
probe_sysfs();
diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
index 83844f8b862a..b0ca44c70e83 100644
--- a/tools/spi/spidev_test.c
+++ b/tools/spi/spidev_test.c
@@ -417,6 +417,7 @@ int main(int argc, char *argv[])
{
int ret = 0;
int fd;
+ uint32_t request;
parse_opts(argc, argv);
@@ -430,13 +431,23 @@ int main(int argc, char *argv[])
/*
* spi mode
*/
+ /* WR is make a request to assign 'mode' */
+ request = mode;
ret = ioctl(fd, SPI_IOC_WR_MODE32, &mode);
if (ret == -1)
pabort("can't set spi mode");
+ /* RD is read what mode the device actually is in */
ret = ioctl(fd, SPI_IOC_RD_MODE32, &mode);
if (ret == -1)
pabort("can't get spi mode");
+ /* Drivers can reject some mode bits without returning an error.
+ * Read the current value to identify what mode it is in, and if it
+ * differs from the requested mode, warn the user.
+ */
+ if (request != mode)
+ printf("WARNING device does not support requested mode 0x%x\n",
+ request);
/*
* bits per word
diff --git a/tools/testing/kunit/configs/arch_uml.config b/tools/testing/kunit/configs/arch_uml.config
new file mode 100644
index 000000000000..e824ce43b05a
--- /dev/null
+++ b/tools/testing/kunit/configs/arch_uml.config
@@ -0,0 +1,5 @@
+# Config options which are added to UML builds by default
+
+# Enable virtio/pci, as a lot of tests require it.
+CONFIG_VIRTIO_UML=y
+CONFIG_UML_PCI_OVER_VIRTIO=y
diff --git a/tools/testing/kunit/configs/coverage_uml.config b/tools/testing/kunit/configs/coverage_uml.config
new file mode 100644
index 000000000000..bacb77664fa8
--- /dev/null
+++ b/tools/testing/kunit/configs/coverage_uml.config
@@ -0,0 +1,11 @@
+# This config fragment enables coverage on UML, which is different from the
+# normal gcov used in other arches (no debugfs).
+# Example usage:
+# ./tools/testing/kunit/kunit.py run \
+# --kunitconfig=tools/testing/kunit/configs/all_tests_uml.config \
+# --kunitconfig=tools/testing/kunit/configs/coverage_uml.config
+
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_GCOV=y
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 13bd72e47da8..e132b0654029 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -10,6 +10,7 @@
import argparse
import os
import re
+import shlex
import sys
import time
@@ -22,6 +23,7 @@ from typing import Iterable, List, Optional, Sequence, Tuple
import kunit_json
import kunit_kernel
import kunit_parser
+from kunit_printer import stdout
class KunitStatus(Enum):
SUCCESS = auto()
@@ -72,7 +74,7 @@ def get_kernel_root_path() -> str:
def config_tests(linux: kunit_kernel.LinuxSourceTree,
request: KunitConfigRequest) -> KunitResult:
- kunit_parser.print_with_timestamp('Configuring KUnit Kernel ...')
+ stdout.print_with_timestamp('Configuring KUnit Kernel ...')
config_start = time.time()
success = linux.build_reconfig(request.build_dir, request.make_options)
@@ -85,7 +87,7 @@ def config_tests(linux: kunit_kernel.LinuxSourceTree,
def build_tests(linux: kunit_kernel.LinuxSourceTree,
request: KunitBuildRequest) -> KunitResult:
- kunit_parser.print_with_timestamp('Building KUnit Kernel ...')
+ stdout.print_with_timestamp('Building KUnit Kernel ...')
build_start = time.time()
success = linux.build_kernel(request.alltests,
@@ -158,7 +160,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -
test_counts = kunit_parser.TestCounts()
exec_time = 0.0
for i, filter_glob in enumerate(filter_globs):
- kunit_parser.print_with_timestamp('Starting KUnit Kernel ({}/{})...'.format(i+1, len(filter_globs)))
+ stdout.print_with_timestamp('Starting KUnit Kernel ({}/{})...'.format(i+1, len(filter_globs)))
test_start = time.time()
run_result = linux.run_kernel(
@@ -221,7 +223,7 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input
else:
with open(request.json, 'w') as f:
f.write(json_str)
- kunit_parser.print_with_timestamp("Test results stored in %s" %
+ stdout.print_with_timestamp("Test results stored in %s" %
os.path.abspath(request.json))
if test_result.status != kunit_parser.TestStatus.SUCCESS:
@@ -245,7 +247,7 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree,
run_end = time.time()
- kunit_parser.print_with_timestamp((
+ stdout.print_with_timestamp((
'Elapsed time: %.3fs total, %.3fs configuring, %.3fs ' +
'building, %.3fs running\n') % (
run_end - run_start,
@@ -291,8 +293,9 @@ def add_common_opts(parser) -> None:
parser.add_argument('--kunitconfig',
help='Path to Kconfig fragment that enables KUnit tests.'
' If given a directory, (e.g. lib/kunit), "/.kunitconfig" '
- 'will get automatically appended.',
- metavar='PATH')
+ 'will get automatically appended. If repeated, the files '
+ 'blindly concatenated, which might not work in all cases.',
+ action='append', metavar='PATHS')
parser.add_argument('--kconfig_add',
help='Additional Kconfig options to append to the '
'.kunitconfig, e.g. CONFIG_KASAN=y. Can be repeated.',
@@ -323,6 +326,10 @@ def add_common_opts(parser) -> None:
'a QemuArchParams object.'),
type=str, metavar='FILE')
+ parser.add_argument('--qemu_args',
+ help='Additional QEMU arguments, e.g. "-smp 8"',
+ action='append', metavar='')
+
def add_build_opts(parser) -> None:
parser.add_argument('--jobs',
help='As in the make command, "Specifies the number of '
@@ -365,7 +372,25 @@ def add_parse_opts(parser) -> None:
'filename is specified',
type=str, const='stdout', default=None, metavar='FILE')
-def main(argv, linux=None):
+
+def tree_from_args(cli_args: argparse.Namespace) -> kunit_kernel.LinuxSourceTree:
+ """Returns a LinuxSourceTree based on the user's arguments."""
+ # Allow users to specify multiple arguments in one string, e.g. '-smp 8'
+ qemu_args: List[str] = []
+ if cli_args.qemu_args:
+ for arg in cli_args.qemu_args:
+ qemu_args.extend(shlex.split(arg))
+
+ return kunit_kernel.LinuxSourceTree(cli_args.build_dir,
+ kunitconfig_paths=cli_args.kunitconfig,
+ kconfig_add=cli_args.kconfig_add,
+ arch=cli_args.arch,
+ cross_compile=cli_args.cross_compile,
+ qemu_config_path=cli_args.qemu_config,
+ extra_qemu_args=qemu_args)
+
+
+def main(argv):
parser = argparse.ArgumentParser(
description='Helps writing and running KUnit tests.')
subparser = parser.add_subparsers(dest='subcommand')
@@ -412,14 +437,7 @@ def main(argv, linux=None):
if not os.path.exists(cli_args.build_dir):
os.mkdir(cli_args.build_dir)
- if not linux:
- linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir,
- kunitconfig_path=cli_args.kunitconfig,
- kconfig_add=cli_args.kconfig_add,
- arch=cli_args.arch,
- cross_compile=cli_args.cross_compile,
- qemu_config_path=cli_args.qemu_config)
-
+ linux = tree_from_args(cli_args)
request = KunitRequest(build_dir=cli_args.build_dir,
make_options=cli_args.make_options,
jobs=cli_args.jobs,
@@ -438,50 +456,29 @@ def main(argv, linux=None):
not os.path.exists(cli_args.build_dir)):
os.mkdir(cli_args.build_dir)
- if not linux:
- linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir,
- kunitconfig_path=cli_args.kunitconfig,
- kconfig_add=cli_args.kconfig_add,
- arch=cli_args.arch,
- cross_compile=cli_args.cross_compile,
- qemu_config_path=cli_args.qemu_config)
-
+ linux = tree_from_args(cli_args)
request = KunitConfigRequest(build_dir=cli_args.build_dir,
make_options=cli_args.make_options)
result = config_tests(linux, request)
- kunit_parser.print_with_timestamp((
+ stdout.print_with_timestamp((
'Elapsed time: %.3fs\n') % (
result.elapsed_time))
if result.status != KunitStatus.SUCCESS:
sys.exit(1)
elif cli_args.subcommand == 'build':
- if not linux:
- linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir,
- kunitconfig_path=cli_args.kunitconfig,
- kconfig_add=cli_args.kconfig_add,
- arch=cli_args.arch,
- cross_compile=cli_args.cross_compile,
- qemu_config_path=cli_args.qemu_config)
-
+ linux = tree_from_args(cli_args)
request = KunitBuildRequest(build_dir=cli_args.build_dir,
make_options=cli_args.make_options,
jobs=cli_args.jobs,
alltests=cli_args.alltests)
result = config_and_build_tests(linux, request)
- kunit_parser.print_with_timestamp((
+ stdout.print_with_timestamp((
'Elapsed time: %.3fs\n') % (
result.elapsed_time))
if result.status != KunitStatus.SUCCESS:
sys.exit(1)
elif cli_args.subcommand == 'exec':
- if not linux:
- linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir,
- kunitconfig_path=cli_args.kunitconfig,
- kconfig_add=cli_args.kconfig_add,
- arch=cli_args.arch,
- cross_compile=cli_args.cross_compile,
- qemu_config_path=cli_args.qemu_config)
-
+ linux = tree_from_args(cli_args)
exec_request = KunitExecRequest(raw_output=cli_args.raw_output,
build_dir=cli_args.build_dir,
json=cli_args.json,
@@ -491,7 +488,7 @@ def main(argv, linux=None):
kernel_args=cli_args.kernel_args,
run_isolated=cli_args.run_isolated)
result = exec_tests(linux, exec_request)
- kunit_parser.print_with_timestamp((
+ stdout.print_with_timestamp((
'Elapsed time: %.3fs\n') % (result.elapsed_time))
if result.status != KunitStatus.SUCCESS:
sys.exit(1)
diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
index 75a8dc1683d4..48b5f34b2e5d 100644
--- a/tools/testing/kunit/kunit_config.py
+++ b/tools/testing/kunit/kunit_config.py
@@ -8,7 +8,7 @@
from dataclasses import dataclass
import re
-from typing import List, Set
+from typing import Dict, Iterable, List, Set, Tuple
CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$'
CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$'
@@ -32,35 +32,51 @@ class Kconfig:
"""Represents defconfig or .config specified using the Kconfig language."""
def __init__(self) -> None:
- self._entries = [] # type: List[KconfigEntry]
+ self._entries = {} # type: Dict[str, str]
- def entries(self) -> Set[KconfigEntry]:
- return set(self._entries)
+ def __eq__(self, other) -> bool:
+ if not isinstance(other, self.__class__):
+ return False
+ return self._entries == other._entries
- def add_entry(self, entry: KconfigEntry) -> None:
- self._entries.append(entry)
+ def __repr__(self) -> str:
+ return ','.join(str(e) for e in self.as_entries())
+
+ def as_entries(self) -> Iterable[KconfigEntry]:
+ for name, value in self._entries.items():
+ yield KconfigEntry(name, value)
+
+ def add_entry(self, name: str, value: str) -> None:
+ self._entries[name] = value
def is_subset_of(self, other: 'Kconfig') -> bool:
- other_dict = {e.name: e.value for e in other.entries()}
- for a in self.entries():
- b = other_dict.get(a.name)
+ for name, value in self._entries.items():
+ b = other._entries.get(name)
if b is None:
- if a.value == 'n':
+ if value == 'n':
continue
return False
- if a.value != b:
+ if value != b:
return False
return True
+ def conflicting_options(self, other: 'Kconfig') -> List[Tuple[KconfigEntry, KconfigEntry]]:
+ diff = [] # type: List[Tuple[KconfigEntry, KconfigEntry]]
+ for name, value in self._entries.items():
+ b = other._entries.get(name)
+ if b and value != b:
+ pair = (KconfigEntry(name, value), KconfigEntry(name, b))
+ diff.append(pair)
+ return diff
+
def merge_in_entries(self, other: 'Kconfig') -> None:
- if other.is_subset_of(self):
- return
- self._entries = list(self.entries().union(other.entries()))
+ for name, value in other._entries.items():
+ self._entries[name] = value
def write_to_file(self, path: str) -> None:
with open(path, 'a+') as f:
- for entry in self.entries():
- f.write(str(entry) + '\n')
+ for e in self.as_entries():
+ f.write(str(e) + '\n')
def parse_file(path: str) -> Kconfig:
with open(path, 'r') as f:
@@ -78,14 +94,12 @@ def parse_from_string(blob: str) -> Kconfig:
match = config_matcher.match(line)
if match:
- entry = KconfigEntry(match.group(1), match.group(2))
- kconfig.add_entry(entry)
+ kconfig.add_entry(match.group(1), match.group(2))
continue
empty_match = is_not_set_matcher.match(line)
if empty_match:
- entry = KconfigEntry(empty_match.group(1), 'n')
- kconfig.add_entry(entry)
+ kconfig.add_entry(empty_match.group(1), 'n')
continue
if line[0] == '#':
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 3539efaf99ba..f5c26ea89714 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -18,7 +18,7 @@ import threading
from typing import Iterator, List, Optional, Tuple
import kunit_config
-import kunit_parser
+from kunit_printer import stdout
import qemu_config
KCONFIG_PATH = '.config'
@@ -26,6 +26,7 @@ KUNITCONFIG_PATH = '.kunitconfig'
OLD_KUNITCONFIG_PATH = 'last_used_kunitconfig'
DEFAULT_KUNITCONFIG_PATH = 'tools/testing/kunit/configs/default.config'
BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config'
+UML_KCONFIG_PATH = 'tools/testing/kunit/configs/arch_uml.config'
OUTFILE_PATH = 'test.log'
ABS_TOOL_PATH = os.path.abspath(os.path.dirname(__file__))
QEMU_CONFIGS_DIR = os.path.join(ABS_TOOL_PATH, 'qemu_configs')
@@ -53,8 +54,8 @@ class LinuxSourceTreeOperations:
except subprocess.CalledProcessError as e:
raise ConfigError(e.output.decode())
- def make_arch_qemuconfig(self, base_kunitconfig: kunit_config.Kconfig) -> None:
- pass
+ def make_arch_config(self, base_kunitconfig: kunit_config.Kconfig) -> kunit_config.Kconfig:
+ return base_kunitconfig
def make_allyesconfig(self, build_dir: str, make_options) -> None:
raise ConfigError('Only the "um" arch is supported for alltests')
@@ -109,9 +110,10 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations):
self._kernel_command_line = qemu_arch_params.kernel_command_line + ' kunit_shutdown=reboot'
self._extra_qemu_params = qemu_arch_params.extra_qemu_params
- def make_arch_qemuconfig(self, base_kunitconfig: kunit_config.Kconfig) -> None:
+ def make_arch_config(self, base_kunitconfig: kunit_config.Kconfig) -> kunit_config.Kconfig:
kconfig = kunit_config.parse_from_string(self._kconfig)
- base_kunitconfig.merge_in_entries(kconfig)
+ kconfig.merge_in_entries(base_kunitconfig)
+ return kconfig
def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
kernel_path = os.path.join(build_dir, self._kernel_path)
@@ -137,8 +139,13 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations):
def __init__(self, cross_compile=None):
super().__init__(linux_arch='um', cross_compile=cross_compile)
+ def make_arch_config(self, base_kunitconfig: kunit_config.Kconfig) -> kunit_config.Kconfig:
+ kconfig = kunit_config.parse_file(UML_KCONFIG_PATH)
+ kconfig.merge_in_entries(base_kunitconfig)
+ return kconfig
+
def make_allyesconfig(self, build_dir: str, make_options) -> None:
- kunit_parser.print_with_timestamp(
+ stdout.print_with_timestamp(
'Enabling all CONFIGs for UML...')
command = ['make', 'ARCH=um', 'O=' + build_dir, 'allyesconfig']
if make_options:
@@ -148,18 +155,19 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations):
stdout=subprocess.DEVNULL,
stderr=subprocess.STDOUT)
process.wait()
- kunit_parser.print_with_timestamp(
+ stdout.print_with_timestamp(
'Disabling broken configs to run KUnit tests...')
with open(get_kconfig_path(build_dir), 'a') as config:
with open(BROKEN_ALLCONFIG_PATH, 'r') as disable:
config.write(disable.read())
- kunit_parser.print_with_timestamp(
+ stdout.print_with_timestamp(
'Starting Kernel with all configs takes a few minutes...')
def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
"""Runs the Linux UML binary. Must be named 'linux'."""
linux_bin = os.path.join(build_dir, 'linux')
+ params.extend(['mem=1G', 'console=tty', 'kunit_shutdown=halt'])
return subprocess.Popen([linux_bin] + params,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
@@ -175,22 +183,44 @@ def get_kunitconfig_path(build_dir: str) -> str:
def get_old_kunitconfig_path(build_dir: str) -> str:
return os.path.join(build_dir, OLD_KUNITCONFIG_PATH)
+def get_parsed_kunitconfig(build_dir: str,
+ kunitconfig_paths: Optional[List[str]]=None) -> kunit_config.Kconfig:
+ if not kunitconfig_paths:
+ path = get_kunitconfig_path(build_dir)
+ if not os.path.exists(path):
+ shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, path)
+ return kunit_config.parse_file(path)
+
+ merged = kunit_config.Kconfig()
+
+ for path in kunitconfig_paths:
+ if os.path.isdir(path):
+ path = os.path.join(path, KUNITCONFIG_PATH)
+ if not os.path.exists(path):
+ raise ConfigError(f'Specified kunitconfig ({path}) does not exist')
+
+ partial = kunit_config.parse_file(path)
+ diff = merged.conflicting_options(partial)
+ if diff:
+ diff_str = '\n\n'.join(f'{a}\n vs from {path}\n{b}' for a, b in diff)
+ raise ConfigError(f'Multiple values specified for {len(diff)} options in kunitconfig:\n{diff_str}')
+ merged.merge_in_entries(partial)
+ return merged
+
def get_outfile_path(build_dir: str) -> str:
return os.path.join(build_dir, OUTFILE_PATH)
-def get_source_tree_ops(arch: str, cross_compile: Optional[str]) -> LinuxSourceTreeOperations:
+def _default_qemu_config_path(arch: str) -> str:
config_path = os.path.join(QEMU_CONFIGS_DIR, arch + '.py')
- if arch == 'um':
- return LinuxSourceTreeOperationsUml(cross_compile=cross_compile)
if os.path.isfile(config_path):
- return get_source_tree_ops_from_qemu_config(config_path, cross_compile)[1]
+ return config_path
options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')]
raise ConfigError(arch + ' is not a valid arch, options are ' + str(sorted(options)))
-def get_source_tree_ops_from_qemu_config(config_path: str,
- cross_compile: Optional[str]) -> Tuple[
- str, LinuxSourceTreeOperations]:
+def _get_qemu_ops(config_path: str,
+ extra_qemu_args: Optional[List[str]],
+ cross_compile: Optional[str]) -> Tuple[str, LinuxSourceTreeOperations]:
# The module name/path has very little to do with where the actual file
# exists (I learned this through experimentation and could not find it
# anywhere in the Python documentation).
@@ -210,6 +240,8 @@ def get_source_tree_ops_from_qemu_config(config_path: str,
if not hasattr(config, 'QEMU_ARCH'):
raise ValueError('qemu_config module missing "QEMU_ARCH": ' + config_path)
params: qemu_config.QemuArchParams = config.QEMU_ARCH # type: ignore
+ if extra_qemu_args:
+ params.extra_qemu_params.extend(extra_qemu_args)
return params.linux_arch, LinuxSourceTreeOperationsQemu(
params, cross_compile=cross_compile)
@@ -219,34 +251,24 @@ class LinuxSourceTree:
def __init__(
self,
build_dir: str,
- load_config=True,
- kunitconfig_path='',
+ kunitconfig_paths: Optional[List[str]]=None,
kconfig_add: Optional[List[str]]=None,
arch=None,
cross_compile=None,
- qemu_config_path=None) -> None:
+ qemu_config_path=None,
+ extra_qemu_args=None) -> None:
signal.signal(signal.SIGINT, self.signal_handler)
if qemu_config_path:
- self._arch, self._ops = get_source_tree_ops_from_qemu_config(
- qemu_config_path, cross_compile)
+ self._arch, self._ops = _get_qemu_ops(qemu_config_path, extra_qemu_args, cross_compile)
else:
self._arch = 'um' if arch is None else arch
- self._ops = get_source_tree_ops(self._arch, cross_compile)
-
- if not load_config:
- return
+ if self._arch == 'um':
+ self._ops = LinuxSourceTreeOperationsUml(cross_compile=cross_compile)
+ else:
+ qemu_config_path = _default_qemu_config_path(self._arch)
+ _, self._ops = _get_qemu_ops(qemu_config_path, extra_qemu_args, cross_compile)
- if kunitconfig_path:
- if os.path.isdir(kunitconfig_path):
- kunitconfig_path = os.path.join(kunitconfig_path, KUNITCONFIG_PATH)
- if not os.path.exists(kunitconfig_path):
- raise ConfigError(f'Specified kunitconfig ({kunitconfig_path}) does not exist')
- else:
- kunitconfig_path = get_kunitconfig_path(build_dir)
- if not os.path.exists(kunitconfig_path):
- shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, kunitconfig_path)
-
- self._kconfig = kunit_config.parse_file(kunitconfig_path)
+ self._kconfig = get_parsed_kunitconfig(build_dir, kunitconfig_paths)
if kconfig_add:
kconfig = kunit_config.parse_from_string('\n'.join(kconfig_add))
self._kconfig.merge_in_entries(kconfig)
@@ -267,10 +289,10 @@ class LinuxSourceTree:
validated_kconfig = kunit_config.parse_file(kconfig_path)
if self._kconfig.is_subset_of(validated_kconfig):
return True
- invalid = self._kconfig.entries() - validated_kconfig.entries()
+ missing = set(self._kconfig.as_entries()) - set(validated_kconfig.as_entries())
message = 'Not all Kconfig options selected in kunitconfig were in the generated .config.\n' \
'This is probably due to unsatisfied dependencies.\n' \
- 'Missing: ' + ', '.join([str(e) for e in invalid])
+ 'Missing: ' + ', '.join(str(e) for e in missing)
if self._arch == 'um':
message += '\nNote: many Kconfig options aren\'t available on UML. You can try running ' \
'on a different architecture with something like "--arch=x86_64".'
@@ -282,7 +304,7 @@ class LinuxSourceTree:
if build_dir and not os.path.exists(build_dir):
os.mkdir(build_dir)
try:
- self._ops.make_arch_qemuconfig(self._kconfig)
+ self._kconfig = self._ops.make_arch_config(self._kconfig)
self._kconfig.write_to_file(kconfig_path)
self._ops.make_olddefconfig(build_dir, make_options)
except ConfigError as e:
@@ -303,7 +325,7 @@ class LinuxSourceTree:
return True
old_kconfig = kunit_config.parse_file(old_path)
- return old_kconfig.entries() != self._kconfig.entries()
+ return old_kconfig != self._kconfig
def build_reconfig(self, build_dir: str, make_options) -> bool:
"""Creates a new .config if it is not a subset of the .kunitconfig."""
@@ -313,7 +335,8 @@ class LinuxSourceTree:
return self.build_config(build_dir, make_options)
existing_kconfig = kunit_config.parse_file(kconfig_path)
- self._ops.make_arch_qemuconfig(self._kconfig)
+ self._kconfig = self._ops.make_arch_config(self._kconfig)
+
if self._kconfig.is_subset_of(existing_kconfig) and not self._kunitconfig_changed(build_dir):
return True
print('Regenerating .config ...')
@@ -334,7 +357,6 @@ class LinuxSourceTree:
def run_kernel(self, args=None, build_dir='', filter_glob='', timeout=None) -> Iterator[str]:
if not args:
args = []
- args.extend(['mem=1G', 'console=tty', 'kunit_shutdown=halt'])
if filter_glob:
args.append('kunit.filter_glob='+filter_glob)
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index c5569b367c69..12d3ec77f427 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -13,10 +13,11 @@ from __future__ import annotations
import re
import sys
-import datetime
from enum import Enum, auto
from typing import Iterable, Iterator, List, Optional, Tuple
+from kunit_printer import stdout
+
class Test:
"""
A class to represent a test parsed from KTAP results. All KTAP
@@ -55,7 +56,7 @@ class Test:
def add_error(self, error_message: str) -> None:
"""Records an error that occurred while parsing this test."""
self.counts.errors += 1
- print_with_timestamp(red('[ERROR]') + f' Test: {self.name}: {error_message}')
+ stdout.print_with_timestamp(stdout.red('[ERROR]') + f' Test: {self.name}: {error_message}')
class TestStatus(Enum):
"""An enumeration class to represent the status of a test."""
@@ -461,32 +462,6 @@ def parse_diagnostic(lines: LineStream) -> List[str]:
DIVIDER = '=' * 60
-RESET = '\033[0;0m'
-
-def red(text: str) -> str:
- """Returns inputted string with red color code."""
- if not sys.stdout.isatty():
- return text
- return '\033[1;31m' + text + RESET
-
-def yellow(text: str) -> str:
- """Returns inputted string with yellow color code."""
- if not sys.stdout.isatty():
- return text
- return '\033[1;33m' + text + RESET
-
-def green(text: str) -> str:
- """Returns inputted string with green color code."""
- if not sys.stdout.isatty():
- return text
- return '\033[1;32m' + text + RESET
-
-ANSI_LEN = len(red(''))
-
-def print_with_timestamp(message: str) -> None:
- """Prints message with timestamp at beginning."""
- print('[%s] %s' % (datetime.datetime.now().strftime('%H:%M:%S'), message))
-
def format_test_divider(message: str, len_message: int) -> str:
"""
Returns string with message centered in fixed width divider.
@@ -529,12 +504,12 @@ def print_test_header(test: Test) -> None:
message += ' (1 subtest)'
else:
message += f' ({test.expected_count} subtests)'
- print_with_timestamp(format_test_divider(message, len(message)))
+ stdout.print_with_timestamp(format_test_divider(message, len(message)))
def print_log(log: Iterable[str]) -> None:
"""Prints all strings in saved log for test in yellow."""
for m in log:
- print_with_timestamp(yellow(m))
+ stdout.print_with_timestamp(stdout.yellow(m))
def format_test_result(test: Test) -> str:
"""
@@ -551,16 +526,16 @@ def format_test_result(test: Test) -> str:
String containing formatted test result
"""
if test.status == TestStatus.SUCCESS:
- return green('[PASSED] ') + test.name
+ return stdout.green('[PASSED] ') + test.name
if test.status == TestStatus.SKIPPED:
- return yellow('[SKIPPED] ') + test.name
+ return stdout.yellow('[SKIPPED] ') + test.name
if test.status == TestStatus.NO_TESTS:
- return yellow('[NO TESTS RUN] ') + test.name
+ return stdout.yellow('[NO TESTS RUN] ') + test.name
if test.status == TestStatus.TEST_CRASHED:
print_log(test.log)
- return red('[CRASHED] ') + test.name
+ return stdout.red('[CRASHED] ') + test.name
print_log(test.log)
- return red('[FAILED] ') + test.name
+ return stdout.red('[FAILED] ') + test.name
def print_test_result(test: Test) -> None:
"""
@@ -572,7 +547,7 @@ def print_test_result(test: Test) -> None:
Parameters:
test - Test object representing current test being printed
"""
- print_with_timestamp(format_test_result(test))
+ stdout.print_with_timestamp(format_test_result(test))
def print_test_footer(test: Test) -> None:
"""
@@ -585,8 +560,8 @@ def print_test_footer(test: Test) -> None:
test - Test object representing current test being printed
"""
message = format_test_result(test)
- print_with_timestamp(format_test_divider(message,
- len(message) - ANSI_LEN))
+ stdout.print_with_timestamp(format_test_divider(message,
+ len(message) - stdout.color_len()))
def print_summary_line(test: Test) -> None:
"""
@@ -603,12 +578,12 @@ def print_summary_line(test: Test) -> None:
test - Test object representing current test being printed
"""
if test.status == TestStatus.SUCCESS:
- color = green
+ color = stdout.green
elif test.status in (TestStatus.SKIPPED, TestStatus.NO_TESTS):
- color = yellow
+ color = stdout.yellow
else:
- color = red
- print_with_timestamp(color(f'Testing complete. {test.counts}'))
+ color = stdout.red
+ stdout.print_with_timestamp(color(f'Testing complete. {test.counts}'))
# Other methods:
@@ -762,7 +737,7 @@ def parse_run_tests(kernel_output: Iterable[str]) -> Test:
Return:
Test - the main test object with all subtests.
"""
- print_with_timestamp(DIVIDER)
+ stdout.print_with_timestamp(DIVIDER)
lines = extract_tap_lines(kernel_output)
test = Test()
if not lines:
@@ -773,6 +748,6 @@ def parse_run_tests(kernel_output: Iterable[str]) -> Test:
test = parse_test(lines, 0, [])
if test.status != TestStatus.NO_TESTS:
test.status = test.counts.get_status()
- print_with_timestamp(DIVIDER)
+ stdout.print_with_timestamp(DIVIDER)
print_summary_line(test)
return test
diff --git a/tools/testing/kunit/kunit_printer.py b/tools/testing/kunit/kunit_printer.py
new file mode 100644
index 000000000000..5f1cc55ecdf5
--- /dev/null
+++ b/tools/testing/kunit/kunit_printer.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Utilities for printing and coloring output.
+#
+# Copyright (C) 2022, Google LLC.
+# Author: Daniel Latypov <dlatypov@google.com>
+
+import datetime
+import sys
+import typing
+
+_RESET = '\033[0;0m'
+
+class Printer:
+ """Wraps a file object, providing utilities for coloring output, etc."""
+
+ def __init__(self, output: typing.IO):
+ self._output = output
+ self._use_color = output.isatty()
+
+ def print(self, message: str) -> None:
+ print(message, file=self._output)
+
+ def print_with_timestamp(self, message: str) -> None:
+ ts = datetime.datetime.now().strftime('%H:%M:%S')
+ self.print(f'[{ts}] {message}')
+
+ def _color(self, code: str, text: str) -> str:
+ if not self._use_color:
+ return text
+ return code + text + _RESET
+
+ def red(self, text: str) -> str:
+ return self._color('\033[1;31m', text)
+
+ def yellow(self, text: str) -> str:
+ return self._color('\033[1;33m', text)
+
+ def green(self, text: str) -> str:
+ return self._color('\033[1;32m', text)
+
+ def color_len(self) -> int:
+ """Returns the length of the color escape codes."""
+ return len(self.red(''))
+
+# Provides a default instance that prints to stdout
+stdout = Printer(sys.stdout)
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 25a2eb3bf114..446ac432d9a4 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -45,7 +45,7 @@ class KconfigTest(unittest.TestCase):
self.assertTrue(kconfig0.is_subset_of(kconfig0))
kconfig1 = kunit_config.Kconfig()
- kconfig1.add_entry(kunit_config.KconfigEntry('TEST', 'y'))
+ kconfig1.add_entry('TEST', 'y')
self.assertTrue(kconfig1.is_subset_of(kconfig1))
self.assertTrue(kconfig0.is_subset_of(kconfig1))
self.assertFalse(kconfig1.is_subset_of(kconfig0))
@@ -56,40 +56,28 @@ class KconfigTest(unittest.TestCase):
kconfig = kunit_config.parse_file(kconfig_path)
expected_kconfig = kunit_config.Kconfig()
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('UML', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('MMU', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('TEST', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('EXAMPLE_TEST', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('MK8', 'n'))
-
- self.assertEqual(kconfig.entries(), expected_kconfig.entries())
+ expected_kconfig.add_entry('UML', 'y')
+ expected_kconfig.add_entry('MMU', 'y')
+ expected_kconfig.add_entry('TEST', 'y')
+ expected_kconfig.add_entry('EXAMPLE_TEST', 'y')
+ expected_kconfig.add_entry('MK8', 'n')
+
+ self.assertEqual(kconfig, expected_kconfig)
def test_write_to_file(self):
kconfig_path = os.path.join(test_tmpdir, '.config')
expected_kconfig = kunit_config.Kconfig()
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('UML', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('MMU', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('TEST', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('EXAMPLE_TEST', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('MK8', 'n'))
+ expected_kconfig.add_entry('UML', 'y')
+ expected_kconfig.add_entry('MMU', 'y')
+ expected_kconfig.add_entry('TEST', 'y')
+ expected_kconfig.add_entry('EXAMPLE_TEST', 'y')
+ expected_kconfig.add_entry('MK8', 'n')
expected_kconfig.write_to_file(kconfig_path)
actual_kconfig = kunit_config.parse_file(kconfig_path)
-
- self.assertEqual(actual_kconfig.entries(),
- expected_kconfig.entries())
+ self.assertEqual(actual_kconfig, expected_kconfig)
class KUnitParserTest(unittest.TestCase):
@@ -222,7 +210,7 @@ class KUnitParserTest(unittest.TestCase):
def test_no_kunit_output(self):
crash_log = test_data_path('test_insufficient_memory.log')
- print_mock = mock.patch('builtins.print').start()
+ print_mock = mock.patch('kunit_printer.Printer.print').start()
with open(crash_log) as file:
result = kunit_parser.parse_run_tests(
kunit_parser.extract_tap_lines(file.readlines()))
@@ -368,21 +356,53 @@ class LinuxSourceTreeTest(unittest.TestCase):
def test_invalid_kunitconfig(self):
with self.assertRaisesRegex(kunit_kernel.ConfigError, 'nonexistent.* does not exist'):
- kunit_kernel.LinuxSourceTree('', kunitconfig_path='/nonexistent_file')
+ kunit_kernel.LinuxSourceTree('', kunitconfig_paths=['/nonexistent_file'])
def test_valid_kunitconfig(self):
with tempfile.NamedTemporaryFile('wt') as kunitconfig:
- kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name)
+ kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[kunitconfig.name])
def test_dir_kunitconfig(self):
with tempfile.TemporaryDirectory('') as dir:
with open(os.path.join(dir, '.kunitconfig'), 'w'):
pass
- kunit_kernel.LinuxSourceTree('', kunitconfig_path=dir)
+ kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[dir])
+
+ def test_multiple_kunitconfig(self):
+ want_kconfig = kunit_config.Kconfig()
+ want_kconfig.add_entry('KUNIT', 'y')
+ want_kconfig.add_entry('KUNIT_TEST', 'm')
+
+ with tempfile.TemporaryDirectory('') as dir:
+ other = os.path.join(dir, 'otherkunitconfig')
+ with open(os.path.join(dir, '.kunitconfig'), 'w') as f:
+ f.write('CONFIG_KUNIT=y')
+ with open(other, 'w') as f:
+ f.write('CONFIG_KUNIT_TEST=m')
+ pass
+
+ tree = kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[dir, other])
+ self.assertTrue(want_kconfig.is_subset_of(tree._kconfig), msg=tree._kconfig)
+
+
+ def test_multiple_kunitconfig_invalid(self):
+ with tempfile.TemporaryDirectory('') as dir:
+ other = os.path.join(dir, 'otherkunitconfig')
+ with open(os.path.join(dir, '.kunitconfig'), 'w') as f:
+ f.write('CONFIG_KUNIT=y')
+ with open(other, 'w') as f:
+ f.write('CONFIG_KUNIT=m')
+
+ with self.assertRaisesRegex(kunit_kernel.ConfigError, '(?s)Multiple values.*CONFIG_KUNIT'):
+ kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[dir, other])
+
def test_kconfig_add(self):
+ want_kconfig = kunit_config.Kconfig()
+ want_kconfig.add_entry('NOT_REAL', 'y')
+
tree = kunit_kernel.LinuxSourceTree('', kconfig_add=['CONFIG_NOT_REAL=y'])
- self.assertIn(kunit_config.KconfigEntry('NOT_REAL', 'y'), tree._kconfig.entries())
+ self.assertTrue(want_kconfig.is_subset_of(tree._kconfig), msg=tree._kconfig)
def test_invalid_arch(self):
with self.assertRaisesRegex(kunit_kernel.ConfigError, 'not a valid arch, options are.*x86_64'):
@@ -393,7 +413,7 @@ class LinuxSourceTreeTest(unittest.TestCase):
return subprocess.Popen(['echo "hi\nbye"'], shell=True, text=True, stdout=subprocess.PIPE)
with tempfile.TemporaryDirectory('') as build_dir:
- tree = kunit_kernel.LinuxSourceTree(build_dir, load_config=False)
+ tree = kunit_kernel.LinuxSourceTree(build_dir)
mock.patch.object(tree._ops, 'start', side_effect=fake_start).start()
with self.assertRaises(ValueError):
@@ -410,6 +430,10 @@ class LinuxSourceTreeTest(unittest.TestCase):
f.write('CONFIG_KUNIT=y')
tree = kunit_kernel.LinuxSourceTree(build_dir)
+ # Stub out the source tree operations, so we don't have
+ # the defaults for any given architecture get in the
+ # way.
+ tree._ops = kunit_kernel.LinuxSourceTreeOperations('none', None)
mock_build_config = mock.patch.object(tree, 'build_config').start()
# Should generate the .config
@@ -427,6 +451,10 @@ class LinuxSourceTreeTest(unittest.TestCase):
f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y')
tree = kunit_kernel.LinuxSourceTree(build_dir)
+ # Stub out the source tree operations, so we don't have
+ # the defaults for any given architecture get in the
+ # way.
+ tree._ops = kunit_kernel.LinuxSourceTreeOperations('none', None)
mock_build_config = mock.patch.object(tree, 'build_config').start()
self.assertTrue(tree.build_reconfig(build_dir, make_options=[]))
@@ -443,6 +471,10 @@ class LinuxSourceTreeTest(unittest.TestCase):
f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y')
tree = kunit_kernel.LinuxSourceTree(build_dir)
+ # Stub out the source tree operations, so we don't have
+ # the defaults for any given architecture get in the
+ # way.
+ tree._ops = kunit_kernel.LinuxSourceTreeOperations('none', None)
mock_build_config = mock.patch.object(tree, 'build_config').start()
# ... so we should trigger a call to build_config()
@@ -500,27 +532,28 @@ class KUnitMainTest(unittest.TestCase):
with open(path) as file:
all_passed_log = file.readlines()
- self.print_mock = mock.patch('builtins.print').start()
+ self.print_mock = mock.patch('kunit_printer.Printer.print').start()
self.addCleanup(mock.patch.stopall)
- self.linux_source_mock = mock.Mock()
- self.linux_source_mock.build_reconfig = mock.Mock(return_value=True)
- self.linux_source_mock.build_kernel = mock.Mock(return_value=True)
- self.linux_source_mock.run_kernel = mock.Mock(return_value=all_passed_log)
+ self.mock_linux_init = mock.patch.object(kunit_kernel, 'LinuxSourceTree').start()
+ self.linux_source_mock = self.mock_linux_init.return_value
+ self.linux_source_mock.build_reconfig.return_value = True
+ self.linux_source_mock.build_kernel.return_value = True
+ self.linux_source_mock.run_kernel.return_value = all_passed_log
def test_config_passes_args_pass(self):
- kunit.main(['config', '--build_dir=.kunit'], self.linux_source_mock)
+ kunit.main(['config', '--build_dir=.kunit'])
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0)
def test_build_passes_args_pass(self):
- kunit.main(['build'], self.linux_source_mock)
+ kunit.main(['build'])
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.linux_source_mock.build_kernel.assert_called_once_with(False, kunit.get_default_jobs(), '.kunit', None)
self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0)
def test_exec_passes_args_pass(self):
- kunit.main(['exec'], self.linux_source_mock)
+ kunit.main(['exec'])
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0)
self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
@@ -528,7 +561,7 @@ class KUnitMainTest(unittest.TestCase):
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_passes_args_pass(self):
- kunit.main(['run'], self.linux_source_mock)
+ kunit.main(['run'])
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
@@ -538,13 +571,13 @@ class KUnitMainTest(unittest.TestCase):
def test_exec_passes_args_fail(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
with self.assertRaises(SystemExit) as e:
- kunit.main(['exec'], self.linux_source_mock)
+ kunit.main(['exec'])
self.assertEqual(e.exception.code, 1)
def test_run_passes_args_fail(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
with self.assertRaises(SystemExit) as e:
- kunit.main(['run'], self.linux_source_mock)
+ kunit.main(['run'])
self.assertEqual(e.exception.code, 1)
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
@@ -553,7 +586,7 @@ class KUnitMainTest(unittest.TestCase):
def test_exec_no_tests(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=['TAP version 14', '1..0'])
with self.assertRaises(SystemExit) as e:
- kunit.main(['run'], self.linux_source_mock)
+ kunit.main(['run'])
self.assertEqual(e.exception.code, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
args=None, build_dir='.kunit', filter_glob='', timeout=300)
@@ -561,7 +594,7 @@ class KUnitMainTest(unittest.TestCase):
def test_exec_raw_output(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
- kunit.main(['exec', '--raw_output'], self.linux_source_mock)
+ kunit.main(['exec', '--raw_output'])
self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
for call in self.print_mock.call_args_list:
self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
@@ -569,7 +602,7 @@ class KUnitMainTest(unittest.TestCase):
def test_run_raw_output(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
- kunit.main(['run', '--raw_output'], self.linux_source_mock)
+ kunit.main(['run', '--raw_output'])
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
for call in self.print_mock.call_args_list:
@@ -578,7 +611,7 @@ class KUnitMainTest(unittest.TestCase):
def test_run_raw_output_kunit(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
- kunit.main(['run', '--raw_output=kunit'], self.linux_source_mock)
+ kunit.main(['run', '--raw_output=kunit'])
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
for call in self.print_mock.call_args_list:
@@ -588,27 +621,27 @@ class KUnitMainTest(unittest.TestCase):
def test_run_raw_output_invalid(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
with self.assertRaises(SystemExit) as e:
- kunit.main(['run', '--raw_output=invalid'], self.linux_source_mock)
+ kunit.main(['run', '--raw_output=invalid'])
self.assertNotEqual(e.exception.code, 0)
def test_run_raw_output_does_not_take_positional_args(self):
# --raw_output is a string flag, but we don't want it to consume
# any positional arguments, only ones after an '='
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
- kunit.main(['run', '--raw_output', 'filter_glob'], self.linux_source_mock)
+ kunit.main(['run', '--raw_output', 'filter_glob'])
self.linux_source_mock.run_kernel.assert_called_once_with(
args=None, build_dir='.kunit', filter_glob='filter_glob', timeout=300)
def test_exec_timeout(self):
timeout = 3453
- kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock)
+ kunit.main(['exec', '--timeout', str(timeout)])
self.linux_source_mock.run_kernel.assert_called_once_with(
args=None, build_dir='.kunit', filter_glob='', timeout=timeout)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_timeout(self):
timeout = 3453
- kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock)
+ kunit.main(['run', '--timeout', str(timeout)])
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
args=None, build_dir='.kunit', filter_glob='', timeout=timeout)
@@ -616,7 +649,7 @@ class KUnitMainTest(unittest.TestCase):
def test_run_builddir(self):
build_dir = '.kunit'
- kunit.main(['run', '--build_dir=.kunit'], self.linux_source_mock)
+ kunit.main(['run', '--build_dir=.kunit'])
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
args=None, build_dir=build_dir, filter_glob='', timeout=300)
@@ -624,60 +657,81 @@ class KUnitMainTest(unittest.TestCase):
def test_config_builddir(self):
build_dir = '.kunit'
- kunit.main(['config', '--build_dir', build_dir], self.linux_source_mock)
+ kunit.main(['config', '--build_dir', build_dir])
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
def test_build_builddir(self):
build_dir = '.kunit'
jobs = kunit.get_default_jobs()
- kunit.main(['build', '--build_dir', build_dir], self.linux_source_mock)
+ kunit.main(['build', '--build_dir', build_dir])
self.linux_source_mock.build_kernel.assert_called_once_with(False, jobs, build_dir, None)
def test_exec_builddir(self):
build_dir = '.kunit'
- kunit.main(['exec', '--build_dir', build_dir], self.linux_source_mock)
+ kunit.main(['exec', '--build_dir', build_dir])
self.linux_source_mock.run_kernel.assert_called_once_with(
args=None, build_dir=build_dir, filter_glob='', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
- @mock.patch.object(kunit_kernel, 'LinuxSourceTree')
- def test_run_kunitconfig(self, mock_linux_init):
- mock_linux_init.return_value = self.linux_source_mock
+ def test_run_kunitconfig(self):
kunit.main(['run', '--kunitconfig=mykunitconfig'])
# Just verify that we parsed and initialized it correctly here.
- mock_linux_init.assert_called_once_with('.kunit',
- kunitconfig_path='mykunitconfig',
- kconfig_add=None,
- arch='um',
- cross_compile=None,
- qemu_config_path=None)
+ self.mock_linux_init.assert_called_once_with('.kunit',
+ kunitconfig_paths=['mykunitconfig'],
+ kconfig_add=None,
+ arch='um',
+ cross_compile=None,
+ qemu_config_path=None,
+ extra_qemu_args=[])
+
+ def test_config_kunitconfig(self):
+ kunit.main(['config', '--kunitconfig=mykunitconfig'])
+ # Just verify that we parsed and initialized it correctly here.
+ self.mock_linux_init.assert_called_once_with('.kunit',
+ kunitconfig_paths=['mykunitconfig'],
+ kconfig_add=None,
+ arch='um',
+ cross_compile=None,
+ qemu_config_path=None,
+ extra_qemu_args=[])
@mock.patch.object(kunit_kernel, 'LinuxSourceTree')
- def test_config_kunitconfig(self, mock_linux_init):
+ def test_run_multiple_kunitconfig(self, mock_linux_init):
mock_linux_init.return_value = self.linux_source_mock
- kunit.main(['config', '--kunitconfig=mykunitconfig'])
+ kunit.main(['run', '--kunitconfig=mykunitconfig', '--kunitconfig=other'])
# Just verify that we parsed and initialized it correctly here.
mock_linux_init.assert_called_once_with('.kunit',
- kunitconfig_path='mykunitconfig',
+ kunitconfig_paths=['mykunitconfig', 'other'],
kconfig_add=None,
arch='um',
cross_compile=None,
- qemu_config_path=None)
+ qemu_config_path=None,
+ extra_qemu_args=[])
- @mock.patch.object(kunit_kernel, 'LinuxSourceTree')
- def test_run_kconfig_add(self, mock_linux_init):
- mock_linux_init.return_value = self.linux_source_mock
+ def test_run_kconfig_add(self):
kunit.main(['run', '--kconfig_add=CONFIG_KASAN=y', '--kconfig_add=CONFIG_KCSAN=y'])
# Just verify that we parsed and initialized it correctly here.
- mock_linux_init.assert_called_once_with('.kunit',
- kunitconfig_path=None,
- kconfig_add=['CONFIG_KASAN=y', 'CONFIG_KCSAN=y'],
- arch='um',
- cross_compile=None,
- qemu_config_path=None)
+ self.mock_linux_init.assert_called_once_with('.kunit',
+ kunitconfig_paths=None,
+ kconfig_add=['CONFIG_KASAN=y', 'CONFIG_KCSAN=y'],
+ arch='um',
+ cross_compile=None,
+ qemu_config_path=None,
+ extra_qemu_args=[])
+
+ def test_run_qemu_args(self):
+ kunit.main(['run', '--arch=x86_64', '--qemu_args', '-m 2048'])
+ # Just verify that we parsed and initialized it correctly here.
+ self.mock_linux_init.assert_called_once_with('.kunit',
+ kunitconfig_paths=None,
+ kconfig_add=None,
+ arch='x86_64',
+ cross_compile=None,
+ qemu_config_path=None,
+ extra_qemu_args=['-m', '2048'])
def test_run_kernel_args(self):
- kunit.main(['run', '--kernel_args=a=1', '--kernel_args=b=2'], self.linux_source_mock)
+ kunit.main(['run', '--kernel_args=a=1', '--kernel_args=b=2'])
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
args=['a=1','b=2'], build_dir='.kunit', filter_glob='', timeout=300)
@@ -699,7 +753,7 @@ class KUnitMainTest(unittest.TestCase):
@mock.patch.object(kunit, '_list_tests')
def test_run_isolated_by_suite(self, mock_tests):
mock_tests.return_value = ['suite.test1', 'suite.test2', 'suite2.test1']
- kunit.main(['exec', '--run_isolated=suite', 'suite*.test*'], self.linux_source_mock)
+ kunit.main(['exec', '--run_isolated=suite', 'suite*.test*'])
# Should respect the user's filter glob when listing tests.
mock_tests.assert_called_once_with(mock.ANY,
@@ -712,7 +766,7 @@ class KUnitMainTest(unittest.TestCase):
@mock.patch.object(kunit, '_list_tests')
def test_run_isolated_by_test(self, mock_tests):
mock_tests.return_value = ['suite.test1', 'suite.test2', 'suite2.test1']
- kunit.main(['exec', '--run_isolated=test', 'suite*'], self.linux_source_mock)
+ kunit.main(['exec', '--run_isolated=test', 'suite*'])
# Should respect the user's filter glob when listing tests.
mock_tests.assert_called_once_with(mock.ANY,
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index de11992dc577..5047d8eef53e 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -143,7 +143,6 @@ endif
# Prepare for headers install
include $(top_srcdir)/scripts/subarch.include
ARCH ?= $(SUBARCH)
-export KSFT_KHDR_INSTALL_DONE := 1
export BUILD
export KHDR_INCLUDES
@@ -151,30 +150,7 @@ export KHDR_INCLUDES
# all isn't the first target in the file.
.DEFAULT_GOAL := all
-# Install headers here once for all tests. KSFT_KHDR_INSTALL_DONE
-# is used to avoid running headers_install from lib.mk.
-# Invoke headers install with --no-builtin-rules to avoid circular
-# dependency in "make kselftest" case. In this case, second level
-# make inherits builtin-rules which will use the rule generate
-# Makefile.o and runs into
-# "Circular Makefile.o <- prepare dependency dropped."
-# and headers_install fails and test compile fails.
-#
-# O= KBUILD_OUTPUT cases don't run into this error, since main Makefile
-# invokes them as sub-makes and --no-builtin-rules is not necessary,
-# but doesn't cause any failures. Keep it simple and use the same
-# flags in both cases.
-# Local build cases: "make kselftest", "make -C" - headers are installed
-# in the default INSTALL_HDR_PATH usr/include.
-khdr:
-ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
- $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
-else
- $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$(abs_objtree)/usr \
- ARCH=$(ARCH) -C $(top_srcdir) headers_install
-endif
-
-all: khdr
+all:
@ret=1; \
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
@@ -253,7 +229,7 @@ ifdef INSTALL_PATH
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
[ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \
- echo -n "Emit Tests for $$TARGET\n"; \
+ echo -ne "Emit Tests for $$TARGET\n"; \
$(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \
-C $$TARGET emit_tests >> $(TEST_LIST); \
done;
@@ -274,4 +250,4 @@ clean:
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
done;
-.PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean gen_tar
+.PHONY: all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean gen_tar
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
index 409e3e53d00a..a5a0744423d8 100644
--- a/tools/testing/selftests/arm64/mte/Makefile
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -22,7 +22,6 @@ ifeq ($(mte_cc_support),1)
TEST_GEN_PROGS := $(PROGS)
# Get Kernel headers installed and use them.
-KSFT_KHDR_INSTALL := 1
else
$(warning compiler "$(CC)" does not support the ARMv8.5 MTE extension.)
$(warning test program "mte" will not be created.)
diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile
index ac4ad0005715..be7520a863b0 100644
--- a/tools/testing/selftests/arm64/signal/Makefile
+++ b/tools/testing/selftests/arm64/signal/Makefile
@@ -11,7 +11,6 @@ PROGS := $(patsubst %.c,%,$(SRCS))
TEST_GEN_PROGS := $(notdir $(PROGS))
# Get Kernel headers installed and use them.
-KSFT_KHDR_INSTALL := 1
# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
# to account for any OUTPUT target-dirs optionally provided by
diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
index c70fdec7d7c4..0c645834ddc3 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.h
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -9,9 +9,7 @@
#include <ucontext.h>
/*
- * Using ARCH specific and sanitized Kernel headers installed by KSFT
- * framework since we asked for it by setting flag KSFT_KHDR_INSTALL
- * in our Makefile.
+ * Using ARCH specific and sanitized Kernel headers from the tree.
*/
#include <asm/ptrace.h>
#include <asm/hwcap.h>
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 595565eb68c0..3a8cb2404ea6 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -41,5 +41,6 @@ test_cpp
/bench
*.ko
*.tmp
-xdpxceiver
+xskxceiver
xdp_redirect_multi
+xdp_synproxy
diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST
new file mode 100644
index 000000000000..939de574fc7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/DENYLIST
@@ -0,0 +1,6 @@
+# TEMPORARY
+get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge
+stacktrace_build_id_nmi
+stacktrace_build_id
+task_fd_query_rawtp
+varlen
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
new file mode 100644
index 000000000000..e33cab34d22f
--- /dev/null
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -0,0 +1,67 @@
+# TEMPORARY
+atomics # attach(add): actual -524 <= expected 0 (trampoline)
+bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc)
+bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?)
+bpf_tcp_ca # JIT does not support calling kernel function (kfunc)
+bpf_loop # attaches to __x64_sys_nanosleep
+bpf_mod_race # BPF trampoline
+bpf_nf # JIT does not support calling kernel function
+core_read_macros # unknown func bpf_probe_read#4 (overlapping)
+d_path # failed to auto-attach program 'prog_stat': -524 (trampoline)
+dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline)
+fentry_fexit # fentry attach failed: -524 (trampoline)
+fentry_test # fentry_first_attach unexpected error: -524 (trampoline)
+fexit_bpf2bpf # freplace_attach_trace unexpected error: -524 (trampoline)
+fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline)
+fexit_stress # fexit attach failed prog 0 failed: -524 (trampoline)
+fexit_test # fexit_first_attach unexpected error: -524 (trampoline)
+get_func_args_test # trampoline
+get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline)
+get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
+kfree_skb # attach fentry unexpected error: -524 (trampoline)
+kfunc_call # 'bpf_prog_active': not found in kernel BTF (?)
+ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?)
+ksyms_module_libbpf # JIT does not support calling kernel function (kfunc)
+ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?)
+modify_return # modify_return attach failed: -524 (trampoline)
+module_attach # skel_attach skeleton attach failed: -524 (trampoline)
+mptcp
+kprobe_multi_test # relies on fentry
+netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?)
+probe_user # check_kprobe_res wrong kprobe res from probe read (?)
+recursion # skel_attach unexpected error: -524 (trampoline)
+ringbuf # skel_load skeleton load failed (?)
+sk_assign # Can't read on server: Invalid argument (?)
+sk_lookup # endianness problem
+sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (trampoline)
+skc_to_unix_sock # could not attach BPF object unexpected error: -524 (trampoline)
+socket_cookie # prog_attach unexpected error: -524 (trampoline)
+stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
+tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?)
+task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline)
+test_bpffs # bpffs test failed 255 (iterator)
+test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline)
+test_ima # failed to auto-attach program 'ima': -524 (trampoline)
+test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline)
+test_lsm # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3 (?)
+test_overhead # attach_fentry unexpected error: -524 (trampoline)
+test_profiler # unknown func bpf_probe_read_str#45 (overlapping)
+timer # failed to auto-attach program 'test1': -524 (trampoline)
+timer_crash # trampoline
+timer_mim # failed to auto-attach program 'test1': -524 (trampoline)
+trace_ext # failed to auto-attach program 'test_pkt_md_access_new': -524 (trampoline)
+trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?)
+trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?)
+trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline)
+verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?)
+vmlinux # failed to auto-attach program 'handle__fentry': -524 (trampoline)
+xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520 (?)
+xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
+xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
+map_kptr # failed to open_and_load program: -524 (trampoline)
+bpf_cookie # failed to open_and_load program: -524 (trampoline)
+xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22)
+send_signal # intermittently fails to receive signal
+select_reuseport # intermittently fails on new s390x setup
+xdp_synproxy # JIT does not support calling kernel function (kfunc)
+unpriv_bpf_disabled # fentry
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 2d3c8c8f558a..8d59ec7f4c2d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
- xdpxceiver xdp_redirect_multi
+ xskxceiver xdp_redirect_multi xdp_synproxy
TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
@@ -168,17 +168,26 @@ $(OUTPUT)/%:%.c
$(call msg,BINARY,,$@)
$(Q)$(LINK.c) $^ $(LDLIBS) -o $@
+# LLVM's ld.lld doesn't support all the architectures, so use it only on x86
+ifeq ($(SRCARCH),x86)
+LLD := lld
+else
+LLD := ld
+endif
+
# Filter out -static for liburandom_read.so and its dependent targets so that static builds
# do not fail. Static builds leave urandom_read relying on system-wide shared libraries.
$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
$(call msg,LIB,,$@)
- $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) -fPIC -shared -o $@
+ $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \
+ -fuse-ld=$(LLD) -Wl,-znoseparate-code -fPIC -shared -o $@
$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
$(call msg,BINARY,,$@)
- $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
- liburandom_read.so $(LDLIBS) \
- -Wl,-rpath=. -Wl,--build-id=sha1 -o $@
+ $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
+ liburandom_read.so $(LDLIBS) \
+ -fuse-ld=$(LLD) -Wl,-znoseparate-code \
+ -Wl,-rpath=. -Wl,--build-id=sha1 -o $@
$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
$(call msg,MOD,,$@)
@@ -221,6 +230,8 @@ $(OUTPUT)/xdping: $(TESTING_HELPERS)
$(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS)
$(OUTPUT)/test_maps: $(TESTING_HELPERS)
$(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS)
+$(OUTPUT)/xsk.o: $(BPFOBJ)
+$(OUTPUT)/xskxceiver: $(OUTPUT)/xsk.o
BPFTOOL ?= $(DEFAULT_BPFTOOL)
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
@@ -502,6 +513,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
cap_helpers.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
$(OUTPUT)/liburandom_read.so \
+ $(OUTPUT)/xdp_synproxy \
ima_setup.sh \
$(wildcard progs/btf_dump_test_case_*.c)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
@@ -560,6 +572,9 @@ $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
$(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
+$(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
+$(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
+$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -571,13 +586,18 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_ringbufs.o \
$(OUTPUT)/bench_bloom_filter_map.o \
$(OUTPUT)/bench_bpf_loop.o \
- $(OUTPUT)/bench_strncmp.o
+ $(OUTPUT)/bench_strncmp.o \
+ $(OUTPUT)/bench_bpf_hashmap_full_update.o \
+ $(OUTPUT)/bench_local_storage.o \
+ $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature bpftool \
- $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h no_alu32 bpf_gcc bpf_testmod.ko)
+ $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \
+ no_alu32 bpf_gcc bpf_testmod.ko \
+ liburandom_read.so)
.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index f061cc20e776..c1f20a147462 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -79,6 +79,43 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec);
}
+void
+grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
+{
+ int i;
+
+ memset(gp_stat, 0, sizeof(struct basic_stats));
+
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->mean += res[i].gp_ns / 1000.0 / (double)res[i].gp_ct / (0.0 + res_cnt);
+
+#define IT_MEAN_DIFF (res[i].gp_ns / 1000.0 / (double)res[i].gp_ct - gp_stat->mean)
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
+ }
+ gp_stat->stddev = sqrt(gp_stat->stddev);
+#undef IT_MEAN_DIFF
+}
+
+void
+grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
+{
+ int i;
+
+ memset(gp_stat, 0, sizeof(struct basic_stats));
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->mean += res[i].stime / (double)res[i].gp_ct / (0.0 + res_cnt);
+
+#define IT_MEAN_DIFF (res[i].stime / (double)res[i].gp_ct - gp_stat->mean)
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
+ }
+ gp_stat->stddev = sqrt(gp_stat->stddev);
+#undef IT_MEAN_DIFF
+}
+
void hits_drops_report_final(struct bench_res res[], int res_cnt)
{
int i;
@@ -150,6 +187,53 @@ void ops_report_final(struct bench_res res[], int res_cnt)
printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt);
}
+void local_storage_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double important_hits_per_sec, hits_per_sec;
+ double delta_sec = delta_ns / 1000000000.0;
+
+ hits_per_sec = res->hits / 1000000.0 / delta_sec;
+ important_hits_per_sec = res->important_hits / 1000000.0 / delta_sec;
+
+ printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+
+ printf("hits %8.3lfM/s ", hits_per_sec);
+ printf("important_hits %8.3lfM/s\n", important_hits_per_sec);
+}
+
+void local_storage_report_final(struct bench_res res[], int res_cnt)
+{
+ double important_hits_mean = 0.0, important_hits_stddev = 0.0;
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+ important_hits_mean += res[i].important_hits / 1000000.0 / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+ (hits_mean - res[i].hits / 1000000.0) /
+ (res_cnt - 1.0);
+ important_hits_stddev +=
+ (important_hits_mean - res[i].important_hits / 1000000.0) *
+ (important_hits_mean - res[i].important_hits / 1000000.0) /
+ (res_cnt - 1.0);
+ }
+
+ hits_stddev = sqrt(hits_stddev);
+ important_hits_stddev = sqrt(important_hits_stddev);
+ }
+ printf("Summary: hits throughput %8.3lf \u00B1 %5.3lf M ops/s, ",
+ hits_mean, hits_stddev);
+ printf("hits latency %8.3lf ns/op, ", 1000.0 / hits_mean);
+ printf("important_hits throughput %8.3lf \u00B1 %5.3lf M ops/s\n",
+ important_hits_mean, important_hits_stddev);
+}
+
const char *argp_program_version = "benchmark";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] =
@@ -188,13 +272,18 @@ static const struct argp_option opts[] = {
extern struct argp bench_ringbufs_argp;
extern struct argp bench_bloom_map_argp;
extern struct argp bench_bpf_loop_argp;
+extern struct argp bench_local_storage_argp;
+extern struct argp bench_local_storage_rcu_tasks_trace_argp;
extern struct argp bench_strncmp_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
{ &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 },
{ &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 },
+ { &bench_local_storage_argp, 0, "local_storage benchmark", 0 },
{ &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
+ { &bench_local_storage_rcu_tasks_trace_argp, 0,
+ "local_storage RCU Tasks Trace slowdown benchmark", 0 },
{},
};
@@ -396,6 +485,11 @@ extern const struct bench bench_hashmap_with_bloom;
extern const struct bench bench_bpf_loop;
extern const struct bench bench_strncmp_no_helper;
extern const struct bench bench_strncmp_helper;
+extern const struct bench bench_bpf_hashmap_full_update;
+extern const struct bench bench_local_storage_cache_seq_get;
+extern const struct bench bench_local_storage_cache_interleaved_get;
+extern const struct bench bench_local_storage_cache_hashmap_control;
+extern const struct bench bench_local_storage_tasks_trace;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -430,6 +524,11 @@ static const struct bench *benchs[] = {
&bench_bpf_loop,
&bench_strncmp_no_helper,
&bench_strncmp_helper,
+ &bench_bpf_hashmap_full_update,
+ &bench_local_storage_cache_seq_get,
+ &bench_local_storage_cache_interleaved_get,
+ &bench_local_storage_cache_hashmap_control,
+ &bench_local_storage_tasks_trace,
};
static void setup_benchmark()
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index fb3e213df3dc..d748255877e2 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -30,10 +30,19 @@ struct env {
struct cpu_set cons_cpus;
};
+struct basic_stats {
+ double mean;
+ double stddev;
+};
+
struct bench_res {
long hits;
long drops;
long false_hits;
+ long important_hits;
+ unsigned long gp_ns;
+ unsigned long gp_ct;
+ unsigned int stime;
};
struct bench {
@@ -61,6 +70,13 @@ void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns);
void false_hits_report_final(struct bench_res res[], int res_cnt);
void ops_report_progress(int iter, struct bench_res *res, long delta_ns);
void ops_report_final(struct bench_res res[], int res_cnt);
+void local_storage_report_progress(int iter, struct bench_res *res,
+ long delta_ns);
+void local_storage_report_final(struct bench_res res[], int res_cnt);
+void grace_period_latency_basic_stats(struct bench_res res[], int res_cnt,
+ struct basic_stats *gp_stat);
+void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt,
+ struct basic_stats *gp_stat);
static inline __u64 get_time_ns(void)
{
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
new file mode 100644
index 000000000000..cec51e0ff4b8
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include <argp.h>
+#include "bench.h"
+#include "bpf_hashmap_full_update_bench.skel.h"
+#include "bpf_util.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+ struct bpf_hashmap_full_update_bench *skel;
+} ctx;
+
+#define MAX_LOOP_NUM 10000
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+}
+
+static void *producer(void *input)
+{
+ while (true) {
+ /* trigger the bpf program */
+ syscall(__NR_getpgid);
+ }
+
+ return NULL;
+}
+
+static void *consumer(void *input)
+{
+ return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+}
+
+static void setup(void)
+{
+ struct bpf_link *link;
+ int map_fd, i, max_entries;
+
+ setup_libbpf();
+
+ ctx.skel = bpf_hashmap_full_update_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.skel->bss->nr_loops = MAX_LOOP_NUM;
+
+ link = bpf_program__attach(ctx.skel->progs.benchmark);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+
+ /* fill hash_map */
+ map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench);
+ max_entries = bpf_map__max_entries(ctx.skel->maps.hash_map_bench);
+ for (i = 0; i < max_entries; i++)
+ bpf_map_update_elem(map_fd, &i, &i, BPF_ANY);
+}
+
+void hashmap_report_final(struct bench_res res[], int res_cnt)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ int i;
+
+ for (i = 0; i < nr_cpus; i++) {
+ u64 time = ctx.skel->bss->percpu_time[i];
+
+ if (!time)
+ continue;
+
+ printf("%d:hash_map_full_perf %lld events per sec\n",
+ i, ctx.skel->bss->nr_loops * 1000000000ll / time);
+ }
+}
+
+const struct bench bench_bpf_hashmap_full_update = {
+ .name = "bpf-hashmap-ful-update",
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = NULL,
+ .report_final = hashmap_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
new file mode 100644
index 000000000000..5a378c84e81f
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+#include <linux/btf.h>
+
+#include "local_storage_bench.skel.h"
+#include "bench.h"
+
+#include <test_btf.h>
+
+static struct {
+ __u32 nr_maps;
+ __u32 hashmap_nr_keys_used;
+} args = {
+ .nr_maps = 1000,
+ .hashmap_nr_keys_used = 1000,
+};
+
+enum {
+ ARG_NR_MAPS = 6000,
+ ARG_HASHMAP_NR_KEYS_USED = 6001,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0,
+ "Set number of local_storage maps"},
+ { "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS",
+ 0, "When doing hashmap test, set number of hashmap keys test uses"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_MAPS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid nr_maps");
+ argp_usage(state);
+ }
+ args.nr_maps = ret;
+ break;
+ case ARG_HASHMAP_NR_KEYS_USED:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid hashmap_nr_keys_used");
+ argp_usage(state);
+ }
+ args.hashmap_nr_keys_used = ret;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_local_storage_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* Keep in sync w/ array of maps in bpf */
+#define MAX_NR_MAPS 1000
+/* keep in sync w/ same define in bpf */
+#define HASHMAP_SZ 4194304
+
+static void validate(void)
+{
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+ exit(1);
+ }
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+
+ if (args.nr_maps > MAX_NR_MAPS) {
+ fprintf(stderr, "nr_maps must be <= 1000\n");
+ exit(1);
+ }
+
+ if (args.hashmap_nr_keys_used > HASHMAP_SZ) {
+ fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ);
+ exit(1);
+ }
+}
+
+static struct {
+ struct local_storage_bench *skel;
+ void *bpf_obj;
+ struct bpf_map *array_of_maps;
+} ctx;
+
+static void prepopulate_hashmap(int fd)
+{
+ int i, key, val;
+
+ /* local_storage gets will have BPF_LOCAL_STORAGE_GET_F_CREATE flag set, so
+ * populate the hashmap for a similar comparison
+ */
+ for (i = 0; i < HASHMAP_SZ; i++) {
+ key = val = i;
+ if (bpf_map_update_elem(fd, &key, &val, 0)) {
+ fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key);
+ exit(1);
+ }
+ }
+}
+
+static void __setup(struct bpf_program *prog, bool hashmap)
+{
+ struct bpf_map *inner_map;
+ int i, fd, mim_fd, err;
+
+ LIBBPF_OPTS(bpf_map_create_opts, create_opts);
+
+ if (!hashmap)
+ create_opts.map_flags = BPF_F_NO_PREALLOC;
+
+ ctx.skel->rodata->num_maps = args.nr_maps;
+ ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used;
+ inner_map = bpf_map__inner_map(ctx.array_of_maps);
+ create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map);
+ create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map);
+
+ err = local_storage_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "Error loading skeleton\n");
+ goto err_out;
+ }
+
+ create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj);
+
+ mim_fd = bpf_map__fd(ctx.array_of_maps);
+ if (mim_fd < 0) {
+ fprintf(stderr, "Error getting map_in_map fd\n");
+ goto err_out;
+ }
+
+ for (i = 0; i < args.nr_maps; i++) {
+ if (hashmap)
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
+ sizeof(int), HASHMAP_SZ, &create_opts);
+ else
+ fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int),
+ sizeof(int), 0, &create_opts);
+ if (fd < 0) {
+ fprintf(stderr, "Error creating map %d: %d\n", i, fd);
+ goto err_out;
+ }
+
+ if (hashmap)
+ prepopulate_hashmap(fd);
+
+ err = bpf_map_update_elem(mim_fd, &i, &fd, 0);
+ if (err) {
+ fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i);
+ goto err_out;
+ }
+ }
+
+ if (!bpf_program__attach(prog)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ return;
+err_out:
+ exit(1);
+}
+
+static void hashmap_setup(void)
+{
+ struct local_storage_bench *skel;
+
+ setup_libbpf();
+
+ skel = local_storage_bench__open();
+ ctx.skel = skel;
+ ctx.array_of_maps = skel->maps.array_of_hash_maps;
+ skel->rodata->use_hashmap = 1;
+ skel->rodata->interleave = 0;
+
+ __setup(skel->progs.get_local, true);
+}
+
+static void local_storage_cache_get_setup(void)
+{
+ struct local_storage_bench *skel;
+
+ setup_libbpf();
+
+ skel = local_storage_bench__open();
+ ctx.skel = skel;
+ ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
+ skel->rodata->use_hashmap = 0;
+ skel->rodata->interleave = 0;
+
+ __setup(skel->progs.get_local, false);
+}
+
+static void local_storage_cache_get_interleaved_setup(void)
+{
+ struct local_storage_bench *skel;
+
+ setup_libbpf();
+
+ skel = local_storage_bench__open();
+ ctx.skel = skel;
+ ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
+ skel->rodata->use_hashmap = 0;
+ skel->rodata->interleave = 1;
+
+ __setup(skel->progs.get_local, false);
+}
+
+static void measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+ res->important_hits = atomic_swap(&ctx.skel->bss->important_hits, 0);
+}
+
+static inline void trigger_bpf_program(void)
+{
+ syscall(__NR_getpgid);
+}
+
+static void *consumer(void *input)
+{
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ while (true)
+ trigger_bpf_program();
+
+ return NULL;
+}
+
+/* cache sequential and interleaved get benchs test local_storage get
+ * performance, specifically they demonstrate performance cliff of
+ * current list-plus-cache local_storage model.
+ *
+ * cache sequential get: call bpf_task_storage_get on n maps in order
+ * cache interleaved get: like "sequential get", but interleave 4 calls to the
+ * 'important' map (idx 0 in array_of_maps) for every 10 calls. Goal
+ * is to mimic environment where many progs are accessing their local_storage
+ * maps, with 'our' prog needing to access its map more often than others
+ */
+const struct bench bench_local_storage_cache_seq_get = {
+ .name = "local-storage-cache-seq-get",
+ .validate = validate,
+ .setup = local_storage_cache_get_setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = local_storage_report_progress,
+ .report_final = local_storage_report_final,
+};
+
+const struct bench bench_local_storage_cache_interleaved_get = {
+ .name = "local-storage-cache-int-get",
+ .validate = validate,
+ .setup = local_storage_cache_get_interleaved_setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = local_storage_report_progress,
+ .report_final = local_storage_report_final,
+};
+
+const struct bench bench_local_storage_cache_hashmap_control = {
+ .name = "local-storage-cache-hashmap-control",
+ .validate = validate,
+ .setup = hashmap_setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = local_storage_report_progress,
+ .report_final = local_storage_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
new file mode 100644
index 000000000000..43f109d93130
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+
+#include <sys/prctl.h>
+#include "local_storage_rcu_tasks_trace_bench.skel.h"
+#include "bench.h"
+
+#include <signal.h>
+
+static struct {
+ __u32 nr_procs;
+ __u32 kthread_pid;
+ bool quiet;
+} args = {
+ .nr_procs = 1000,
+ .kthread_pid = 0,
+ .quiet = false,
+};
+
+enum {
+ ARG_NR_PROCS = 7000,
+ ARG_KTHREAD_PID = 7001,
+ ARG_QUIET = 7002,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0,
+ "Set number of user processes to spin up"},
+ { "kthread_pid", ARG_KTHREAD_PID, "PID", 0,
+ "Pid of rcu_tasks_trace kthread for ticks tracking"},
+ { "quiet", ARG_QUIET, "{0,1}", 0,
+ "If true, don't report progress"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_PROCS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid nr_procs\n");
+ argp_usage(state);
+ }
+ args.nr_procs = ret;
+ break;
+ case ARG_KTHREAD_PID:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1) {
+ fprintf(stderr, "invalid kthread_pid\n");
+ argp_usage(state);
+ }
+ args.kthread_pid = ret;
+ break;
+ case ARG_QUIET:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 0 || ret > 1) {
+ fprintf(stderr, "invalid quiet %ld\n", ret);
+ argp_usage(state);
+ }
+ args.quiet = ret;
+ break;
+break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_local_storage_rcu_tasks_trace_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+#define MAX_SLEEP_PROCS 150000
+
+static void validate(void)
+{
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+ exit(1);
+ }
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+
+ if (args.nr_procs > MAX_SLEEP_PROCS) {
+ fprintf(stderr, "benchmark supports up to %u sleeper procs!\n",
+ MAX_SLEEP_PROCS);
+ exit(1);
+ }
+}
+
+static long kthread_pid_ticks(void)
+{
+ char procfs_path[100];
+ long stime;
+ FILE *f;
+
+ if (!args.kthread_pid)
+ return -1;
+
+ sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid);
+ f = fopen(procfs_path, "r");
+ if (!f) {
+ fprintf(stderr, "couldn't open %s, exiting\n", procfs_path);
+ goto err_out;
+ }
+ if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) {
+ fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path);
+ goto err_out;
+ }
+ fclose(f);
+ return stime;
+
+err_out:
+ if (f)
+ fclose(f);
+ exit(1);
+ return 0;
+}
+
+static struct {
+ struct local_storage_rcu_tasks_trace_bench *skel;
+ long prev_kthread_stime;
+} ctx;
+
+static void sleep_and_loop(void)
+{
+ while (true) {
+ sleep(rand() % 4);
+ syscall(__NR_getpgid);
+ }
+}
+
+static void local_storage_tasks_trace_setup(void)
+{
+ int i, err, forkret, runner_pid;
+
+ runner_pid = getpid();
+
+ for (i = 0; i < args.nr_procs; i++) {
+ forkret = fork();
+ if (forkret < 0) {
+ fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i,
+ args.nr_procs);
+ goto err_out;
+ }
+
+ if (!forkret) {
+ err = prctl(PR_SET_PDEATHSIG, SIGKILL);
+ if (err < 0) {
+ fprintf(stderr, "prctl failed with err %d, exiting\n", errno);
+ goto err_out;
+ }
+
+ if (getppid() != runner_pid) {
+ fprintf(stderr, "Runner died while spinning up procs, exiting\n");
+ goto err_out;
+ }
+ sleep_and_loop();
+ }
+ }
+ printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid);
+
+ setup_libbpf();
+
+ ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "Error doing open_and_load, exiting\n");
+ goto err_out;
+ }
+
+ ctx.prev_kthread_stime = kthread_pid_ticks();
+
+ if (!bpf_program__attach(ctx.skel->progs.get_local)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ if (!bpf_program__attach(ctx.skel->progs.pregp_step)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ if (!bpf_program__attach(ctx.skel->progs.postgp)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ return;
+err_out:
+ exit(1);
+}
+
+static void measure(struct bench_res *res)
+{
+ long ticks;
+
+ res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0);
+ res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0);
+ ticks = kthread_pid_ticks();
+ res->stime = ticks - ctx.prev_kthread_stime;
+ ctx.prev_kthread_stime = ticks;
+}
+
+static void *consumer(void *input)
+{
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ while (true)
+ syscall(__NR_getpgid);
+ return NULL;
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ if (ctx.skel->bss->unexpected) {
+ fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp).");
+ fprintf(stderr, "Data can't be trusted, exiting\n");
+ exit(1);
+ }
+
+ if (args.quiet)
+ return;
+
+ printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n",
+ iter, res->gp_ns / (double)res->gp_ct);
+ printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n",
+ iter, res->stime / (double)res->gp_ct);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+ struct basic_stats gp_stat;
+
+ grace_period_latency_basic_stats(res, res_cnt, &gp_stat);
+ printf("SUMMARY tasks_trace grace period latency");
+ printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev);
+ grace_period_ticks_basic_stats(res, res_cnt, &gp_stat);
+ printf("SUMMARY ticks per tasks_trace grace period");
+ printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev);
+}
+
+/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use
+ * of RCU Tasks-Trace.
+ *
+ * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside
+ * from sleep() loop, and creating/destroying BPF task-local storage on wakeup.
+ * The number of forked tasks is configurable.
+ *
+ * exercising code paths which call call_rcu_tasks_trace while there are many
+ * thousands of tasks on the system should result in RCU Tasks-Trace having to
+ * do a noticeable amount of work.
+ *
+ * This should be observable by measuring rcu_tasks_trace_kthread CPU usage
+ * after the grace period has ended, or by measuring grace period latency.
+ *
+ * This benchmark uses both approaches, attaching to rcu_tasks_trace_pregp_step
+ * and rcu_tasks_trace_postgp functions to measure grace period latency and
+ * using /proc/PID/stat to measure rcu_tasks_trace_kthread kernel ticks
+ */
+const struct bench bench_local_storage_tasks_trace = {
+ .name = "local-storage-tasks-trace",
+ .validate = validate,
+ .setup = local_storage_tasks_trace_setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = report_progress,
+ .report_final = report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
new file mode 100755
index 000000000000..1e2de838f9fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1`
+summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-ful-update)
+printf "$summary"
+printf "\n"
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh
new file mode 100755
index 000000000000..2eb2b513a173
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+header "Hashmap Control"
+for i in 10 1000 10000 100000 4194304; do
+subtitle "num keys: $i"
+ summarize_local_storage "hashmap (control) sequential get: "\
+ "$(./bench --nr_maps 1 --hashmap_nr_keys_used=$i local-storage-cache-hashmap-control)"
+ printf "\n"
+done
+
+header "Local Storage"
+for i in 1 10 16 17 24 32 100 1000; do
+subtitle "num_maps: $i"
+ summarize_local_storage "local_storage cache sequential get: "\
+ "$(./bench --nr_maps $i local-storage-cache-seq-get)"
+ summarize_local_storage "local_storage cache interleaved get: "\
+ "$(./bench --nr_maps $i local-storage-cache-int-get)"
+ printf "\n"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
new file mode 100755
index 000000000000..5dac1f02892c
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+kthread_pid=`pgrep rcu_tasks_trace_kthread`
+
+if [ -z $kthread_pid ]; then
+ echo "error: Couldn't find rcu_tasks_trace_kthread"
+ exit 1
+fi
+
+./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet 1 local-storage-tasks-trace
diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh
index 6c5e6023a69f..d9f40af82006 100644
--- a/tools/testing/selftests/bpf/benchs/run_common.sh
+++ b/tools/testing/selftests/bpf/benchs/run_common.sh
@@ -41,6 +41,16 @@ function ops()
echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
}
+function local_storage()
+{
+ echo -n "hits throughput: "
+ echo -n "$*" | sed -E "s/.* hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+ echo -n -e ", hits latency: "
+ echo -n "$*" | sed -E "s/.* hits latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
+ echo -n ", important_hits throughput: "
+ echo "$*" | sed -E "s/.*important_hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+}
+
function total()
{
echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
@@ -67,6 +77,13 @@ function summarize_ops()
printf "%-20s %s\n" "$bench" "$(ops $summary)"
}
+function summarize_local_storage()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s\n" "$bench" "$(local_storage $summary)"
+}
+
function summarize_total()
{
bench="$1"
diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h
index 719ab56cdb5d..845209581440 100644
--- a/tools/testing/selftests/bpf/bpf_legacy.h
+++ b/tools/testing/selftests/bpf/bpf_legacy.h
@@ -2,15 +2,6 @@
#ifndef __BPF_LEGACY__
#define __BPF_LEGACY__
-#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
- struct ____btf_map_##name { \
- type_key key; \
- type_val value; \
- }; \
- struct ____btf_map_##name \
- __attribute__ ((section(".maps." #name), used)) \
- ____btf_map_##name = { }
-
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
*/
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index e585e1cefc77..792cb15bac40 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -148,13 +148,13 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
.write = bpf_testmod_test_write,
};
-BTF_SET_START(bpf_testmod_check_kfunc_ids)
-BTF_ID(func, bpf_testmod_test_mod_kfunc)
-BTF_SET_END(bpf_testmod_check_kfunc_ids)
+BTF_SET8_START(bpf_testmod_check_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
+BTF_SET8_END(bpf_testmod_check_kfunc_ids)
static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
- .owner = THIS_MODULE,
- .check_set = &bpf_testmod_check_kfunc_ids,
+ .owner = THIS_MODULE,
+ .set = &bpf_testmod_check_kfunc_ids,
};
extern int bpf_fentry_test1(int a);
diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
index b5941d514e17..1c1c2c26690a 100644
--- a/tools/testing/selftests/bpf/btf_helpers.c
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -26,11 +26,12 @@ static const char * const btf_kind_str_mapping[] = {
[BTF_KIND_FLOAT] = "FLOAT",
[BTF_KIND_DECL_TAG] = "DECL_TAG",
[BTF_KIND_TYPE_TAG] = "TYPE_TAG",
+ [BTF_KIND_ENUM64] = "ENUM64",
};
static const char *btf_kind_str(__u16 kind)
{
- if (kind > BTF_KIND_TYPE_TAG)
+ if (kind > BTF_KIND_ENUM64)
return "UNKNOWN";
return btf_kind_str_mapping[kind];
}
@@ -139,14 +140,32 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
}
case BTF_KIND_ENUM: {
const struct btf_enum *v = btf_enum(t);
+ const char *fmt_str;
- fprintf(out, " size=%u vlen=%u", t->size, vlen);
+ fmt_str = btf_kflag(t) ? "\n\t'%s' val=%d" : "\n\t'%s' val=%u";
+ fprintf(out, " encoding=%s size=%u vlen=%u",
+ btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen);
for (i = 0; i < vlen; i++, v++) {
- fprintf(out, "\n\t'%s' val=%u",
+ fprintf(out, fmt_str,
btf_str(btf, v->name_off), v->val);
}
break;
}
+ case BTF_KIND_ENUM64: {
+ const struct btf_enum64 *v = btf_enum64(t);
+ const char *fmt_str;
+
+ fmt_str = btf_kflag(t) ? "\n\t'%s' val=%lld" : "\n\t'%s' val=%llu";
+
+ fprintf(out, " encoding=%s size=%u vlen=%u",
+ btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen);
+ for (i = 0; i < vlen; i++, v++) {
+ fprintf(out, fmt_str,
+ btf_str(btf, v->name_off),
+ ((__u64)v->val_hi32 << 32) | v->val_lo32);
+ }
+ break;
+ }
case BTF_KIND_FWD:
fprintf(out, " fwd_kind=%s", btf_kflag(t) ? "union" : "struct");
break;
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 3b3edc0fc8a6..fabf0c014349 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -1,59 +1,64 @@
+CONFIG_BLK_DEV_LOOP=y
CONFIG_BPF=y
-CONFIG_BPF_SYSCALL=y
-CONFIG_NET_CLS_BPF=m
CONFIG_BPF_EVENTS=y
-CONFIG_TEST_BPF=m
+CONFIG_BPF_JIT=y
+CONFIG_BPF_LIRC_MODE2=y
+CONFIG_BPF_LSM=y
+CONFIG_BPF_STREAM_PARSER=y
+CONFIG_BPF_SYSCALL=y
CONFIG_CGROUP_BPF=y
-CONFIG_NETDEVSIM=m
-CONFIG_NET_CLS_ACT=y
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_INGRESS=y
-CONFIG_NET_IPIP=y
-CONFIG_IPV6=y
-CONFIG_NET_IPGRE_DEMUX=y
-CONFIG_NET_IPGRE=y
-CONFIG_IPV6_GRE=y
-CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_HMAC=m
CONFIG_CRYPTO_SHA256=m
-CONFIG_VXLAN=y
-CONFIG_GENEVE=y
-CONFIG_NET_CLS_FLOWER=m
-CONFIG_LWTUNNEL=y
-CONFIG_BPF_STREAM_PARSER=y
-CONFIG_XDP_SOCKETS=y
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_FPROBE=y
CONFIG_FTRACE_SYSCALLS=y
-CONFIG_IPV6_TUNNEL=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_GENEVE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_IMA=y
+CONFIG_IMA_READ_POLICY=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_TARGET_SYNPROXY=y
+CONFIG_IPV6=y
+CONFIG_IPV6_FOU=m
+CONFIG_IPV6_FOU_TUNNEL=m
CONFIG_IPV6_GRE=y
CONFIG_IPV6_SEG6_BPF=y
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_TUNNEL=y
+CONFIG_LIRC=y
+CONFIG_LWTUNNEL=y
+CONFIG_MPLS=y
+CONFIG_MPLS_IPTUNNEL=m
+CONFIG_MPLS_ROUTING=m
+CONFIG_MPTCP=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_CLS_BPF=y
+CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_FOU=m
CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_IPV6_FOU=m
-CONFIG_IPV6_FOU_TUNNEL=m
-CONFIG_MPLS=y
+CONFIG_NET_IPGRE=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IPIP=y
CONFIG_NET_MPLS_GSO=m
-CONFIG_MPLS_ROUTING=m
-CONFIG_MPLS_IPTUNNEL=m
-CONFIG_IPV6_SIT=m
-CONFIG_BPF_JIT=y
-CONFIG_BPF_LSM=y
-CONFIG_SECURITY=y
-CONFIG_RC_CORE=y
-CONFIG_LIRC=y
-CONFIG_BPF_LIRC_MODE2=y
-CONFIG_IMA=y
-CONFIG_SECURITYFS=y
-CONFIG_IMA_WRITE_POLICY=y
-CONFIG_IMA_READ_POLICY=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_FUNCTION_TRACER=y
-CONFIG_DYNAMIC_FTRACE=y
+CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_SCHED=y
+CONFIG_NETDEVSIM=m
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_SYNPROXY=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_TARGET_CT=y
+CONFIG_NF_CONNTRACK=y
CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_DEFRAG_IPV6=y
-CONFIG_NF_CONNTRACK=y
+CONFIG_RC_CORE=y
+CONFIG_SECURITY=y
+CONFIG_SECURITYFS=y
+CONFIG_TEST_BPF=m
CONFIG_USERFAULTFD=y
-CONFIG_FPROBE=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_MPTCP=y
+CONFIG_VXLAN=y
+CONFIG_XDP_SOCKETS=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
new file mode 100644
index 000000000000..f8a7a258a718
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -0,0 +1,147 @@
+CONFIG_9P_FS=y
+CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
+CONFIG_AUDIT=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BONDING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_JIT_DEFAULT_ON=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BPFILTER=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_CPUSETS=y
+CONFIG_CRASH_DUMP=y
+CONFIG_CRYPTO_USER_API_RNG=y
+CONFIG_CRYPTO_USER_API_SKCIPHER=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_SG=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_FANOTIFY=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_HAVE_EBPF_JIT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KPROBES_ON_FTRACE=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_MARCH_Z10_FEATURES=y
+CONFIG_HAVE_MARCH_Z196_FEATURES=y
+CONFIG_HEADERS_INSTALL=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HW_RANDOM=y
+CONFIG_HZ_100=y
+CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IKHEADERS=y
+CONFIG_INET6_ESP=y
+CONFIG_INET=y
+CONFIG_INET_ESP=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPVLAN=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KERNEL_UNCOMPRESSED=y
+CONFIG_KPROBES=y
+CONFIG_KPROBES_ON_FTRACE=y
+CONFIG_KRETPROBES=y
+CONFIG_KSM=y
+CONFIG_LATENCYTOP=y
+CONFIG_LIVEPATCH=y
+CONFIG_LOCK_STAT=y
+CONFIG_MACVLAN=y
+CONFIG_MACVTAP=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MARCH_Z196=y
+CONFIG_MARCH_Z196_TUNE=y
+CONFIG_MEMCG=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_MODULE_SIG=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULES=y
+CONFIG_NAMESPACES=y
+CONFIG_NET=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NET_ACT_GACT=y
+CONFIG_NET_KEY=y
+CONFIG_NET_SCH_FQ=y
+CONFIG_NET_VRF=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NF_TABLES=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
+CONFIG_PACKET=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_PTDUMP_DEBUGFS=y
+CONFIG_RC_DEVICES=y
+CONFIG_RC_LOOPBACK=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SAMPLE_SECCOMP=y
+CONFIG_SAMPLES=y
+CONFIG_SCHED_TRACER=y
+CONFIG_SCSI=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_STACK_TRACER=y
+CONFIG_STATIC_KEYS_SELFTEST=y
+CONFIG_SYSVIPC=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_DCTCP=y
+CONFIG_TLS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UPROBES=y
+CONFIG_USELIB=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_NET=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VLAN_8021Q=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
new file mode 100644
index 000000000000..f0859a1d37ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -0,0 +1,251 @@
+CONFIG_9P_FS=y
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
+CONFIG_AGP=y
+CONFIG_AGP_AMD64=y
+CONFIG_AGP_INTEL=y
+CONFIG_AGP_SIS=y
+CONFIG_AGP_VIA=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_AUDIT=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+CONFIG_BINFMT_MISC=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BONDING=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_BOOTTIME_TRACING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BPFILTER=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CMA=y
+CONFIG_CMA_AREAS=7
+CONFIG_COMPAT_32BIT_TIME=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPUSETS=y
+CONFIG_CRC_T10DIF=y
+CONFIG_CRYPTO_BLAKE2B=y
+CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_XXHASH=y
+CONFIG_DCB=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_CREDENTIALS=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEFAULT_FQ_CODEL=y
+CONFIG_DEFAULT_RENO=y
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DMA_CMA=y
+CONFIG_DNS_RESOLVER=y
+CONFIG_EFI=y
+CONFIG_EFI_STUB=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_FAIL_FUNCTION=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FB=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_VESA=y
+CONFIG_FONT_8x16=y
+CONFIG_FONT_MINI_4x6=y
+CONFIG_FONTS=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
+CONFIG_FW_LOADER_USER_HELPER=y
+CONFIG_GART_IOMMU=y
+CONFIG_GENERIC_PHY=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GREENASIA=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_KYE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HPET=y
+CONFIG_HUGETLBFS=y
+CONFIG_HWPOISON_INJECT=y
+CONFIG_HZ_1000=y
+CONFIG_INET=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_INTEL_POWERCLAMP=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IRQ_POLL=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_KEXEC=y
+CONFIG_KPROBES=y
+CONFIG_KSM=y
+CONFIG_LEGACY_VSYSCALL_NONE=y
+CONFIG_LOG_BUF_SHIFT=21
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=0
+CONFIG_LOGO=y
+CONFIG_LSM="selinux,bpf,integrity"
+CONFIG_MAC_PARTITION=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MCORE2=y
+CONFIG_MEMCG=y
+CONFIG_MEMORY_FAILURE=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_MODULE_SIG=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULES=y
+CONFIG_MODVERSIONS=y
+CONFIG_NAMESPACES=y
+CONFIG_NET=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_SCH_DEFAULT=y
+CONFIG_NET_SCH_FQ_CODEL=y
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_NET_VRF=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_NETLINK_LOG=y
+CONFIG_NETFILTER_NETLINK_QUEUE=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETLABEL=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NO_HZ=y
+CONFIG_NR_CPUS=128
+CONFIG_NUMA=y
+CONFIG_NUMA_BALANCING=y
+CONFIG_NVMEM=y
+CONFIG_OSF_PARTITION=y
+CONFIG_PACKET=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_MSI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POWER_SUPPLY=y
+CONFIG_PREEMPT=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_PTP_1588_CLOCK=y
+CONFIG_RC_DEVICES=y
+CONFIG_RC_LOOPBACK=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_SCHEDSTATS=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SMP=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_SYNC_FILE=y
+CONFIG_SYSVIPC=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_TLS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_USER_NS=y
+CONFIG_VALIDATE_FS_PARSER=y
+CONFIG_VETH=y
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_NET=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VLAN_8021Q=y
+CONFIG_X86_ACPI_CPUFREQ=y
+CONFIG_X86_CPUID=y
+CONFIG_X86_MSR=y
+CONFIG_X86_POWERNOW_K8=y
+CONFIG_XDP_SOCKETS_DIAG=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_USER=y
+CONFIG_ZEROPLUS_FF=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 59cf81ec55af..bec15558fd93 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -436,7 +436,7 @@ struct nstoken *open_netns(const char *name)
int err;
struct nstoken *token;
- token = malloc(sizeof(struct nstoken));
+ token = calloc(1, sizeof(struct nstoken));
if (!ASSERT_OK_PTR(token, "malloc token"))
return NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 08c0601b3e84..0b899d2d8ea7 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -17,6 +17,14 @@ static void trigger_func2(void)
asm volatile ("");
}
+/* attach point for byname sleepable uprobe */
+static void trigger_func3(void)
+{
+ asm volatile ("");
+}
+
+static char test_data[] = "test_data";
+
void test_attach_probe(void)
{
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
@@ -49,9 +57,17 @@ void test_attach_probe(void)
if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
return;
- skel = test_attach_probe__open_and_load();
+ skel = test_attach_probe__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
+
+ /* sleepable kprobe test case needs flags set before loading */
+ if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable,
+ BPF_F_SLEEPABLE), "kprobe_sleepable_flags"))
+ goto cleanup;
+
+ if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load"))
+ goto cleanup;
if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
goto cleanup;
@@ -151,6 +167,30 @@ void test_attach_probe(void)
if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2"))
goto cleanup;
+ /* sleepable kprobes should not attach successfully */
+ skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable);
+ if (!ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable"))
+ goto cleanup;
+
+ /* test sleepable uprobe and uretprobe variants */
+ skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable"))
+ goto cleanup;
+
+ skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, "attach_uprobe_byname3"))
+ goto cleanup;
+
+ skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable"))
+ goto cleanup;
+
+ skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3"))
+ goto cleanup;
+
+ skel->bss->user_ptr = test_data;
+
/* trigger & validate kprobe && kretprobe */
usleep(1);
@@ -164,6 +204,9 @@ void test_attach_probe(void)
/* trigger & validate uprobe attached by name */
trigger_func2();
+ /* trigger & validate sleepable uprobe attached by name */
+ trigger_func3();
+
ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res");
ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
@@ -174,6 +217,10 @@ void test_attach_probe(void)
ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res");
ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res");
ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res");
+ ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res");
+ ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res");
cleanup:
test_attach_probe__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 7ff5fa93d056..a33874b081b6 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -27,6 +27,7 @@
#include "bpf_iter_test_kern5.skel.h"
#include "bpf_iter_test_kern6.skel.h"
#include "bpf_iter_bpf_link.skel.h"
+#include "bpf_iter_ksym.skel.h"
static int duration;
@@ -1120,6 +1121,19 @@ static void test_link_iter(void)
bpf_iter_bpf_link__destroy(skel);
}
+static void test_ksym_iter(void)
+{
+ struct bpf_iter_ksym *skel;
+
+ skel = bpf_iter_ksym__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_ksym__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_ksym);
+
+ bpf_iter_ksym__destroy(skel);
+}
+
#define CMP_BUFFER_SIZE 1024
static char task_vma_output[CMP_BUFFER_SIZE];
static char proc_maps_output[CMP_BUFFER_SIZE];
@@ -1267,4 +1281,6 @@ void test_bpf_iter(void)
test_buf_neg_offset();
if (test__start_subtest("link-iter"))
test_link_iter();
+ if (test__start_subtest("ksym"))
+ test_ksym_iter();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
index 380d7a2072e3..4cd8a25afe68 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
@@ -120,6 +120,64 @@ static void check_nested_calls(struct bpf_loop *skel)
bpf_link__destroy(link);
}
+static void check_non_constant_callback(struct bpf_loop *skel)
+{
+ struct bpf_link *link =
+ bpf_program__attach(skel->progs.prog_non_constant_callback);
+
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ skel->bss->callback_selector = 0x0F;
+ usleep(1);
+ ASSERT_EQ(skel->bss->g_output, 0x0F, "g_output #1");
+
+ skel->bss->callback_selector = 0xF0;
+ usleep(1);
+ ASSERT_EQ(skel->bss->g_output, 0xF0, "g_output #2");
+
+ bpf_link__destroy(link);
+}
+
+static void check_stack(struct bpf_loop *skel)
+{
+ struct bpf_link *link = bpf_program__attach(skel->progs.stack_check);
+ const int max_key = 12;
+ int key;
+ int map_fd;
+
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.map1);
+
+ if (!ASSERT_GE(map_fd, 0, "bpf_map__fd"))
+ goto out;
+
+ for (key = 1; key <= max_key; ++key) {
+ int val = key;
+ int err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST);
+
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto out;
+ }
+
+ usleep(1);
+
+ for (key = 1; key <= max_key; ++key) {
+ int val;
+ int err = bpf_map_lookup_elem(map_fd, &key, &val);
+
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ goto out;
+ if (!ASSERT_EQ(val, key + 1, "bad value in the map"))
+ goto out;
+ }
+
+out:
+ bpf_link__destroy(link);
+}
+
void test_bpf_loop(void)
{
struct bpf_loop *skel;
@@ -140,6 +198,10 @@ void test_bpf_loop(void)
check_invalid_flags(skel);
if (test__start_subtest("check_nested_calls"))
check_nested_calls(skel);
+ if (test__start_subtest("check_non_constant_callback"))
+ check_non_constant_callback(skel);
+ if (test__start_subtest("check_stack"))
+ check_stack(skel);
bpf_loop__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index dd30b1e3a67c..7a74a1579076 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -2,13 +2,29 @@
#include <test_progs.h>
#include <network_helpers.h>
#include "test_bpf_nf.skel.h"
+#include "test_bpf_nf_fail.skel.h"
+
+static char log_buf[1024 * 1024];
+
+struct {
+ const char *prog_name;
+ const char *err_msg;
+} test_bpf_nf_fail_tests[] = {
+ { "alloc_release", "kernel function bpf_ct_release args#0 expected pointer to STRUCT nf_conn but" },
+ { "insert_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" },
+ { "lookup_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" },
+ { "set_timeout_after_insert", "kernel function bpf_ct_set_timeout args#0 expected pointer to STRUCT nf_conn___init but" },
+ { "set_status_after_insert", "kernel function bpf_ct_set_status args#0 expected pointer to STRUCT nf_conn___init but" },
+ { "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" },
+ { "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" },
+};
enum {
TEST_XDP,
TEST_TC_BPF,
};
-void test_bpf_nf_ct(int mode)
+static void test_bpf_nf_ct(int mode)
{
struct test_bpf_nf *skel;
int prog_fd, err;
@@ -39,14 +55,60 @@ void test_bpf_nf_ct(int mode)
ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id");
ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup");
ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple");
+ ASSERT_EQ(skel->data->test_alloc_entry, 0, "Test for alloc new entry");
+ ASSERT_EQ(skel->data->test_insert_entry, 0, "Test for insert new entry");
+ ASSERT_EQ(skel->data->test_succ_lookup, 0, "Test for successful lookup");
+ /* allow some tolerance for test_delta_timeout value to avoid races. */
+ ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update");
+ ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update");
+ /* expected status is IPS_SEEN_REPLY */
+ ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update ");
end:
test_bpf_nf__destroy(skel);
}
+static void test_bpf_nf_ct_fail(const char *prog_name, const char *err_msg)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+ .kernel_log_size = sizeof(log_buf),
+ .kernel_log_level = 1);
+ struct test_bpf_nf_fail *skel;
+ struct bpf_program *prog;
+ int ret;
+
+ skel = test_bpf_nf_fail__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "test_bpf_nf_fail__open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto end;
+
+ bpf_program__set_autoload(prog, true);
+
+ ret = test_bpf_nf_fail__load(skel);
+ if (!ASSERT_ERR(ret, "test_bpf_nf_fail__load must fail"))
+ goto end;
+
+ if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) {
+ fprintf(stderr, "Expected: %s\n", err_msg);
+ fprintf(stderr, "Verifier: %s\n", log_buf);
+ }
+
+end:
+ test_bpf_nf_fail__destroy(skel);
+}
+
void test_bpf_nf(void)
{
+ int i;
if (test__start_subtest("xdp-ct"))
test_bpf_nf_ct(TEST_XDP);
if (test__start_subtest("tc-bpf-ct"))
test_bpf_nf_ct(TEST_TC_BPF);
+ for (i = 0; i < ARRAY_SIZE(test_bpf_nf_fail_tests); i++) {
+ if (test__start_subtest(test_bpf_nf_fail_tests[i].prog_name))
+ test_bpf_nf_ct_fail(test_bpf_nf_fail_tests[i].prog_name,
+ test_bpf_nf_fail_tests[i].err_msg);
+ }
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index e9a9a31b2ffe..2959a52ced06 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -9,6 +9,9 @@
#include "bpf_cubic.skel.h"
#include "bpf_tcp_nogpl.skel.h"
#include "bpf_dctcp_release.skel.h"
+#include "tcp_ca_write_sk_pacing.skel.h"
+#include "tcp_ca_incompl_cong_ops.skel.h"
+#include "tcp_ca_unsupp_cong_op.skel.h"
#ifndef ENOTSUPP
#define ENOTSUPP 524
@@ -322,6 +325,58 @@ static void test_rel_setsockopt(void)
bpf_dctcp_release__destroy(rel_skel);
}
+static void test_write_sk_pacing(void)
+{
+ struct tcp_ca_write_sk_pacing *skel;
+ struct bpf_link *link;
+
+ skel = tcp_ca_write_sk_pacing__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing);
+ ASSERT_OK_PTR(link, "attach_struct_ops");
+
+ bpf_link__destroy(link);
+ tcp_ca_write_sk_pacing__destroy(skel);
+}
+
+static void test_incompl_cong_ops(void)
+{
+ struct tcp_ca_incompl_cong_ops *skel;
+ struct bpf_link *link;
+
+ skel = tcp_ca_incompl_cong_ops__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ /* That cong_avoid() and cong_control() are missing is only reported at
+ * this point:
+ */
+ link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops);
+ ASSERT_ERR_PTR(link, "attach_struct_ops");
+
+ bpf_link__destroy(link);
+ tcp_ca_incompl_cong_ops__destroy(skel);
+}
+
+static void test_unsupp_cong_op(void)
+{
+ libbpf_print_fn_t old_print_fn;
+ struct tcp_ca_unsupp_cong_op *skel;
+
+ err_str = "attach to unsupported member get_info";
+ found = false;
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+ skel = tcp_ca_unsupp_cong_op__open_and_load();
+ ASSERT_NULL(skel, "open_and_load");
+ ASSERT_EQ(found, true, "expected_err_msg");
+
+ tcp_ca_unsupp_cong_op__destroy(skel);
+ libbpf_set_print(old_print_fn);
+}
+
void test_bpf_tcp_ca(void)
{
if (test__start_subtest("dctcp"))
@@ -334,4 +389,10 @@ void test_bpf_tcp_ca(void)
test_dctcp_fallback();
if (test__start_subtest("rel_setsockopt"))
test_rel_setsockopt();
+ if (test__start_subtest("write_sk_pacing"))
+ test_write_sk_pacing();
+ if (test__start_subtest("incompl_cong_ops"))
+ test_incompl_cong_ops();
+ if (test__start_subtest("unsupp_cong_op"))
+ test_unsupp_cong_op();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index ba5bde53d418..ef6528b8084c 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -34,7 +34,6 @@ static bool always_log;
#undef CHECK
#define CHECK(condition, format...) _CHECK(condition, "check", duration, format)
-#define BTF_END_RAW 0xdeadbeef
#define NAME_TBD 0xdeadb33f
#define NAME_NTH(N) (0xfffe0000 | N)
@@ -2897,26 +2896,6 @@ static struct btf_raw_test raw_tests[] = {
},
{
- .descr = "invalid enum kind_flag",
- .raw_types = {
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */
- BTF_ENUM_ENC(NAME_TBD, 0),
- BTF_END_RAW,
- },
- BTF_STR_SEC("\0A"),
- .map_type = BPF_MAP_TYPE_ARRAY,
- .map_name = "enum_type_check_btf",
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .key_type_id = 1,
- .value_type_id = 1,
- .max_entries = 4,
- .btf_load_err = true,
- .err_str = "Invalid btf_info kind_flag",
-},
-
-{
.descr = "valid fwd kind_flag",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
@@ -4072,6 +4051,42 @@ static struct btf_raw_test raw_tests[] = {
.btf_load_err = true,
.err_str = "Type tags don't precede modifiers",
},
+{
+ .descr = "enum64 test #1, unsigned, size 8",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [2] */
+ BTF_ENUM64_ENC(NAME_TBD, 0, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0a\0b\0c"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+},
+{
+ .descr = "enum64 test #2, signed, size 4",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 1, 2), 4), /* [2] */
+ BTF_ENUM64_ENC(NAME_TBD, -1, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0a\0b\0c"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+},
}; /* struct btf_raw_test raw_tests[] */
@@ -4636,7 +4651,6 @@ struct btf_file_test {
};
static struct btf_file_test file_tests[] = {
- { .file = "test_btf_haskv.o", },
{ .file = "test_btf_newkv.o", },
{ .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
};
@@ -5324,7 +5338,7 @@ static void do_test_pprint(int test_num)
ret = snprintf(pin_path, sizeof(pin_path), "%s/%s",
"/sys/fs/bpf", test->map_name);
- if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long",
+ if (CHECK(ret >= sizeof(pin_path), "pin_path %s/%s is too long",
"/sys/fs/bpf", test->map_name)) {
err = -1;
goto done;
@@ -7000,9 +7014,12 @@ static struct btf_dedup_test dedup_tests[] = {
BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */
BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */
BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */
+ BTF_ENUM64_ENC(NAME_TBD, 0, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 1),
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"),
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"),
},
.expect = {
.raw_types = {
@@ -7030,9 +7047,12 @@ static struct btf_dedup_test dedup_tests[] = {
BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */
BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */
BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */
+ BTF_ENUM64_ENC(NAME_TBD, 0, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 1),
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"),
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"),
},
},
{
@@ -7493,6 +7513,91 @@ static struct btf_dedup_test dedup_tests[] = {
BTF_STR_SEC("\0tag1\0t\0m"),
},
},
+{
+ .descr = "dedup: enum64, standalone",
+ .input = {
+ .raw_types = {
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+},
+{
+ .descr = "dedup: enum64, fwd resolution",
+ .input = {
+ .raw_types = {
+ /* [1] fwd enum64 'e1' before full enum */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ /* [2] full enum64 'e1' after fwd */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ /* [3] full enum64 'e2' before fwd */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(4), 0, 456),
+ /* [4] fwd enum64 'e2' after full enum */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ /* [5] incompatible full enum64 with different value */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 0, 321),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] full enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ /* [2] full enum64 'e2' */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(4), 0, 456),
+ /* [3] incompatible full enum64 with different value */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 0, 321),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
+ },
+},
+{
+ .descr = "dedup: enum and enum64, no dedup",
+ .input = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+},
};
@@ -7517,6 +7622,8 @@ static int btf_type_size(const struct btf_type *t)
return base_size + sizeof(__u32);
case BTF_KIND_ENUM:
return base_size + vlen * sizeof(struct btf_enum);
+ case BTF_KIND_ENUM64:
+ return base_size + vlen * sizeof(struct btf_enum64);
case BTF_KIND_ARRAY:
return base_size + sizeof(struct btf_array);
case BTF_KIND_STRUCT:
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index addf99c05896..6e36de1302fc 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -9,6 +9,7 @@ static void gen_btf(struct btf *btf)
const struct btf_var_secinfo *vi;
const struct btf_type *t;
const struct btf_member *m;
+ const struct btf_enum64 *v64;
const struct btf_enum *v;
const struct btf_param *p;
int id, err, str_off;
@@ -171,7 +172,7 @@ static void gen_btf(struct btf *btf)
ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name");
ASSERT_EQ(v->val, 2, "v2_val");
ASSERT_STREQ(btf_type_raw_dump(btf, 9),
- "[9] ENUM 'e1' size=4 vlen=2\n"
+ "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
"\t'v1' val=1\n"
"\t'v2' val=2", "raw_dump");
@@ -202,7 +203,7 @@ static void gen_btf(struct btf *btf)
ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind");
ASSERT_EQ(t->size, 4, "enum_fwd_sz");
ASSERT_STREQ(btf_type_raw_dump(btf, 12),
- "[12] ENUM 'enum_fwd' size=4 vlen=0", "raw_dump");
+ "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "raw_dump");
/* TYPEDEF */
id = btf__add_typedef(btf, "typedef1", 1);
@@ -307,6 +308,48 @@ static void gen_btf(struct btf *btf)
ASSERT_EQ(t->type, 1, "tag_type");
ASSERT_STREQ(btf_type_raw_dump(btf, 20),
"[20] TYPE_TAG 'tag1' type_id=1", "raw_dump");
+
+ /* ENUM64 */
+ id = btf__add_enum64(btf, "e1", 8, true);
+ ASSERT_EQ(id, 21, "enum64_id");
+ err = btf__add_enum64_value(btf, "v1", -1);
+ ASSERT_OK(err, "v1_res");
+ err = btf__add_enum64_value(btf, "v2", 0x123456789); /* 4886718345 */
+ ASSERT_OK(err, "v2_res");
+ t = btf__type_by_id(btf, 21);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "enum64_vlen");
+ ASSERT_EQ(t->size, 8, "enum64_sz");
+ v64 = btf_enum64(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name");
+ ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val");
+ ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val");
+ v64 = btf_enum64(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v2", "v2_name");
+ ASSERT_EQ(v64->val_hi32, 0x1, "v2_val");
+ ASSERT_EQ(v64->val_lo32, 0x23456789, "v2_val");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 21),
+ "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345", "raw_dump");
+
+ id = btf__add_enum64(btf, "e1", 8, false);
+ ASSERT_EQ(id, 22, "enum64_id");
+ err = btf__add_enum64_value(btf, "v1", 0xffffffffFFFFFFFF); /* 18446744073709551615 */
+ ASSERT_OK(err, "v1_res");
+ t = btf__type_by_id(btf, 22);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind");
+ ASSERT_EQ(btf_vlen(t), 1, "enum64_vlen");
+ ASSERT_EQ(t->size, 8, "enum64_sz");
+ v64 = btf_enum64(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name");
+ ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val");
+ ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 22),
+ "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615", "raw_dump");
}
static void test_btf_add()
@@ -332,12 +375,12 @@ static void test_btf_add()
"\t'f2' type_id=1 bits_offset=32 bitfield_size=16",
"[8] UNION 'u1' size=8 vlen=1\n"
"\t'f1' type_id=1 bits_offset=0 bitfield_size=16",
- "[9] ENUM 'e1' size=4 vlen=2\n"
+ "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
"\t'v1' val=1\n"
"\t'v2' val=2",
"[10] FWD 'struct_fwd' fwd_kind=struct",
"[11] FWD 'union_fwd' fwd_kind=union",
- "[12] ENUM 'enum_fwd' size=4 vlen=0",
+ "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0",
"[13] TYPEDEF 'typedef1' type_id=1",
"[14] FUNC 'func1' type_id=15 linkage=global",
"[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
@@ -348,7 +391,12 @@ static void test_btf_add()
"\ttype_id=1 offset=4 size=8",
"[18] DECL_TAG 'tag1' type_id=16 component_idx=-1",
"[19] DECL_TAG 'tag2' type_id=14 component_idx=1",
- "[20] TYPE_TAG 'tag1' type_id=1");
+ "[20] TYPE_TAG 'tag1' type_id=1",
+ "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345",
+ "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615");
btf__free(btf);
}
@@ -370,7 +418,7 @@ static void test_btf_add_btf()
gen_btf(btf2);
id = btf__add_btf(btf1, btf2);
- if (!ASSERT_EQ(id, 21, "id"))
+ if (!ASSERT_EQ(id, 23, "id"))
goto cleanup;
VALIDATE_RAW_BTF(
@@ -386,12 +434,12 @@ static void test_btf_add_btf()
"\t'f2' type_id=1 bits_offset=32 bitfield_size=16",
"[8] UNION 'u1' size=8 vlen=1\n"
"\t'f1' type_id=1 bits_offset=0 bitfield_size=16",
- "[9] ENUM 'e1' size=4 vlen=2\n"
+ "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
"\t'v1' val=1\n"
"\t'v2' val=2",
"[10] FWD 'struct_fwd' fwd_kind=struct",
"[11] FWD 'union_fwd' fwd_kind=union",
- "[12] ENUM 'enum_fwd' size=4 vlen=0",
+ "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0",
"[13] TYPEDEF 'typedef1' type_id=1",
"[14] FUNC 'func1' type_id=15 linkage=global",
"[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
@@ -403,36 +451,46 @@ static void test_btf_add_btf()
"[18] DECL_TAG 'tag1' type_id=16 component_idx=-1",
"[19] DECL_TAG 'tag2' type_id=14 component_idx=1",
"[20] TYPE_TAG 'tag1' type_id=1",
+ "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345",
+ "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615",
/* types appended from the second BTF */
- "[21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
- "[22] PTR '(anon)' type_id=21",
- "[23] CONST '(anon)' type_id=25",
- "[24] VOLATILE '(anon)' type_id=23",
- "[25] RESTRICT '(anon)' type_id=24",
- "[26] ARRAY '(anon)' type_id=22 index_type_id=21 nr_elems=10",
- "[27] STRUCT 's1' size=8 vlen=2\n"
- "\t'f1' type_id=21 bits_offset=0\n"
- "\t'f2' type_id=21 bits_offset=32 bitfield_size=16",
- "[28] UNION 'u1' size=8 vlen=1\n"
- "\t'f1' type_id=21 bits_offset=0 bitfield_size=16",
- "[29] ENUM 'e1' size=4 vlen=2\n"
+ "[23] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[24] PTR '(anon)' type_id=23",
+ "[25] CONST '(anon)' type_id=27",
+ "[26] VOLATILE '(anon)' type_id=25",
+ "[27] RESTRICT '(anon)' type_id=26",
+ "[28] ARRAY '(anon)' type_id=24 index_type_id=23 nr_elems=10",
+ "[29] STRUCT 's1' size=8 vlen=2\n"
+ "\t'f1' type_id=23 bits_offset=0\n"
+ "\t'f2' type_id=23 bits_offset=32 bitfield_size=16",
+ "[30] UNION 'u1' size=8 vlen=1\n"
+ "\t'f1' type_id=23 bits_offset=0 bitfield_size=16",
+ "[31] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
"\t'v1' val=1\n"
"\t'v2' val=2",
- "[30] FWD 'struct_fwd' fwd_kind=struct",
- "[31] FWD 'union_fwd' fwd_kind=union",
- "[32] ENUM 'enum_fwd' size=4 vlen=0",
- "[33] TYPEDEF 'typedef1' type_id=21",
- "[34] FUNC 'func1' type_id=35 linkage=global",
- "[35] FUNC_PROTO '(anon)' ret_type_id=21 vlen=2\n"
- "\t'p1' type_id=21\n"
- "\t'p2' type_id=22",
- "[36] VAR 'var1' type_id=21, linkage=global-alloc",
- "[37] DATASEC 'datasec1' size=12 vlen=1\n"
- "\ttype_id=21 offset=4 size=8",
- "[38] DECL_TAG 'tag1' type_id=36 component_idx=-1",
- "[39] DECL_TAG 'tag2' type_id=34 component_idx=1",
- "[40] TYPE_TAG 'tag1' type_id=21");
+ "[32] FWD 'struct_fwd' fwd_kind=struct",
+ "[33] FWD 'union_fwd' fwd_kind=union",
+ "[34] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0",
+ "[35] TYPEDEF 'typedef1' type_id=23",
+ "[36] FUNC 'func1' type_id=37 linkage=global",
+ "[37] FUNC_PROTO '(anon)' ret_type_id=23 vlen=2\n"
+ "\t'p1' type_id=23\n"
+ "\t'p2' type_id=24",
+ "[38] VAR 'var1' type_id=23, linkage=global-alloc",
+ "[39] DATASEC 'datasec1' size=12 vlen=1\n"
+ "\ttype_id=23 offset=4 size=8",
+ "[40] DECL_TAG 'tag1' type_id=38 component_idx=-1",
+ "[41] DECL_TAG 'tag2' type_id=36 component_idx=1",
+ "[42] TYPE_TAG 'tag1' type_id=23",
+ "[43] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345",
+ "[44] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615");
cleanup:
btf__free(btf1);
diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c
index 1931a158510e..63a51e9f3630 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_extern.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c
@@ -39,6 +39,7 @@ static struct test_case {
"CONFIG_STR=\"abracad\"\n"
"CONFIG_MISSING=0",
.data = {
+ .unkn_virt_val = 0,
.bpf_syscall = false,
.tristate_val = TRI_MODULE,
.bool_val = true,
@@ -121,7 +122,7 @@ static struct test_case {
void test_core_extern(void)
{
const uint32_t kern_ver = get_kernel_version();
- int err, duration = 0, i, j;
+ int err, i, j;
struct test_core_extern *skel = NULL;
uint64_t *got, *exp;
int n = sizeof(*skel->data) / sizeof(uint64_t);
@@ -136,19 +137,17 @@ void test_core_extern(void)
continue;
skel = test_core_extern__open_opts(&opts);
- if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
goto cleanup;
err = test_core_extern__load(skel);
if (t->fails) {
- CHECK(!err, "skel_load",
- "shouldn't succeed open/load of skeleton\n");
+ ASSERT_ERR(err, "skel_load_should_fail");
goto cleanup;
- } else if (CHECK(err, "skel_load",
- "failed to open/load skeleton\n")) {
+ } else if (!ASSERT_OK(err, "skel_load")) {
goto cleanup;
}
err = test_core_extern__attach(skel);
- if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err))
+ if (!ASSERT_OK(err, "attach_raw_tp"))
goto cleanup;
usleep(1);
@@ -158,9 +157,7 @@ void test_core_extern(void)
got = (uint64_t *)skel->data;
exp = (uint64_t *)&t->data;
for (j = 0; j < n; j++) {
- CHECK(got[j] != exp[j], "check_res",
- "result #%d: expected %llx, but got %llx\n",
- j, (__u64)exp[j], (__u64)got[j]);
+ ASSERT_EQ(got[j], exp[j], "result");
}
cleanup:
test_core_extern__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 3712dfe1be59..c8655ba9a88f 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -84,6 +84,7 @@ static int duration = 0;
#define NESTING_ERR_CASE(name) { \
NESTING_CASE_COMMON(name), \
.fails = true, \
+ .run_btfgen_fails = true, \
}
#define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
@@ -258,12 +259,14 @@ static int duration = 0;
BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \
"probed:", name), \
.fails = true, \
+ .run_btfgen_fails = true, \
.raw_tp_name = "sys_enter", \
.prog_name = "test_core_bitfields", \
}, { \
BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \
"direct:", name), \
.fails = true, \
+ .run_btfgen_fails = true, \
.prog_name = "test_core_bitfields_direct", \
}
@@ -304,6 +307,7 @@ static int duration = 0;
#define SIZE_ERR_CASE(name) { \
SIZE_CASE_COMMON(name), \
.fails = true, \
+ .run_btfgen_fails = true, \
}
#define TYPE_BASED_CASE_COMMON(name) \
@@ -363,6 +367,25 @@ static int duration = 0;
.fails = true, \
}
+#define ENUM64VAL_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_enum64val.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_enum64val"
+
+#define ENUM64VAL_CASE(name, ...) { \
+ ENUM64VAL_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_enum64val_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_enum64val_output), \
+}
+
+#define ENUM64VAL_ERR_CASE(name) { \
+ ENUM64VAL_CASE_COMMON(name), \
+ .fails = true, \
+}
+
struct core_reloc_test_case;
typedef int (*setup_test_fn)(struct core_reloc_test_case *test);
@@ -377,6 +400,7 @@ struct core_reloc_test_case {
const char *output;
int output_len;
bool fails;
+ bool run_btfgen_fails;
bool needs_testmod;
bool relaxed_core_relocs;
const char *prog_name;
@@ -519,7 +543,6 @@ static int __trigger_module_test_read(const struct core_reloc_test_case *test)
return 0;
}
-
static const struct core_reloc_test_case test_cases[] = {
/* validate we can find kernel image and use its BTF for relocs */
{
@@ -532,6 +555,7 @@ static const struct core_reloc_test_case test_cases[] = {
.valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
.comm = "test_progs",
.comm_len = sizeof("test_progs"),
+ .local_task_struct_matches = true,
},
.output_len = sizeof(struct core_reloc_kernel_output),
.raw_tp_name = "sys_enter",
@@ -728,9 +752,10 @@ static const struct core_reloc_test_case test_cases[] = {
SIZE_CASE(size___diff_offs),
SIZE_ERR_CASE(size___err_ambiguous),
- /* validate type existence and size relocations */
+ /* validate type existence, match, and size relocations */
TYPE_BASED_CASE(type_based, {
.struct_exists = 1,
+ .complex_struct_exists = 1,
.union_exists = 1,
.enum_exists = 1,
.typedef_named_struct_exists = 1,
@@ -739,8 +764,24 @@ static const struct core_reloc_test_case test_cases[] = {
.typedef_int_exists = 1,
.typedef_enum_exists = 1,
.typedef_void_ptr_exists = 1,
+ .typedef_restrict_ptr_exists = 1,
.typedef_func_proto_exists = 1,
.typedef_arr_exists = 1,
+
+ .struct_matches = 1,
+ .complex_struct_matches = 1,
+ .union_matches = 1,
+ .enum_matches = 1,
+ .typedef_named_struct_matches = 1,
+ .typedef_anon_struct_matches = 1,
+ .typedef_struct_ptr_matches = 1,
+ .typedef_int_matches = 1,
+ .typedef_enum_matches = 1,
+ .typedef_void_ptr_matches = 1,
+ .typedef_restrict_ptr_matches = 1,
+ .typedef_func_proto_matches = 1,
+ .typedef_arr_matches = 1,
+
.struct_sz = sizeof(struct a_struct),
.union_sz = sizeof(union a_union),
.enum_sz = sizeof(enum an_enum),
@@ -756,6 +797,45 @@ static const struct core_reloc_test_case test_cases[] = {
TYPE_BASED_CASE(type_based___all_missing, {
/* all zeros */
}),
+ TYPE_BASED_CASE(type_based___diff, {
+ .struct_exists = 1,
+ .complex_struct_exists = 1,
+ .union_exists = 1,
+ .enum_exists = 1,
+ .typedef_named_struct_exists = 1,
+ .typedef_anon_struct_exists = 1,
+ .typedef_struct_ptr_exists = 1,
+ .typedef_int_exists = 1,
+ .typedef_enum_exists = 1,
+ .typedef_void_ptr_exists = 1,
+ .typedef_func_proto_exists = 1,
+ .typedef_arr_exists = 1,
+
+ .struct_matches = 1,
+ .complex_struct_matches = 1,
+ .union_matches = 1,
+ .enum_matches = 1,
+ .typedef_named_struct_matches = 1,
+ .typedef_anon_struct_matches = 1,
+ .typedef_struct_ptr_matches = 1,
+ .typedef_int_matches = 0,
+ .typedef_enum_matches = 1,
+ .typedef_void_ptr_matches = 1,
+ .typedef_func_proto_matches = 0,
+ .typedef_arr_matches = 0,
+
+ .struct_sz = sizeof(struct a_struct___diff),
+ .union_sz = sizeof(union a_union___diff),
+ .enum_sz = sizeof(enum an_enum___diff),
+ .typedef_named_struct_sz = sizeof(named_struct_typedef___diff),
+ .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff),
+ .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff),
+ .typedef_int_sz = sizeof(int_typedef___diff),
+ .typedef_enum_sz = sizeof(enum_typedef___diff),
+ .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff),
+ .typedef_func_proto_sz = sizeof(func_proto_typedef___diff),
+ .typedef_arr_sz = sizeof(arr_typedef___diff),
+ }),
TYPE_BASED_CASE(type_based___diff_sz, {
.struct_exists = 1,
.union_exists = 1,
@@ -768,6 +848,19 @@ static const struct core_reloc_test_case test_cases[] = {
.typedef_void_ptr_exists = 1,
.typedef_func_proto_exists = 1,
.typedef_arr_exists = 1,
+
+ .struct_matches = 0,
+ .union_matches = 0,
+ .enum_matches = 0,
+ .typedef_named_struct_matches = 0,
+ .typedef_anon_struct_matches = 0,
+ .typedef_struct_ptr_matches = 1,
+ .typedef_int_matches = 0,
+ .typedef_enum_matches = 0,
+ .typedef_void_ptr_matches = 1,
+ .typedef_func_proto_matches = 0,
+ .typedef_arr_matches = 0,
+
.struct_sz = sizeof(struct a_struct___diff_sz),
.union_sz = sizeof(union a_union___diff_sz),
.enum_sz = sizeof(enum an_enum___diff_sz),
@@ -782,10 +875,12 @@ static const struct core_reloc_test_case test_cases[] = {
}),
TYPE_BASED_CASE(type_based___incompat, {
.enum_exists = 1,
+ .enum_matches = 1,
.enum_sz = sizeof(enum an_enum),
}),
TYPE_BASED_CASE(type_based___fn_wrong_args, {
.struct_exists = 1,
+ .struct_matches = 1,
.struct_sz = sizeof(struct a_struct),
}),
@@ -831,6 +926,45 @@ static const struct core_reloc_test_case test_cases[] = {
.anon_val2 = 0x222,
}),
ENUMVAL_ERR_CASE(enumval___err_missing),
+
+ /* 64bit enumerator value existence and value relocations */
+ ENUM64VAL_CASE(enum64val, {
+ .unsigned_val1_exists = true,
+ .unsigned_val2_exists = true,
+ .unsigned_val3_exists = true,
+ .signed_val1_exists = true,
+ .signed_val2_exists = true,
+ .signed_val3_exists = true,
+ .unsigned_val1 = 0x1ffffffffULL,
+ .unsigned_val2 = 0x2,
+ .signed_val1 = 0x1ffffffffLL,
+ .signed_val2 = -2,
+ }),
+ ENUM64VAL_CASE(enum64val___diff, {
+ .unsigned_val1_exists = true,
+ .unsigned_val2_exists = true,
+ .unsigned_val3_exists = true,
+ .signed_val1_exists = true,
+ .signed_val2_exists = true,
+ .signed_val3_exists = true,
+ .unsigned_val1 = 0x101ffffffffULL,
+ .unsigned_val2 = 0x202ffffffffULL,
+ .signed_val1 = -101,
+ .signed_val2 = -202,
+ }),
+ ENUM64VAL_CASE(enum64val___val3_missing, {
+ .unsigned_val1_exists = true,
+ .unsigned_val2_exists = true,
+ .unsigned_val3_exists = false,
+ .signed_val1_exists = true,
+ .signed_val2_exists = true,
+ .signed_val3_exists = false,
+ .unsigned_val1 = 0x111ffffffffULL,
+ .unsigned_val2 = 0x222,
+ .signed_val1 = 0x111ffffffffLL,
+ .signed_val2 = -222,
+ }),
+ ENUM64VAL_ERR_CASE(enum64val___err_missing),
};
struct data {
@@ -894,7 +1028,7 @@ static void run_core_reloc_tests(bool use_btfgen)
/* generate a "minimal" BTF file and use it as source */
if (use_btfgen) {
- if (!test_case->btf_src_file || test_case->fails) {
+ if (!test_case->btf_src_file || test_case->run_btfgen_fails) {
test__skip();
continue;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index a7e74297f15f..5a7e6011f6bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -7,11 +7,9 @@
void serial_test_fexit_stress(void)
{
- char test_skb[128] = {};
int fexit_fd[CNT] = {};
int link_fd[CNT] = {};
- char error[4096];
- int err, i, filter_fd;
+ int err, i;
const struct bpf_insn trace_program[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -20,25 +18,9 @@ void serial_test_fexit_stress(void)
LIBBPF_OPTS(bpf_prog_load_opts, trace_opts,
.expected_attach_type = BPF_TRACE_FEXIT,
- .log_buf = error,
- .log_size = sizeof(error),
);
- const struct bpf_insn skb_program[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
-
- LIBBPF_OPTS(bpf_prog_load_opts, skb_opts,
- .log_buf = error,
- .log_size = sizeof(error),
- );
-
- LIBBPF_OPTS(bpf_test_run_opts, topts,
- .data_in = test_skb,
- .data_size_in = sizeof(test_skb),
- .repeat = 1,
- );
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1",
trace_opts.expected_attach_type);
@@ -58,15 +40,9 @@ void serial_test_fexit_stress(void)
goto out;
}
- filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
- skb_program, sizeof(skb_program) / sizeof(struct bpf_insn),
- &skb_opts);
- if (!ASSERT_GE(filter_fd, 0, "test_program_loaded"))
- goto out;
+ err = bpf_prog_test_run_opts(fexit_fd[0], &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
- err = bpf_prog_test_run_opts(filter_fd, &topts);
- close(filter_fd);
- CHECK_FAIL(err);
out:
for (i = 0; i < CNT; i++) {
if (link_fd[i])
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 5b93d5d0bd93..d457a55ff408 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -329,7 +329,7 @@ static int get_syms(char ***symsp, size_t *cntp)
struct hashmap *map;
char buf[256];
FILE *f;
- int err;
+ int err = 0;
/*
* The available_filter_functions contains many duplicates,
@@ -364,6 +364,8 @@ static int get_syms(char ***symsp, size_t *cntp)
continue;
if (!strncmp(name, "rcu_", 4))
continue;
+ if (!strcmp(name, "bpf_dispatcher_xdp_func"))
+ continue;
if (!strncmp(name, "__ftrace_invalid_address__",
sizeof("__ftrace_invalid_address__") - 1))
continue;
@@ -407,7 +409,7 @@ static void test_bench_attach(void)
double attach_delta, detach_delta;
struct bpf_link *link = NULL;
char **syms = NULL;
- size_t cnt, i;
+ size_t cnt = 0, i;
if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
new file mode 100644
index 000000000000..93e9cddaadcf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <ctype.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+/*
+ * Utility function uppercasing an entire string.
+ */
+static void uppercase(char *s)
+{
+ for (; *s != '\0'; s++)
+ *s = toupper(*s);
+}
+
+/*
+ * Test case to check that all bpf_attach_type variants are covered by
+ * libbpf_bpf_attach_type_str.
+ */
+static void test_libbpf_bpf_attach_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_attach_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_attach_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_attach_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_attach_type attach_type = (enum bpf_attach_type)e->val;
+ const char *attach_type_name;
+ const char *attach_type_str;
+ char buf[256];
+
+ if (attach_type == __MAX_BPF_ATTACH_TYPE)
+ continue;
+
+ attach_type_name = btf__str_by_offset(btf, e->name_off);
+ attach_type_str = libbpf_bpf_attach_type_str(attach_type);
+ ASSERT_OK_PTR(attach_type_str, attach_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_%s", attach_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, attach_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Test case to check that all bpf_link_type variants are covered by
+ * libbpf_bpf_link_type_str.
+ */
+static void test_libbpf_bpf_link_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_link_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_link_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_link_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_link_type link_type = (enum bpf_link_type)e->val;
+ const char *link_type_name;
+ const char *link_type_str;
+ char buf[256];
+
+ if (link_type == MAX_BPF_LINK_TYPE)
+ continue;
+
+ link_type_name = btf__str_by_offset(btf, e->name_off);
+ link_type_str = libbpf_bpf_link_type_str(link_type);
+ ASSERT_OK_PTR(link_type_str, link_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_LINK_TYPE_%s", link_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, link_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Test case to check that all bpf_map_type variants are covered by
+ * libbpf_bpf_map_type_str.
+ */
+static void test_libbpf_bpf_map_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_map_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_map_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_map_type map_type = (enum bpf_map_type)e->val;
+ const char *map_type_name;
+ const char *map_type_str;
+ char buf[256];
+
+ map_type_name = btf__str_by_offset(btf, e->name_off);
+ map_type_str = libbpf_bpf_map_type_str(map_type);
+ ASSERT_OK_PTR(map_type_str, map_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_MAP_TYPE_%s", map_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, map_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Test case to check that all bpf_prog_type variants are covered by
+ * libbpf_bpf_prog_type_str.
+ */
+static void test_libbpf_bpf_prog_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_prog_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_prog_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val;
+ const char *prog_type_name;
+ const char *prog_type_str;
+ char buf[256];
+
+ prog_type_name = btf__str_by_offset(btf, e->name_off);
+ prog_type_str = libbpf_bpf_prog_type_str(prog_type);
+ ASSERT_OK_PTR(prog_type_str, prog_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_PROG_TYPE_%s", prog_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, prog_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Run all libbpf str conversion tests.
+ */
+void test_libbpf_str(void)
+{
+ if (test__start_subtest("bpf_attach_type_str"))
+ test_libbpf_bpf_attach_type_str();
+
+ if (test__start_subtest("bpf_link_type_str"))
+ test_libbpf_bpf_link_type_str();
+
+ if (test__start_subtest("bpf_map_type_str"))
+ test_libbpf_bpf_map_type_str();
+
+ if (test__start_subtest("bpf_prog_type_str"))
+ test_libbpf_bpf_prog_type_str();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
new file mode 100644
index 000000000000..1102e4f42d2d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "lsm_cgroup.skel.h"
+#include "lsm_cgroup_nonvoid.skel.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
+static struct btf *btf;
+
+static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, p);
+ int cnt = 0;
+ int i;
+
+ ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query");
+
+ if (!attach_func)
+ return p.prog_cnt;
+
+ /* When attach_func is provided, count the number of progs that
+ * attach to the given symbol.
+ */
+
+ if (!btf)
+ btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK(libbpf_get_error(btf), "btf_vmlinux"))
+ return -1;
+
+ p.prog_ids = malloc(sizeof(u32) * p.prog_cnt);
+ p.prog_attach_flags = malloc(sizeof(u32) * p.prog_cnt);
+ ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query");
+
+ for (i = 0; i < p.prog_cnt; i++) {
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int fd;
+
+ fd = bpf_prog_get_fd_by_id(p.prog_ids[i]);
+ ASSERT_GE(fd, 0, "prog_get_fd_by_id");
+ ASSERT_OK(bpf_obj_get_info_by_fd(fd, &info, &info_len), "prog_info_by_fd");
+ close(fd);
+
+ if (info.attach_btf_id ==
+ btf__find_by_name_kind(btf, attach_func, BTF_KIND_FUNC))
+ cnt++;
+ }
+
+ free(p.prog_ids);
+ free(p.prog_attach_flags);
+
+ return cnt;
+}
+
+static void test_lsm_cgroup_functional(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int cgroup_fd = -1, cgroup_fd2 = -1, cgroup_fd3 = -1;
+ int listen_fd, client_fd, accepted_fd;
+ struct lsm_cgroup *skel = NULL;
+ int post_create_prog_fd2 = -1;
+ int post_create_prog_fd = -1;
+ int bind_link_fd2 = -1;
+ int bind_prog_fd2 = -1;
+ int alloc_prog_fd = -1;
+ int bind_prog_fd = -1;
+ int bind_link_fd = -1;
+ int clone_prog_fd = -1;
+ int err, fd, prio;
+ socklen_t socklen;
+
+ cgroup_fd3 = test__join_cgroup("/sock_policy_empty");
+ if (!ASSERT_GE(cgroup_fd3, 0, "create empty cgroup"))
+ goto close_cgroup;
+
+ cgroup_fd2 = test__join_cgroup("/sock_policy_reuse");
+ if (!ASSERT_GE(cgroup_fd2, 0, "create cgroup for reuse"))
+ goto close_cgroup;
+
+ cgroup_fd = test__join_cgroup("/sock_policy");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+ goto close_cgroup;
+
+ skel = lsm_cgroup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto close_cgroup;
+
+ post_create_prog_fd = bpf_program__fd(skel->progs.socket_post_create);
+ post_create_prog_fd2 = bpf_program__fd(skel->progs.socket_post_create2);
+ bind_prog_fd = bpf_program__fd(skel->progs.socket_bind);
+ bind_prog_fd2 = bpf_program__fd(skel->progs.socket_bind2);
+ alloc_prog_fd = bpf_program__fd(skel->progs.socket_alloc);
+ clone_prog_fd = bpf_program__fd(skel->progs.socket_clone);
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 0, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 0, "total prog count");
+ err = bpf_prog_attach(alloc_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);
+ if (err == -ENOTSUPP) {
+ test__skip();
+ goto close_cgroup;
+ }
+ if (!ASSERT_OK(err, "attach alloc_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 1, "total prog count");
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 0, "prog count");
+ err = bpf_prog_attach(clone_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);
+ if (!ASSERT_OK(err, "attach clone_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 2, "total prog count");
+
+ /* Make sure replacing works. */
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 0, "prog count");
+ err = bpf_prog_attach(post_create_prog_fd, cgroup_fd,
+ BPF_LSM_CGROUP, 0);
+ if (!ASSERT_OK(err, "attach post_create_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count");
+
+ attach_opts.replace_prog_fd = post_create_prog_fd;
+ err = bpf_prog_attach_opts(post_create_prog_fd2, cgroup_fd,
+ BPF_LSM_CGROUP, &attach_opts);
+ if (!ASSERT_OK(err, "prog replace post_create_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count");
+
+ /* Try the same attach/replace via link API. */
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 0, "prog count");
+ bind_link_fd = bpf_link_create(bind_prog_fd, cgroup_fd,
+ BPF_LSM_CGROUP, NULL);
+ if (!ASSERT_GE(bind_link_fd, 0, "link create bind_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+
+ update_opts.old_prog_fd = bind_prog_fd;
+ update_opts.flags = BPF_F_REPLACE;
+
+ err = bpf_link_update(bind_link_fd, bind_prog_fd2, &update_opts);
+ if (!ASSERT_OK(err, "link update bind_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+
+ /* Attach another instance of bind program to another cgroup.
+ * This should trigger the reuse of the trampoline shim (two
+ * programs attaching to the same btf_id).
+ */
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 0, "prog count");
+ bind_link_fd2 = bpf_link_create(bind_prog_fd2, cgroup_fd2,
+ BPF_LSM_CGROUP, NULL);
+ if (!ASSERT_GE(bind_link_fd2, 0, "link create bind_prog_fd2"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count");
+
+ /* AF_UNIX is prohibited. */
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LT(fd, 0, "socket(AF_UNIX)");
+ close(fd);
+
+ /* AF_INET6 gets default policy (sk_priority). */
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+ goto detach_cgroup;
+
+ prio = 0;
+ socklen = sizeof(prio);
+ ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+ "getsockopt");
+ ASSERT_EQ(prio, 123, "sk_priority");
+
+ close(fd);
+
+ /* TX-only AF_PACKET is allowed. */
+
+ ASSERT_LT(socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)), 0,
+ "socket(AF_PACKET, ..., ETH_P_ALL)");
+
+ fd = socket(AF_PACKET, SOCK_RAW, 0);
+ ASSERT_GE(fd, 0, "socket(AF_PACKET, ..., 0)");
+
+ /* TX-only AF_PACKET can not be rebound. */
+
+ struct sockaddr_ll sa = {
+ .sll_family = AF_PACKET,
+ .sll_protocol = htons(ETH_P_ALL),
+ };
+ ASSERT_LT(bind(fd, (struct sockaddr *)&sa, sizeof(sa)), 0,
+ "bind(ETH_P_ALL)");
+
+ close(fd);
+
+ /* Trigger passive open. */
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ ASSERT_GE(listen_fd, 0, "start_server");
+ client_fd = connect_to_fd(listen_fd, 0);
+ ASSERT_GE(client_fd, 0, "connect_to_fd");
+ accepted_fd = accept(listen_fd, NULL, NULL);
+ ASSERT_GE(accepted_fd, 0, "accept");
+
+ prio = 0;
+ socklen = sizeof(prio);
+ ASSERT_GE(getsockopt(accepted_fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+ "getsockopt");
+ ASSERT_EQ(prio, 234, "sk_priority");
+
+ /* These are replaced and never called. */
+ ASSERT_EQ(skel->bss->called_socket_post_create, 0, "called_create");
+ ASSERT_EQ(skel->bss->called_socket_bind, 0, "called_bind");
+
+ /* AF_INET6+SOCK_STREAM
+ * AF_PACKET+SOCK_RAW
+ * listen_fd
+ * client_fd
+ * accepted_fd
+ */
+ ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2");
+
+ /* start_server
+ * bind(ETH_P_ALL)
+ */
+ ASSERT_EQ(skel->bss->called_socket_bind2, 2, "called_bind2");
+ /* Single accept(). */
+ ASSERT_EQ(skel->bss->called_socket_clone, 1, "called_clone");
+
+ /* AF_UNIX+SOCK_STREAM (failed)
+ * AF_INET6+SOCK_STREAM
+ * AF_PACKET+SOCK_RAW (failed)
+ * AF_PACKET+SOCK_RAW
+ * listen_fd
+ * client_fd
+ * accepted_fd
+ */
+ ASSERT_EQ(skel->bss->called_socket_alloc, 7, "called_alloc");
+
+ close(listen_fd);
+ close(client_fd);
+ close(accepted_fd);
+
+ /* Make sure other cgroup doesn't trigger the programs. */
+
+ if (!ASSERT_OK(join_cgroup("/sock_policy_empty"), "join root cgroup"))
+ goto detach_cgroup;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+ goto detach_cgroup;
+
+ prio = 0;
+ socklen = sizeof(prio);
+ ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+ "getsockopt");
+ ASSERT_EQ(prio, 0, "sk_priority");
+
+ close(fd);
+
+detach_cgroup:
+ ASSERT_GE(bpf_prog_detach2(post_create_prog_fd2, cgroup_fd,
+ BPF_LSM_CGROUP), 0, "detach_create");
+ close(bind_link_fd);
+ /* Don't close bind_link_fd2, exercise cgroup release cleanup. */
+ ASSERT_GE(bpf_prog_detach2(alloc_prog_fd, cgroup_fd,
+ BPF_LSM_CGROUP), 0, "detach_alloc");
+ ASSERT_GE(bpf_prog_detach2(clone_prog_fd, cgroup_fd,
+ BPF_LSM_CGROUP), 0, "detach_clone");
+
+close_cgroup:
+ close(cgroup_fd);
+ close(cgroup_fd2);
+ close(cgroup_fd3);
+ lsm_cgroup__destroy(skel);
+}
+
+static void test_lsm_cgroup_nonvoid(void)
+{
+ struct lsm_cgroup_nonvoid *skel = NULL;
+
+ skel = lsm_cgroup_nonvoid__open_and_load();
+ ASSERT_NULL(skel, "open succeeds");
+ lsm_cgroup_nonvoid__destroy(skel);
+}
+
+void test_lsm_cgroup(void)
+{
+ if (test__start_subtest("functional"))
+ test_lsm_cgroup_functional();
+ if (test__start_subtest("nonvoid"))
+ test_lsm_cgroup_nonvoid();
+ btf__free(btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
index abf890d066eb..34dbd2adc157 100644
--- a/tools/testing/selftests/bpf/prog_tests/probe_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -4,25 +4,35 @@
/* TODO: corrupts other tests uses connect() */
void serial_test_probe_user(void)
{
- const char *prog_name = "handle_sys_connect";
+ static const char *const prog_names[] = {
+ "handle_sys_connect",
+#if defined(__s390x__)
+ "handle_sys_socketcall",
+#endif
+ };
+ enum { prog_count = ARRAY_SIZE(prog_names) };
const char *obj_file = "./test_probe_user.o";
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
int err, results_map_fd, sock_fd, duration = 0;
struct sockaddr curr, orig, tmp;
struct sockaddr_in *in = (struct sockaddr_in *)&curr;
- struct bpf_link *kprobe_link = NULL;
- struct bpf_program *kprobe_prog;
+ struct bpf_link *kprobe_links[prog_count] = {};
+ struct bpf_program *kprobe_progs[prog_count];
struct bpf_object *obj;
static const int zero = 0;
+ size_t i;
obj = bpf_object__open_file(obj_file, &opts);
if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
- kprobe_prog = bpf_object__find_program_by_name(obj, prog_name);
- if (CHECK(!kprobe_prog, "find_probe",
- "prog '%s' not found\n", prog_name))
- goto cleanup;
+ for (i = 0; i < prog_count; i++) {
+ kprobe_progs[i] =
+ bpf_object__find_program_by_name(obj, prog_names[i]);
+ if (CHECK(!kprobe_progs[i], "find_probe",
+ "prog '%s' not found\n", prog_names[i]))
+ goto cleanup;
+ }
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
@@ -33,9 +43,11 @@ void serial_test_probe_user(void)
"err %d\n", results_map_fd))
goto cleanup;
- kprobe_link = bpf_program__attach(kprobe_prog);
- if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
- goto cleanup;
+ for (i = 0; i < prog_count; i++) {
+ kprobe_links[i] = bpf_program__attach(kprobe_progs[i]);
+ if (!ASSERT_OK_PTR(kprobe_links[i], "attach_kprobe"))
+ goto cleanup;
+ }
memset(&curr, 0, sizeof(curr));
in->sin_family = AF_INET;
@@ -69,6 +81,7 @@ void serial_test_probe_user(void)
inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
goto cleanup;
cleanup:
- bpf_link__destroy(kprobe_link);
+ for (i = 0; i < prog_count; i++)
+ bpf_link__destroy(kprobe_links[i]);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index f4a13d9dd5c8..c197261d02e2 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -44,7 +44,7 @@ BTF_ID(union, U)
BTF_ID(func, func)
extern __u32 test_list_global[];
-BTF_ID_LIST_GLOBAL(test_list_global)
+BTF_ID_LIST_GLOBAL(test_list_global, 1)
BTF_ID_UNUSED
BTF_ID(typedef, S)
BTF_ID(typedef, T)
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index eb5f7f5aa81a..1455911d9fcb 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -50,6 +50,13 @@ void test_ringbuf_multi(void)
if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ /* validate ringbuf size adjustment logic */
+ ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_before");
+ ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size + 1), "rb1_resize");
+ ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), 2 * page_size, "rb1_size_after");
+ ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size), "rb1_reset");
+ ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_final");
+
proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL);
if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n"))
goto cleanup;
@@ -65,6 +72,10 @@ void test_ringbuf_multi(void)
close(proto_fd);
proto_fd = -1;
+ /* make sure we can't resize ringbuf after object load */
+ if (!ASSERT_ERR(bpf_map__set_max_entries(skel->maps.ringbuf1, 3 * page_size), "rb1_resize_after_load"))
+ goto cleanup;
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index d71226e34c34..d63a20fbed33 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -64,7 +64,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
/* wait a little for signal handler */
- for (int i = 0; i < 100000000 && !sigusr1_received; i++)
+ for (int i = 0; i < 1000000000 && !sigusr1_received; i++)
j /= i + j + 1;
buf[0] = sigusr1_received ? '2' : '0';
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index 180afd632f4c..99dac5292b41 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -122,6 +122,8 @@ void test_skeleton(void)
ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var");
+ ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr");
+
elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz);
ASSERT_OK_PTR(elf_bytes, "elf_bytes");
ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz");
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index 9d211b5c22c4..7d23166c77af 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -394,7 +394,6 @@ void serial_test_sock_fields(void)
test();
done:
- test_sock_fields__detach(skel);
test_sock_fields__destroy(skel);
if (child_cg_fd >= 0)
close(child_cg_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 958dae769c52..cb6a53b3e023 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -646,7 +646,7 @@ static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
__u32 *errs = skel->bss->errs[t];
skel->bss->test = t;
- test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 0);
+ test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
dtime_cnt_str(t, INGRESS_FWDNS_P100));
@@ -683,7 +683,7 @@ static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
errs = skel->bss->errs[t];
skel->bss->test = t;
- test_inet_dtime(family, SOCK_STREAM, addr, 0);
+ test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
/* fwdns_prio100 prog does not read delivery_time_type, so
* kernel puts the (rcv) timetamp in __sk_buff->tstamp
@@ -715,13 +715,13 @@ static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
errs = skel->bss->errs[t];
skel->bss->test = t;
- test_inet_dtime(family, SOCK_DGRAM, addr, 0);
+ test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
dtime_cnt_str(t, INGRESS_FWDNS_P100));
/* non mono delivery time is not forwarded */
ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
- dtime_cnt_str(t, INGRESS_FWDNS_P100));
+ dtime_cnt_str(t, INGRESS_FWDNS_P101));
for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index 3bba4a2a0530..eea274110267 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -82,6 +82,7 @@
#define MAC_TUNL_DEV0 "52:54:00:d9:01:00"
#define MAC_TUNL_DEV1 "52:54:00:d9:02:00"
+#define MAC_VETH1 "52:54:00:d9:03:00"
#define VXLAN_TUNL_DEV0 "vxlan00"
#define VXLAN_TUNL_DEV1 "vxlan11"
@@ -108,10 +109,9 @@
static int config_device(void)
{
SYS("ip netns add at_ns0");
- SYS("ip link add veth0 type veth peer name veth1");
+ SYS("ip link add veth0 address " MAC_VETH1 " type veth peer name veth1");
SYS("ip link set veth0 netns at_ns0");
SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1");
- SYS("ip addr add " IP4_ADDR2_VETH1 "/24 dev veth1");
SYS("ip link set dev veth1 up mtu 1500");
SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0");
SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500");
@@ -140,6 +140,8 @@ static int add_vxlan_tunnel(void)
VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s",
IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0);
+ SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0",
+ IP4_ADDR2_VETH1, MAC_VETH1);
/* root namespace */
SYS("ip link add dev %s type vxlan external gbp dstport 4789",
@@ -277,6 +279,17 @@ static void test_vxlan_tunnel(void)
if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
goto done;
+ /* load and attach bpf prog to veth dev tc hook point */
+ ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
+ goto done;
+ tc_hook.ifindex = ifindex;
+ set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst);
+ if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1))
+ goto done;
+
/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
nstoken = open_netns("at_ns0");
if (!ASSERT_OK_PTR(nstoken, "setns src"))
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 5f733d50b0d7..9ad9da0f215e 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -12,7 +12,7 @@ int lets_test_this(int);
static volatile int idx = 2;
static volatile __u64 bla = 0xFEDCBA9876543210ULL;
-static volatile short nums[] = {-1, -2, -3, };
+static volatile short nums[] = {-1, -2, -3, -4};
static volatile struct {
int x;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
new file mode 100644
index 000000000000..874a846e298c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <ctype.h>
+
+#define CMD_OUT_BUF_SIZE 1023
+
+#define SYS(cmd) ({ \
+ if (!ASSERT_OK(system(cmd), (cmd))) \
+ goto out; \
+})
+
+#define SYS_OUT(cmd, ...) ({ \
+ char buf[1024]; \
+ snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \
+ FILE *f = popen(buf, "r"); \
+ if (!ASSERT_OK_PTR(f, buf)) \
+ goto out; \
+ f; \
+})
+
+/* out must be at least `size * 4 + 1` bytes long */
+static void escape_str(char *out, const char *in, size_t size)
+{
+ static const char *hex = "0123456789ABCDEF";
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ if (isprint(in[i]) && in[i] != '\\' && in[i] != '\'') {
+ *out++ = in[i];
+ } else {
+ *out++ = '\\';
+ *out++ = 'x';
+ *out++ = hex[(in[i] >> 4) & 0xf];
+ *out++ = hex[in[i] & 0xf];
+ }
+ }
+ *out++ = '\0';
+}
+
+static bool expect_str(char *buf, size_t size, const char *str, const char *name)
+{
+ static char escbuf_expected[CMD_OUT_BUF_SIZE * 4];
+ static char escbuf_actual[CMD_OUT_BUF_SIZE * 4];
+ static int duration = 0;
+ bool ok;
+
+ ok = size == strlen(str) && !memcmp(buf, str, size);
+
+ if (!ok) {
+ escape_str(escbuf_expected, str, strlen(str));
+ escape_str(escbuf_actual, buf, size);
+ }
+ CHECK(!ok, name, "unexpected %s: actual '%s' != expected '%s'\n",
+ name, escbuf_actual, escbuf_expected);
+
+ return ok;
+}
+
+static void test_synproxy(bool xdp)
+{
+ int server_fd = -1, client_fd = -1, accept_fd = -1;
+ char *prog_id = NULL, *prog_id_end;
+ struct nstoken *ns = NULL;
+ FILE *ctrl_file = NULL;
+ char buf[CMD_OUT_BUF_SIZE];
+ size_t size;
+
+ SYS("ip netns add synproxy");
+
+ SYS("ip link add tmp0 type veth peer name tmp1");
+ SYS("ip link set tmp1 netns synproxy");
+ SYS("ip link set tmp0 up");
+ SYS("ip addr replace 198.18.0.1/24 dev tmp0");
+
+ /* When checksum offload is enabled, the XDP program sees wrong
+ * checksums and drops packets.
+ */
+ SYS("ethtool -K tmp0 tx off");
+ if (xdp)
+ /* Workaround required for veth. */
+ SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null");
+
+ ns = open_netns("synproxy");
+ if (!ASSERT_OK_PTR(ns, "setns"))
+ goto out;
+
+ SYS("ip link set lo up");
+ SYS("ip link set tmp1 up");
+ SYS("ip addr replace 198.18.0.2/24 dev tmp1");
+ SYS("sysctl -w net.ipv4.tcp_syncookies=2");
+ SYS("sysctl -w net.ipv4.tcp_timestamps=1");
+ SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
+ SYS("iptables -t raw -I PREROUTING \
+ -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack");
+ SYS("iptables -t filter -A INPUT \
+ -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \
+ -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460");
+ SYS("iptables -t filter -A INPUT \
+ -i tmp1 -m state --state INVALID -j DROP");
+
+ ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \
+ --single --mss4 1460 --mss6 1440 \
+ --wscale 7 --ttl 64%s", xdp ? "" : " --tc");
+ size = fread(buf, 1, sizeof(buf), ctrl_file);
+ pclose(ctrl_file);
+ if (!expect_str(buf, size, "Total SYNACKs generated: 0\n",
+ "initial SYNACKs"))
+ goto out;
+
+ if (!xdp) {
+ ctrl_file = SYS_OUT("tc filter show dev tmp1 ingress");
+ size = fread(buf, 1, sizeof(buf), ctrl_file);
+ pclose(ctrl_file);
+ prog_id = memmem(buf, size, " id ", 4);
+ if (!ASSERT_OK_PTR(prog_id, "find prog id"))
+ goto out;
+ prog_id += 4;
+ if (!ASSERT_LT(prog_id, buf + size, "find prog id begin"))
+ goto out;
+ prog_id_end = prog_id;
+ while (prog_id_end < buf + size && *prog_id_end >= '0' &&
+ *prog_id_end <= '9')
+ prog_id_end++;
+ if (!ASSERT_LT(prog_id_end, buf + size, "find prog id end"))
+ goto out;
+ *prog_id_end = '\0';
+ }
+
+ server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto out;
+
+ close_netns(ns);
+ ns = NULL;
+
+ client_fd = connect_to_fd(server_fd, 10000);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto out;
+
+ accept_fd = accept(server_fd, NULL, NULL);
+ if (!ASSERT_GE(accept_fd, 0, "accept"))
+ goto out;
+
+ ns = open_netns("synproxy");
+ if (!ASSERT_OK_PTR(ns, "setns"))
+ goto out;
+
+ if (xdp)
+ ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single");
+ else
+ ctrl_file = SYS_OUT("./xdp_synproxy --prog %s --single",
+ prog_id);
+ size = fread(buf, 1, sizeof(buf), ctrl_file);
+ pclose(ctrl_file);
+ if (!expect_str(buf, size, "Total SYNACKs generated: 1\n",
+ "SYNACKs after connection"))
+ goto out;
+
+out:
+ if (accept_fd >= 0)
+ close(accept_fd);
+ if (client_fd >= 0)
+ close(client_fd);
+ if (server_fd >= 0)
+ close(server_fd);
+ if (ns)
+ close_netns(ns);
+
+ system("ip link del tmp0");
+ system("ip netns del synproxy");
+}
+
+void test_xdp_synproxy(void)
+{
+ if (test__start_subtest("xdp"))
+ test_synproxy(true);
+ if (test__start_subtest("tc"))
+ test_synproxy(false);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c
new file mode 100644
index 000000000000..56957557e3e1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define MAX_ENTRIES 1000
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_ENTRIES);
+} hash_map_bench SEC(".maps");
+
+u64 __attribute__((__aligned__(256))) percpu_time[256];
+u64 nr_loops;
+
+static int loop_update_callback(__u32 index, u32 *key)
+{
+ u64 init_val = 1;
+
+ bpf_map_update_elem(&hash_map_bench, key, &init_val, BPF_ANY);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int benchmark(void *ctx)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 key = cpu + MAX_ENTRIES;
+ u64 start_time = bpf_ktime_get_ns();
+
+ bpf_loop(nr_loops, loop_update_callback, &key, 0);
+ percpu_time[cpu & 255] = bpf_ktime_get_ns() - start_time;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 97ec8bc76ae6..e9846606690d 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -22,6 +22,7 @@
#define BTF_F_NONAME BTF_F_NONAME___not_used
#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
#define BTF_F_ZERO BTF_F_ZERO___not_used
+#define bpf_iter__ksym bpf_iter__ksym___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__bpf_map
@@ -44,6 +45,7 @@
#undef BTF_F_NONAME
#undef BTF_F_PTR_RAW
#undef BTF_F_ZERO
+#undef bpf_iter__ksym
struct bpf_iter_meta {
struct seq_file *seq;
@@ -151,3 +153,8 @@ enum {
BTF_F_PTR_RAW = (1ULL << 2),
BTF_F_ZERO = (1ULL << 3),
};
+
+struct bpf_iter__ksym {
+ struct bpf_iter_meta *meta;
+ struct kallsym_iter *ksym;
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
new file mode 100644
index 000000000000..285c008cbf9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned long last_sym_value = 0;
+
+static inline char tolower(char c)
+{
+ if (c >= 'A' && c <= 'Z')
+ c += ('a' - 'A');
+ return c;
+}
+
+static inline char toupper(char c)
+{
+ if (c >= 'a' && c <= 'z')
+ c -= ('a' - 'A');
+ return c;
+}
+
+/* Dump symbols with max size; the latter is calculated by caching symbol N value
+ * and when iterating on symbol N+1, we can print max size of symbol N via
+ * address of N+1 - address of N.
+ */
+SEC("iter/ksym")
+int dump_ksym(struct bpf_iter__ksym *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct kallsym_iter *iter = ctx->ksym;
+ __u32 seq_num = ctx->meta->seq_num;
+ unsigned long value;
+ char type;
+ int ret;
+
+ if (!iter)
+ return 0;
+
+ if (seq_num == 0) {
+ BPF_SEQ_PRINTF(seq, "ADDR TYPE NAME MODULE_NAME KIND MAX_SIZE\n");
+ return 0;
+ }
+ if (last_sym_value)
+ BPF_SEQ_PRINTF(seq, "0x%x\n", iter->value - last_sym_value);
+ else
+ BPF_SEQ_PRINTF(seq, "\n");
+
+ value = iter->show_value ? iter->value : 0;
+
+ last_sym_value = value;
+
+ type = iter->type;
+
+ if (iter->module_name[0]) {
+ type = iter->exported ? toupper(type) : tolower(type);
+ BPF_SEQ_PRINTF(seq, "0x%llx %c %s [ %s ] ",
+ value, type, iter->name, iter->module_name);
+ } else {
+ BPF_SEQ_PRINTF(seq, "0x%llx %c %s ", value, type, iter->name);
+ }
+ if (!iter->pos_arch_end || iter->pos_arch_end > iter->pos)
+ BPF_SEQ_PRINTF(seq, "CORE ");
+ else if (!iter->pos_mod_end || iter->pos_mod_end > iter->pos)
+ BPF_SEQ_PRINTF(seq, "MOD ");
+ else if (!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > iter->pos)
+ BPF_SEQ_PRINTF(seq, "FTRACE_MOD ");
+ else if (!iter->pos_bpf_end || iter->pos_bpf_end > iter->pos)
+ BPF_SEQ_PRINTF(seq, "BPF ");
+ else
+ BPF_SEQ_PRINTF(seq, "KPROBE ");
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c
index e08565282759..de1fc82d2710 100644
--- a/tools/testing/selftests/bpf/progs/bpf_loop.c
+++ b/tools/testing/selftests/bpf/progs/bpf_loop.c
@@ -11,11 +11,19 @@ struct callback_ctx {
int output;
};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 32);
+ __type(key, int);
+ __type(value, int);
+} map1 SEC(".maps");
+
/* These should be set by the user program */
u32 nested_callback_nr_loops;
u32 stop_index = -1;
u32 nr_loops;
int pid;
+int callback_selector;
/* Making these global variables so that the userspace program
* can verify the output through the skeleton
@@ -111,3 +119,109 @@ int prog_nested_calls(void *ctx)
return 0;
}
+
+static int callback_set_f0(int i, void *ctx)
+{
+ g_output = 0xF0;
+ return 0;
+}
+
+static int callback_set_0f(int i, void *ctx)
+{
+ g_output = 0x0F;
+ return 0;
+}
+
+/*
+ * non-constant callback is a corner case for bpf_loop inline logic
+ */
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int prog_non_constant_callback(void *ctx)
+{
+ struct callback_ctx data = {};
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ int (*callback)(int i, void *ctx);
+
+ g_output = 0;
+
+ if (callback_selector == 0x0F)
+ callback = callback_set_0f;
+ else
+ callback = callback_set_f0;
+
+ bpf_loop(1, callback, NULL, 0);
+
+ return 0;
+}
+
+static int stack_check_inner_callback(void *ctx)
+{
+ return 0;
+}
+
+static int map1_lookup_elem(int key)
+{
+ int *val = bpf_map_lookup_elem(&map1, &key);
+
+ return val ? *val : -1;
+}
+
+static void map1_update_elem(int key, int val)
+{
+ bpf_map_update_elem(&map1, &key, &val, BPF_ANY);
+}
+
+static int stack_check_outer_callback(void *ctx)
+{
+ int a = map1_lookup_elem(1);
+ int b = map1_lookup_elem(2);
+ int c = map1_lookup_elem(3);
+ int d = map1_lookup_elem(4);
+ int e = map1_lookup_elem(5);
+ int f = map1_lookup_elem(6);
+
+ bpf_loop(1, stack_check_inner_callback, NULL, 0);
+
+ map1_update_elem(1, a + 1);
+ map1_update_elem(2, b + 1);
+ map1_update_elem(3, c + 1);
+ map1_update_elem(4, d + 1);
+ map1_update_elem(5, e + 1);
+ map1_update_elem(6, f + 1);
+
+ return 0;
+}
+
+/* Some of the local variables in stack_check and
+ * stack_check_outer_callback would be allocated on stack by
+ * compiler. This test should verify that stack content for these
+ * variables is preserved between calls to bpf_loop (might be an issue
+ * if loop inlining allocates stack slots incorrectly).
+ */
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int stack_check(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ int a = map1_lookup_elem(7);
+ int b = map1_lookup_elem(8);
+ int c = map1_lookup_elem(9);
+ int d = map1_lookup_elem(10);
+ int e = map1_lookup_elem(11);
+ int f = map1_lookup_elem(12);
+
+ bpf_loop(1, stack_check_outer_callback, NULL, 0);
+
+ map1_update_elem(7, a + 1);
+ map1_update_elem(8, b + 1);
+ map1_update_elem(9, c + 1);
+ map1_update_elem(10, d + 1);
+ map1_update_elem(11, e + 1);
+ map1_update_elem(12, f + 1);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
index 05838ed9b89c..e1e11897e99b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
+++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
@@ -64,9 +64,9 @@ int BPF_KPROBE(handle_sys_prctl)
return 0;
}
-SEC("kprobe/" SYS_PREFIX "sys_prctl")
-int BPF_KPROBE_SYSCALL(prctl_enter, int option, unsigned long arg2,
- unsigned long arg3, unsigned long arg4, unsigned long arg5)
+SEC("ksyscall/prctl")
+int BPF_KSYSCALL(prctl_enter, int option, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4, unsigned long arg5)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 1c1289ba5fc5..98dd2c4815f0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -8,6 +8,7 @@
#define SOL_SOCKET 1
#define SO_SNDBUF 7
#define __SO_ACCEPTCON (1 << 16)
+#define SO_PRIORITY 12
#define SOL_TCP 6
#define TCP_CONGESTION 13
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c
new file mode 100644
index 000000000000..888e79db6a77
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c
new file mode 100644
index 000000000000..194749130d87
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c
new file mode 100644
index 000000000000..3d732d4193e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val___err_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c
new file mode 100644
index 000000000000..17cf5d6a848d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val___val3_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c
new file mode 100644
index 000000000000..57ae2c258928
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index f9dc9766546e..fd8e1b4c6762 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -13,6 +13,7 @@ struct core_reloc_kernel_output {
int valid[10];
char comm[sizeof("test_progs")];
int comm_len;
+ bool local_task_struct_matches;
};
/*
@@ -860,10 +861,11 @@ struct core_reloc_size___err_ambiguous2 {
};
/*
- * TYPE EXISTENCE & SIZE
+ * TYPE EXISTENCE, MATCH & SIZE
*/
struct core_reloc_type_based_output {
bool struct_exists;
+ bool complex_struct_exists;
bool union_exists;
bool enum_exists;
bool typedef_named_struct_exists;
@@ -872,9 +874,24 @@ struct core_reloc_type_based_output {
bool typedef_int_exists;
bool typedef_enum_exists;
bool typedef_void_ptr_exists;
+ bool typedef_restrict_ptr_exists;
bool typedef_func_proto_exists;
bool typedef_arr_exists;
+ bool struct_matches;
+ bool complex_struct_matches;
+ bool union_matches;
+ bool enum_matches;
+ bool typedef_named_struct_matches;
+ bool typedef_anon_struct_matches;
+ bool typedef_struct_ptr_matches;
+ bool typedef_int_matches;
+ bool typedef_enum_matches;
+ bool typedef_void_ptr_matches;
+ bool typedef_restrict_ptr_matches;
+ bool typedef_func_proto_matches;
+ bool typedef_arr_matches;
+
int struct_sz;
int union_sz;
int enum_sz;
@@ -892,6 +909,14 @@ struct a_struct {
int x;
};
+struct a_complex_struct {
+ union {
+ struct a_struct * restrict a;
+ void *b;
+ } x;
+ volatile long y;
+};
+
union a_union {
int y;
int z;
@@ -916,6 +941,7 @@ typedef int int_typedef;
typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
typedef void *void_ptr_typedef;
+typedef int *restrict restrict_ptr_typedef;
typedef int (*func_proto_typedef)(long);
@@ -923,22 +949,86 @@ typedef char arr_typedef[20];
struct core_reloc_type_based {
struct a_struct f1;
- union a_union f2;
- enum an_enum f3;
- named_struct_typedef f4;
- anon_struct_typedef f5;
- struct_ptr_typedef f6;
- int_typedef f7;
- enum_typedef f8;
- void_ptr_typedef f9;
- func_proto_typedef f10;
- arr_typedef f11;
+ struct a_complex_struct f2;
+ union a_union f3;
+ enum an_enum f4;
+ named_struct_typedef f5;
+ anon_struct_typedef f6;
+ struct_ptr_typedef f7;
+ int_typedef f8;
+ enum_typedef f9;
+ void_ptr_typedef f10;
+ restrict_ptr_typedef f11;
+ func_proto_typedef f12;
+ arr_typedef f13;
};
/* no types in target */
struct core_reloc_type_based___all_missing {
};
+/* different member orders, enum variant values, signedness, etc */
+struct a_struct___diff {
+ int x;
+ int a;
+};
+
+struct a_struct___forward;
+
+struct a_complex_struct___diff {
+ union {
+ struct a_struct___forward *a;
+ void *b;
+ } x;
+ volatile long y;
+};
+
+union a_union___diff {
+ int z;
+ int y;
+};
+
+typedef struct a_struct___diff named_struct_typedef___diff;
+
+typedef struct { int z, x, y; } anon_struct_typedef___diff;
+
+typedef struct {
+ int c;
+ int b;
+ int a;
+} *struct_ptr_typedef___diff;
+
+enum an_enum___diff {
+ AN_ENUM_VAL2___diff = 0,
+ AN_ENUM_VAL1___diff = 42,
+ AN_ENUM_VAL3___diff = 1,
+};
+
+typedef unsigned int int_typedef___diff;
+
+typedef enum { TYPEDEF_ENUM_VAL2___diff, TYPEDEF_ENUM_VAL1___diff = 50 } enum_typedef___diff;
+
+typedef const void *void_ptr_typedef___diff;
+
+typedef int_typedef___diff (*func_proto_typedef___diff)(long);
+
+typedef char arr_typedef___diff[3];
+
+struct core_reloc_type_based___diff {
+ struct a_struct___diff f1;
+ struct a_complex_struct___diff f2;
+ union a_union___diff f3;
+ enum an_enum___diff f4;
+ named_struct_typedef___diff f5;
+ anon_struct_typedef___diff f6;
+ struct_ptr_typedef___diff f7;
+ int_typedef___diff f8;
+ enum_typedef___diff f9;
+ void_ptr_typedef___diff f10;
+ func_proto_typedef___diff f11;
+ arr_typedef___diff f12;
+};
+
/* different type sizes, extra modifiers, anon vs named enums, etc */
struct a_struct___diff_sz {
long x;
@@ -1117,6 +1207,20 @@ struct core_reloc_enumval_output {
int anon_val2;
};
+struct core_reloc_enum64val_output {
+ bool unsigned_val1_exists;
+ bool unsigned_val2_exists;
+ bool unsigned_val3_exists;
+ bool signed_val1_exists;
+ bool signed_val2_exists;
+ bool signed_val3_exists;
+
+ long unsigned_val1;
+ long unsigned_val2;
+ long signed_val1;
+ long signed_val2;
+};
+
enum named_enum {
NAMED_ENUM_VAL1 = 1,
NAMED_ENUM_VAL2 = 2,
@@ -1134,6 +1238,23 @@ struct core_reloc_enumval {
anon_enum f2;
};
+enum named_unsigned_enum64 {
+ UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL,
+ UNSIGNED_ENUM64_VAL2 = 0x2,
+ UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL,
+};
+
+enum named_signed_enum64 {
+ SIGNED_ENUM64_VAL1 = 0x1ffffffffLL,
+ SIGNED_ENUM64_VAL2 = -2,
+ SIGNED_ENUM64_VAL3 = 0x3ffffffffLL,
+};
+
+struct core_reloc_enum64val {
+ enum named_unsigned_enum64 f1;
+ enum named_signed_enum64 f2;
+};
+
/* differing enumerator values */
enum named_enum___diff {
NAMED_ENUM_VAL1___diff = 101,
@@ -1152,6 +1273,23 @@ struct core_reloc_enumval___diff {
anon_enum___diff f2;
};
+enum named_unsigned_enum64___diff {
+ UNSIGNED_ENUM64_VAL1___diff = 0x101ffffffffULL,
+ UNSIGNED_ENUM64_VAL2___diff = 0x202ffffffffULL,
+ UNSIGNED_ENUM64_VAL3___diff = 0x303ffffffffULL,
+};
+
+enum named_signed_enum64___diff {
+ SIGNED_ENUM64_VAL1___diff = -101,
+ SIGNED_ENUM64_VAL2___diff = -202,
+ SIGNED_ENUM64_VAL3___diff = -303,
+};
+
+struct core_reloc_enum64val___diff {
+ enum named_unsigned_enum64___diff f1;
+ enum named_signed_enum64___diff f2;
+};
+
/* missing (optional) third enum value */
enum named_enum___val3_missing {
NAMED_ENUM_VAL1___val3_missing = 111,
@@ -1168,6 +1306,21 @@ struct core_reloc_enumval___val3_missing {
anon_enum___val3_missing f2;
};
+enum named_unsigned_enum64___val3_missing {
+ UNSIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffULL,
+ UNSIGNED_ENUM64_VAL2___val3_missing = 0x222,
+};
+
+enum named_signed_enum64___val3_missing {
+ SIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffLL,
+ SIGNED_ENUM64_VAL2___val3_missing = -222,
+};
+
+struct core_reloc_enum64val___val3_missing {
+ enum named_unsigned_enum64___val3_missing f1;
+ enum named_signed_enum64___val3_missing f2;
+};
+
/* missing (mandatory) second enum value, should fail */
enum named_enum___err_missing {
NAMED_ENUM_VAL1___err_missing = 1,
@@ -1183,3 +1336,18 @@ struct core_reloc_enumval___err_missing {
enum named_enum___err_missing f1;
anon_enum___err_missing f2;
};
+
+enum named_unsigned_enum64___err_missing {
+ UNSIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffULL,
+ UNSIGNED_ENUM64_VAL3___err_missing = 0x3ffffffffULL,
+};
+
+enum named_signed_enum64___err_missing {
+ SIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffLL,
+ SIGNED_ENUM64_VAL3___err_missing = -3,
+};
+
+struct core_reloc_enum64val___err_missing {
+ enum named_unsigned_enum64___err_missing f1;
+ enum named_signed_enum64___err_missing f2;
+};
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index d811cff73597..0a26c243e6e9 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -140,12 +140,12 @@ int use_after_invalid(void *ctx)
bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr);
- bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
bpf_ringbuf_submit_dynptr(&ptr, 0);
/* this should fail */
- bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
return 0;
}
@@ -338,7 +338,7 @@ int invalid_helper2(void *ctx)
get_map_val_dynptr(&ptr);
/* this should fail */
- bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0);
return 0;
}
@@ -377,7 +377,7 @@ int invalid_write2(void *ctx)
memcpy((void *)&ptr + 8, &x, sizeof(x));
/* this should fail */
- bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
bpf_ringbuf_submit_dynptr(&ptr, 0);
@@ -473,7 +473,7 @@ int invalid_read2(void *ctx)
get_map_val_dynptr(&ptr);
/* this should fail */
- bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0, 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index d67be48df4b2..a3a6103c8569 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -43,10 +43,10 @@ int test_read_write(void *ctx)
bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr);
/* Write data into the dynptr */
- err = err ?: bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data));
+ err = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
/* Read the data that was written into the dynptr */
- err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+ err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
/* Ensure the data we read matches the data we wrote */
for (i = 0; i < sizeof(read_data); i++) {
diff --git a/tools/testing/selftests/bpf/progs/local_storage_bench.c b/tools/testing/selftests/bpf/progs/local_storage_bench.c
new file mode 100644
index 000000000000..2c3234c5b73a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage_bench.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define HASHMAP_SZ 4194304
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+ });
+} array_of_local_storage_maps SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, HASHMAP_SZ);
+ __type(key, int);
+ __type(value, int);
+ });
+} array_of_hash_maps SEC(".maps");
+
+long important_hits;
+long hits;
+
+/* set from user-space */
+const volatile unsigned int use_hashmap;
+const volatile unsigned int hashmap_num_keys;
+const volatile unsigned int num_maps;
+const volatile unsigned int interleave;
+
+struct loop_ctx {
+ struct task_struct *task;
+ long loop_hits;
+ long loop_important_hits;
+};
+
+static int do_lookup(unsigned int elem, struct loop_ctx *lctx)
+{
+ void *map, *inner_map;
+ int idx = 0;
+
+ if (use_hashmap)
+ map = &array_of_hash_maps;
+ else
+ map = &array_of_local_storage_maps;
+
+ inner_map = bpf_map_lookup_elem(map, &elem);
+ if (!inner_map)
+ return -1;
+
+ if (use_hashmap) {
+ idx = bpf_get_prandom_u32() % hashmap_num_keys;
+ bpf_map_lookup_elem(inner_map, &idx);
+ } else {
+ bpf_task_storage_get(inner_map, lctx->task, &idx,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ }
+
+ lctx->loop_hits++;
+ if (!elem)
+ lctx->loop_important_hits++;
+ return 0;
+}
+
+static long loop(u32 index, void *ctx)
+{
+ struct loop_ctx *lctx = (struct loop_ctx *)ctx;
+ unsigned int map_idx = index % num_maps;
+
+ do_lookup(map_idx, lctx);
+ if (interleave && map_idx % 3 == 0)
+ do_lookup(0, lctx);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int get_local(void *ctx)
+{
+ struct loop_ctx lctx;
+
+ lctx.task = bpf_get_current_task_btf();
+ lctx.loop_hits = 0;
+ lctx.loop_important_hits = 0;
+ bpf_loop(10000, &loop, &lctx, 0);
+ __sync_add_and_fetch(&hits, lctx.loop_hits);
+ __sync_add_and_fetch(&important_hits, lctx.loop_important_hits);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c
new file mode 100644
index 000000000000..03bf69f49075
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} task_storage SEC(".maps");
+
+long hits;
+long gp_hits;
+long gp_times;
+long current_gp_start;
+long unexpected;
+bool postgp_seen;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int get_local(void *ctx)
+{
+ struct task_struct *task;
+ int idx;
+ int *s;
+
+ idx = 0;
+ task = bpf_get_current_task_btf();
+ s = bpf_task_storage_get(&task_storage, task, &idx,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!s)
+ return 0;
+
+ *s = 3;
+ bpf_task_storage_delete(&task_storage, task);
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("fentry/rcu_tasks_trace_pregp_step")
+int pregp_step(struct pt_regs *ctx)
+{
+ current_gp_start = bpf_ktime_get_ns();
+ return 0;
+}
+
+SEC("fentry/rcu_tasks_trace_postgp")
+int postgp(struct pt_regs *ctx)
+{
+ if (!current_gp_start && postgp_seen) {
+ /* Will only happen if prog tracing rcu_tasks_trace_pregp_step doesn't
+ * execute before this prog
+ */
+ __sync_add_and_fetch(&unexpected, 1);
+ return 0;
+ }
+
+ __sync_add_and_fetch(&gp_times, bpf_ktime_get_ns() - current_gp_start);
+ __sync_add_and_fetch(&gp_hits, 1);
+ current_gp_start = 0;
+ postgp_seen = true;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
new file mode 100644
index 000000000000..4f2d60b87b75
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef AF_PACKET
+#define AF_PACKET 17
+#endif
+
+#ifndef AF_UNIX
+#define AF_UNIX 1
+#endif
+
+#ifndef EPERM
+#define EPERM 1
+#endif
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, __u64);
+ __type(value, __u64);
+} cgroup_storage SEC(".maps");
+
+int called_socket_post_create;
+int called_socket_post_create2;
+int called_socket_bind;
+int called_socket_bind2;
+int called_socket_alloc;
+int called_socket_clone;
+
+static __always_inline int test_local_storage(void)
+{
+ __u64 *val;
+
+ val = bpf_get_local_storage(&cgroup_storage, 0);
+ if (!val)
+ return 0;
+ *val += 1;
+
+ return 1;
+}
+
+static __always_inline int real_create(struct socket *sock, int family,
+ int protocol)
+{
+ struct sock *sk;
+ int prio = 123;
+
+ /* Reject non-tx-only AF_PACKET. */
+ if (family == AF_PACKET && protocol != 0)
+ return 0; /* EPERM */
+
+ sk = sock->sk;
+ if (!sk)
+ return 1;
+
+ /* The rest of the sockets get default policy. */
+ if (bpf_setsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 0; /* EPERM */
+
+ /* Make sure bpf_getsockopt is allowed and works. */
+ prio = 0;
+ if (bpf_getsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 0; /* EPERM */
+ if (prio != 123)
+ return 0; /* EPERM */
+
+ /* Can access cgroup local storage. */
+ if (!test_local_storage())
+ return 0; /* EPERM */
+
+ return 1;
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family,
+ int type, int protocol, int kern)
+{
+ called_socket_post_create++;
+ return real_create(sock, family, protocol);
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create2, struct socket *sock, int family,
+ int type, int protocol, int kern)
+{
+ called_socket_post_create2++;
+ return real_create(sock, family, protocol);
+}
+
+static __always_inline int real_bind(struct socket *sock,
+ struct sockaddr *address,
+ int addrlen)
+{
+ struct sockaddr_ll sa = {};
+
+ if (sock->sk->__sk_common.skc_family != AF_PACKET)
+ return 1;
+
+ if (sock->sk->sk_kern_sock)
+ return 1;
+
+ bpf_probe_read_kernel(&sa, sizeof(sa), address);
+ if (sa.sll_protocol)
+ return 0; /* EPERM */
+
+ /* Can access cgroup local storage. */
+ if (!test_local_storage())
+ return 0; /* EPERM */
+
+ return 1;
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_bind")
+int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ called_socket_bind++;
+ return real_bind(sock, address, addrlen);
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_bind")
+int BPF_PROG(socket_bind2, struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ called_socket_bind2++;
+ return real_bind(sock, address, addrlen);
+}
+
+/* __cgroup_bpf_run_lsm_current (via bpf_lsm_current_hooks) */
+SEC("lsm_cgroup/sk_alloc_security")
+int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority)
+{
+ called_socket_alloc++;
+ if (family == AF_UNIX)
+ return 0; /* EPERM */
+
+ /* Can access cgroup local storage. */
+ if (!test_local_storage())
+ return 0; /* EPERM */
+
+ return 1;
+}
+
+/* __cgroup_bpf_run_lsm_sock */
+SEC("lsm_cgroup/inet_csk_clone")
+int BPF_PROG(socket_clone, struct sock *newsk, const struct request_sock *req)
+{
+ int prio = 234;
+
+ if (!newsk)
+ return 1;
+
+ /* Accepted request sockets get a different priority. */
+ if (bpf_setsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 1;
+
+ /* Make sure bpf_getsockopt is allowed and works. */
+ prio = 0;
+ if (bpf_getsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 1;
+ if (prio != 234)
+ return 1;
+
+ /* Can access cgroup local storage. */
+ if (!test_local_storage())
+ return 1;
+
+ called_socket_clone++;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c
new file mode 100644
index 000000000000..6cb0f161f417
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm_cgroup/inet_csk_clone")
+int BPF_PROG(nonvoid_socket_clone, struct sock *newsk, const struct request_sock *req)
+{
+ /* Can not return any errors from void LSM hooks. */
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
new file mode 100644
index 000000000000..7bb872fb22dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+ return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/incompl_cong_ops_ssthresh")
+__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/incompl_cong_ops_undo_cwnd")
+__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops incompl_cong_ops = {
+ /* Intentionally leaving out any of the required cong_avoid() and
+ * cong_control() here.
+ */
+ .ssthresh = (void *)incompl_cong_ops_ssthresh,
+ .undo_cwnd = (void *)incompl_cong_ops_undo_cwnd,
+ .name = "bpf_incompl_ops",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
new file mode 100644
index 000000000000..c06f4a41c21a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/unsupp_cong_op_get_info")
+size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info)
+{
+ return 0;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops unsupp_cong_op = {
+ .get_info = (void *)unsupp_cong_op_get_info,
+ .name = "bpf_unsupp_op",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
new file mode 100644
index 000000000000..43447704cf0e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define USEC_PER_SEC 1000000UL
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+ return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/write_sk_pacing_init")
+void BPF_PROG(write_sk_pacing_init, struct sock *sk)
+{
+#ifdef ENABLE_ATOMICS_TESTS
+ __sync_bool_compare_and_swap(&sk->sk_pacing_status, SK_PACING_NONE,
+ SK_PACING_NEEDED);
+#else
+ sk->sk_pacing_status = SK_PACING_NEEDED;
+#endif
+}
+
+SEC("struct_ops/write_sk_pacing_cong_control")
+void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
+ const struct rate_sample *rs)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ unsigned long rate =
+ ((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) /
+ (tp->srtt_us ?: 1U << 3);
+ sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate);
+}
+
+SEC("struct_ops/write_sk_pacing_ssthresh")
+__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/write_sk_pacing_undo_cwnd")
+__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops write_sk_pacing = {
+ .init = (void *)write_sk_pacing_init,
+ .cong_control = (void *)write_sk_pacing_cong_control,
+ .ssthresh = (void *)write_sk_pacing_ssthresh,
+ .undo_cwnd = (void *)write_sk_pacing_undo_cwnd,
+ .name = "bpf_w_sk_pacing",
+};
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index ce9acf4db8d2..a1e45fec8938 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -1,10 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2017 Facebook
-#include <linux/ptrace.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
#include "bpf_misc.h"
int kprobe_res = 0;
@@ -17,6 +17,11 @@ int uprobe_byname_res = 0;
int uretprobe_byname_res = 0;
int uprobe_byname2_res = 0;
int uretprobe_byname2_res = 0;
+int uprobe_byname3_sleepable_res = 0;
+int uprobe_byname3_res = 0;
+int uretprobe_byname3_sleepable_res = 0;
+int uretprobe_byname3_res = 0;
+void *user_ptr = 0;
SEC("kprobe")
int handle_kprobe(struct pt_regs *ctx)
@@ -25,13 +30,24 @@ int handle_kprobe(struct pt_regs *ctx)
return 0;
}
-SEC("kprobe/" SYS_PREFIX "sys_nanosleep")
-int BPF_KPROBE(handle_kprobe_auto)
+SEC("ksyscall/nanosleep")
+int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem)
{
kprobe2_res = 11;
return 0;
}
+/**
+ * This program will be manually made sleepable on the userspace side
+ * and should thus be unattachable.
+ */
+SEC("kprobe/" SYS_PREFIX "sys_nanosleep")
+int handle_kprobe_sleepable(struct pt_regs *ctx)
+{
+ kprobe_res = 2;
+ return 0;
+}
+
SEC("kretprobe")
int handle_kretprobe(struct pt_regs *ctx)
{
@@ -39,11 +55,11 @@ int handle_kretprobe(struct pt_regs *ctx)
return 0;
}
-SEC("kretprobe/" SYS_PREFIX "sys_nanosleep")
-int BPF_KRETPROBE(handle_kretprobe_auto)
+SEC("kretsyscall/nanosleep")
+int BPF_KRETPROBE(handle_kretprobe_auto, int ret)
{
kretprobe2_res = 22;
- return 0;
+ return ret;
}
SEC("uprobe")
@@ -93,4 +109,47 @@ int handle_uretprobe_byname2(struct pt_regs *ctx)
return 0;
}
+static __always_inline bool verify_sleepable_user_copy(void)
+{
+ char data[9];
+
+ bpf_copy_from_user(data, sizeof(data), user_ptr);
+ return bpf_strncmp(data, sizeof(data), "test_data") == 0;
+}
+
+SEC("uprobe.s//proc/self/exe:trigger_func3")
+int handle_uprobe_byname3_sleepable(struct pt_regs *ctx)
+{
+ if (verify_sleepable_user_copy())
+ uprobe_byname3_sleepable_res = 9;
+ return 0;
+}
+
+/**
+ * same target as the uprobe.s above to force sleepable and non-sleepable
+ * programs in the same bpf_prog_array
+ */
+SEC("uprobe//proc/self/exe:trigger_func3")
+int handle_uprobe_byname3(struct pt_regs *ctx)
+{
+ uprobe_byname3_res = 10;
+ return 0;
+}
+
+SEC("uretprobe.s//proc/self/exe:trigger_func3")
+int handle_uretprobe_byname3_sleepable(struct pt_regs *ctx)
+{
+ if (verify_sleepable_user_copy())
+ uretprobe_byname3_sleepable_res = 11;
+ return 0;
+}
+
+SEC("uretprobe//proc/self/exe:trigger_func3")
+int handle_uretprobe_byname3(struct pt_regs *ctx)
+{
+ uretprobe_byname3_res = 12;
+ return 0;
+}
+
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index f00a9731930e..196cd8dfe42a 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -8,6 +8,8 @@
#define EINVAL 22
#define ENOENT 2
+extern unsigned long CONFIG_HZ __kconfig;
+
int test_einval_bpf_tuple = 0;
int test_einval_reserved = 0;
int test_einval_netns_id = 0;
@@ -16,6 +18,11 @@ int test_eproto_l4proto = 0;
int test_enonet_netns_id = 0;
int test_enoent_lookup = 0;
int test_eafnosupport = 0;
+int test_alloc_entry = -EINVAL;
+int test_insert_entry = -EAFNOSUPPORT;
+int test_succ_lookup = -ENOENT;
+u32 test_delta_timeout = 0;
+u32 test_status = 0;
struct nf_conn;
@@ -26,31 +33,44 @@ struct bpf_ct_opts___local {
u8 reserved[3];
} __attribute__((preserve_access_index));
+struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym;
void bpf_ct_release(struct nf_conn *) __ksym;
+void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym;
+int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym;
+int bpf_ct_set_status(struct nf_conn *, u32) __ksym;
+int bpf_ct_change_status(struct nf_conn *, u32) __ksym;
static __always_inline void
-nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
- struct bpf_ct_opts___local *, u32),
+nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32),
+ struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32),
void *ctx)
{
struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
struct bpf_sock_tuple bpf_tuple;
struct nf_conn *ct;
+ int err;
__builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
- ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def));
+ ct = lookup_fn(ctx, NULL, 0, &opts_def, sizeof(opts_def));
if (ct)
bpf_ct_release(ct);
else
test_einval_bpf_tuple = opts_def.error;
opts_def.reserved[0] = 1;
- ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
opts_def.reserved[0] = 0;
opts_def.l4proto = IPPROTO_TCP;
if (ct)
@@ -59,21 +79,24 @@ nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
test_einval_reserved = opts_def.error;
opts_def.netns_id = -2;
- ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
opts_def.netns_id = -1;
if (ct)
bpf_ct_release(ct);
else
test_einval_netns_id = opts_def.error;
- ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1);
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def) - 1);
if (ct)
bpf_ct_release(ct);
else
test_einval_len_opts = opts_def.error;
opts_def.l4proto = IPPROTO_ICMP;
- ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
opts_def.l4proto = IPPROTO_TCP;
if (ct)
bpf_ct_release(ct);
@@ -81,37 +104,75 @@ nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
test_eproto_l4proto = opts_def.error;
opts_def.netns_id = 0xf00f;
- ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
opts_def.netns_id = -1;
if (ct)
bpf_ct_release(ct);
else
test_enonet_netns_id = opts_def.error;
- ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
if (ct)
bpf_ct_release(ct);
else
test_enoent_lookup = opts_def.error;
- ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def));
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def,
+ sizeof(opts_def));
if (ct)
bpf_ct_release(ct);
else
test_eafnosupport = opts_def.error;
+
+ bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */
+ bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */
+ bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */
+ bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */
+
+ ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ if (ct) {
+ struct nf_conn *ct_ins;
+
+ bpf_ct_set_timeout(ct, 10000);
+ bpf_ct_set_status(ct, IPS_CONFIRMED);
+
+ ct_ins = bpf_ct_insert_entry(ct);
+ if (ct_ins) {
+ struct nf_conn *ct_lk;
+
+ ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+ &opts_def, sizeof(opts_def));
+ if (ct_lk) {
+ /* update ct entry timeout */
+ bpf_ct_change_timeout(ct_lk, 10000);
+ test_delta_timeout = ct_lk->timeout - bpf_jiffies64();
+ test_delta_timeout /= CONFIG_HZ;
+ test_status = IPS_SEEN_REPLY;
+ bpf_ct_change_status(ct_lk, IPS_SEEN_REPLY);
+ bpf_ct_release(ct_lk);
+ test_succ_lookup = 0;
+ }
+ bpf_ct_release(ct_ins);
+ test_insert_entry = 0;
+ }
+ test_alloc_entry = 0;
+ }
}
SEC("xdp")
int nf_xdp_ct_test(struct xdp_md *ctx)
{
- nf_ct_test((void *)bpf_xdp_ct_lookup, ctx);
+ nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
return 0;
}
SEC("tc")
int nf_skb_ct_test(struct __sk_buff *ctx)
{
- nf_ct_test((void *)bpf_skb_ct_lookup, ctx);
+ nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
new file mode 100644
index 000000000000..bf79af15c808
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct nf_conn;
+
+struct bpf_ct_opts___local {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 reserved[3];
+} __attribute__((preserve_access_index));
+
+struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym;
+void bpf_ct_release(struct nf_conn *) __ksym;
+void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym;
+int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym;
+int bpf_ct_set_status(struct nf_conn *, u32) __ksym;
+int bpf_ct_change_status(struct nf_conn *, u32) __ksym;
+
+SEC("?tc")
+int alloc_release(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ bpf_ct_release(ct);
+ return 0;
+}
+
+SEC("?tc")
+int insert_insert(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ ct = bpf_ct_insert_entry(ct);
+ if (!ct)
+ return 0;
+ ct = bpf_ct_insert_entry(ct);
+ return 0;
+}
+
+SEC("?tc")
+int lookup_insert(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ bpf_ct_insert_entry(ct);
+ return 0;
+}
+
+SEC("?tc")
+int set_timeout_after_insert(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ ct = bpf_ct_insert_entry(ct);
+ if (!ct)
+ return 0;
+ bpf_ct_set_timeout(ct, 0);
+ return 0;
+}
+
+SEC("?tc")
+int set_status_after_insert(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ ct = bpf_ct_insert_entry(ct);
+ if (!ct)
+ return 0;
+ bpf_ct_set_status(ct, 0);
+ return 0;
+}
+
+SEC("?tc")
+int change_timeout_after_alloc(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ bpf_ct_change_timeout(ct, 0);
+ return 0;
+}
+
+SEC("?tc")
+int change_status_after_alloc(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ bpf_ct_change_status(ct, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
deleted file mode 100644
index 07c94df13660..000000000000
--- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2018 Facebook */
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
-
-struct ipv_counts {
- unsigned int v4;
- unsigned int v6;
-};
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-struct bpf_map_def SEC("maps") btf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct ipv_counts),
- .max_entries = 4,
-};
-#pragma GCC diagnostic pop
-
-BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
-
-__attribute__((noinline))
-int test_long_fname_2(void)
-{
- struct ipv_counts *counts;
- int key = 0;
-
- counts = bpf_map_lookup_elem(&btf_map, &key);
- if (!counts)
- return 0;
-
- counts->v6++;
-
- return 0;
-}
-
-__attribute__((noinline))
-int test_long_fname_1(void)
-{
- return test_long_fname_2();
-}
-
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(void *arg)
-{
- return test_long_fname_1();
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
index 762671a2e90c..251854a041b5 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -9,19 +9,6 @@ struct ipv_counts {
unsigned int v6;
};
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-/* just to validate we can handle maps in multiple sections */
-struct bpf_map_def SEC("maps") btf_map_legacy = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(long long),
- .max_entries = 4,
-};
-#pragma GCC diagnostic pop
-
-BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
-
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 4);
@@ -41,11 +28,6 @@ int test_long_fname_2(void)
counts->v6++;
- /* just verify we can reference both maps */
- counts = bpf_map_lookup_elem(&btf_map_legacy, &key);
- if (!counts)
- return 0;
-
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_extern.c b/tools/testing/selftests/bpf/progs/test_core_extern.c
index 3ac3603ad53d..a3c7c1042f35 100644
--- a/tools/testing/selftests/bpf/progs/test_core_extern.c
+++ b/tools/testing/selftests/bpf/progs/test_core_extern.c
@@ -11,6 +11,7 @@
static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999;
extern int LINUX_KERNEL_VERSION __kconfig;
+extern int LINUX_UNKNOWN_VIRTUAL_EXTERN __kconfig __weak;
extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */
extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak;
extern bool CONFIG_BOOL __kconfig __weak;
@@ -22,6 +23,7 @@ extern const char CONFIG_STR[8] __kconfig __weak;
extern uint64_t CONFIG_MISSING __kconfig __weak;
uint64_t kern_ver = -1;
+uint64_t unkn_virt_val = -1;
uint64_t bpf_syscall = -1;
uint64_t tristate_val = -1;
uint64_t bool_val = -1;
@@ -38,6 +40,7 @@ int handle_sys_enter(struct pt_regs *ctx)
int i;
kern_ver = LINUX_KERNEL_VERSION;
+ unkn_virt_val = LINUX_UNKNOWN_VIRTUAL_EXTERN;
bpf_syscall = CONFIG_BPF_SYSCALL;
tristate_val = CONFIG_TRISTATE;
bool_val = CONFIG_BOOL;
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c
new file mode 100644
index 000000000000..63147fbfae6e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+enum named_unsigned_enum64 {
+ UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL,
+ UNSIGNED_ENUM64_VAL2 = 0x2ffffffffULL,
+ UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL,
+};
+
+enum named_signed_enum64 {
+ SIGNED_ENUM64_VAL1 = 0x1ffffffffLL,
+ SIGNED_ENUM64_VAL2 = -2,
+ SIGNED_ENUM64_VAL3 = 0x3ffffffffLL,
+};
+
+struct core_reloc_enum64val_output {
+ bool unsigned_val1_exists;
+ bool unsigned_val2_exists;
+ bool unsigned_val3_exists;
+ bool signed_val1_exists;
+ bool signed_val2_exists;
+ bool signed_val3_exists;
+
+ long unsigned_val1;
+ long unsigned_val2;
+ long signed_val1;
+ long signed_val2;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_enum64val(void *ctx)
+{
+#if __clang_major__ >= 15
+ struct core_reloc_enum64val_output *out = (void *)&data.out;
+ enum named_unsigned_enum64 named_unsigned = 0;
+ enum named_signed_enum64 named_signed = 0;
+
+ out->unsigned_val1_exists = bpf_core_enum_value_exists(named_unsigned, UNSIGNED_ENUM64_VAL1);
+ out->unsigned_val2_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL2);
+ out->unsigned_val3_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL3);
+ out->signed_val1_exists = bpf_core_enum_value_exists(named_signed, SIGNED_ENUM64_VAL1);
+ out->signed_val2_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL2);
+ out->signed_val3_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL3);
+
+ out->unsigned_val1 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL1);
+ out->unsigned_val2 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL2);
+ out->signed_val1 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL1);
+ out->signed_val2 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL2);
+ /* NAMED_ENUM64_VAL3 value is optional */
+
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
index 145028b52ad8..a17dd83eae67 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
@@ -21,6 +21,7 @@ struct core_reloc_kernel_output {
/* we have test_progs[-flavor], so cut flavor part */
char comm[sizeof("test_progs")];
int comm_len;
+ bool local_task_struct_matches;
};
struct task_struct {
@@ -30,11 +31,25 @@ struct task_struct {
struct task_struct *group_leader;
};
+struct mm_struct___wrong {
+ int abc_whatever_should_not_exist;
+};
+
+struct task_struct___local {
+ int pid;
+ struct mm_struct___wrong *mm;
+};
+
#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
SEC("raw_tracepoint/sys_enter")
int test_core_kernel(void *ctx)
{
+ /* Support for the BPF_TYPE_MATCHES argument to the
+ * __builtin_preserve_type_info builtin was added at some point during
+ * development of clang 15 and it's what we require for this test.
+ */
+#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
struct task_struct *task = (void *)bpf_get_current_task();
struct core_reloc_kernel_output *out = (void *)&data.out;
uint64_t pid_tgid = bpf_get_current_pid_tgid();
@@ -93,6 +108,10 @@ int test_core_kernel(void *ctx)
group_leader, group_leader, group_leader, group_leader,
comm);
+ out->local_task_struct_matches = bpf_core_type_matches(struct task_struct___local);
+#else
+ data.skip = true;
+#endif
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
index fb60f8195c53..2edb4df35e6e 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
@@ -19,6 +19,14 @@ struct a_struct {
int x;
};
+struct a_complex_struct {
+ union {
+ struct a_struct *a;
+ void *b;
+ } x;
+ volatile long y;
+};
+
union a_union {
int y;
int z;
@@ -43,6 +51,7 @@ typedef int int_typedef;
typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
typedef void *void_ptr_typedef;
+typedef int *restrict restrict_ptr_typedef;
typedef int (*func_proto_typedef)(long);
@@ -50,6 +59,7 @@ typedef char arr_typedef[20];
struct core_reloc_type_based_output {
bool struct_exists;
+ bool complex_struct_exists;
bool union_exists;
bool enum_exists;
bool typedef_named_struct_exists;
@@ -58,9 +68,24 @@ struct core_reloc_type_based_output {
bool typedef_int_exists;
bool typedef_enum_exists;
bool typedef_void_ptr_exists;
+ bool typedef_restrict_ptr_exists;
bool typedef_func_proto_exists;
bool typedef_arr_exists;
+ bool struct_matches;
+ bool complex_struct_matches;
+ bool union_matches;
+ bool enum_matches;
+ bool typedef_named_struct_matches;
+ bool typedef_anon_struct_matches;
+ bool typedef_struct_ptr_matches;
+ bool typedef_int_matches;
+ bool typedef_enum_matches;
+ bool typedef_void_ptr_matches;
+ bool typedef_restrict_ptr_matches;
+ bool typedef_func_proto_matches;
+ bool typedef_arr_matches;
+
int struct_sz;
int union_sz;
int enum_sz;
@@ -77,10 +102,17 @@ struct core_reloc_type_based_output {
SEC("raw_tracepoint/sys_enter")
int test_core_type_based(void *ctx)
{
-#if __has_builtin(__builtin_preserve_type_info)
+ /* Support for the BPF_TYPE_MATCHES argument to the
+ * __builtin_preserve_type_info builtin was added at some point during
+ * development of clang 15 and it's what we require for this test. Part of it
+ * could run with merely __builtin_preserve_type_info (which could be checked
+ * separately), but we have to find an upper bound.
+ */
+#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
struct core_reloc_type_based_output *out = (void *)&data.out;
out->struct_exists = bpf_core_type_exists(struct a_struct);
+ out->complex_struct_exists = bpf_core_type_exists(struct a_complex_struct);
out->union_exists = bpf_core_type_exists(union a_union);
out->enum_exists = bpf_core_type_exists(enum an_enum);
out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef);
@@ -89,9 +121,24 @@ int test_core_type_based(void *ctx)
out->typedef_int_exists = bpf_core_type_exists(int_typedef);
out->typedef_enum_exists = bpf_core_type_exists(enum_typedef);
out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef);
+ out->typedef_restrict_ptr_exists = bpf_core_type_exists(restrict_ptr_typedef);
out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef);
out->typedef_arr_exists = bpf_core_type_exists(arr_typedef);
+ out->struct_matches = bpf_core_type_matches(struct a_struct);
+ out->complex_struct_matches = bpf_core_type_matches(struct a_complex_struct);
+ out->union_matches = bpf_core_type_matches(union a_union);
+ out->enum_matches = bpf_core_type_matches(enum an_enum);
+ out->typedef_named_struct_matches = bpf_core_type_matches(named_struct_typedef);
+ out->typedef_anon_struct_matches = bpf_core_type_matches(anon_struct_typedef);
+ out->typedef_struct_ptr_matches = bpf_core_type_matches(struct_ptr_typedef);
+ out->typedef_int_matches = bpf_core_type_matches(int_typedef);
+ out->typedef_enum_matches = bpf_core_type_matches(enum_typedef);
+ out->typedef_void_ptr_matches = bpf_core_type_matches(void_ptr_typedef);
+ out->typedef_restrict_ptr_matches = bpf_core_type_matches(restrict_ptr_typedef);
+ out->typedef_func_proto_matches = bpf_core_type_matches(func_proto_typedef);
+ out->typedef_arr_matches = bpf_core_type_matches(arr_typedef);
+
out->struct_sz = bpf_core_type_size(struct a_struct);
out->union_sz = bpf_core_type_size(union a_union);
out->enum_sz = bpf_core_type_size(enum an_enum);
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
index 702578a5e496..a8e501af9604 100644
--- a/tools/testing/selftests/bpf/progs/test_probe_user.c
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -1,37 +1,47 @@
// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/ptrace.h>
-#include <linux/bpf.h>
-
-#include <netinet/in.h>
-
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
#include "bpf_misc.h"
static struct sockaddr_in old;
-SEC("kprobe/" SYS_PREFIX "sys_connect")
-int BPF_KPROBE(handle_sys_connect)
+static int handle_sys_connect_common(struct sockaddr_in *uservaddr)
{
-#if SYSCALL_WRAPPER == 1
- struct pt_regs *real_regs;
-#endif
struct sockaddr_in new;
- void *ptr;
-#if SYSCALL_WRAPPER == 0
- ptr = (void *)PT_REGS_PARM2(ctx);
-#else
- real_regs = (struct pt_regs *)PT_REGS_PARM1(ctx);
- bpf_probe_read_kernel(&ptr, sizeof(ptr), &PT_REGS_PARM2(real_regs));
+ bpf_probe_read_user(&old, sizeof(old), uservaddr);
+ __builtin_memset(&new, 0xab, sizeof(new));
+ bpf_probe_write_user(uservaddr, &new, sizeof(new));
+
+ return 0;
+}
+
+SEC("ksyscall/connect")
+int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr,
+ int addrlen)
+{
+ return handle_sys_connect_common(uservaddr);
+}
+
+#if defined(bpf_target_s390)
+#ifndef SYS_CONNECT
+#define SYS_CONNECT 3
#endif
- bpf_probe_read_user(&old, sizeof(old), ptr);
- __builtin_memset(&new, 0xab, sizeof(new));
- bpf_probe_write_user(ptr, &new, sizeof(new));
+SEC("ksyscall/socketcall")
+int BPF_KSYSCALL(handle_sys_socketcall, int call, unsigned long *args)
+{
+ if (call == SYS_CONNECT) {
+ struct sockaddr_in *uservaddr;
+
+ bpf_probe_read_user(&uservaddr, sizeof(uservaddr), &args[1]);
+ return handle_sys_connect_common(uservaddr);
+ }
return 0;
}
+#endif
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
index 1b1187d2967b..1a4e93f6d9df 100644
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -51,6 +51,8 @@ int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 };
int read_mostly_var __read_mostly;
int out_mostly_var;
+char huge_arr[16 * 1024 * 1024];
+
SEC("raw_tp/sys_enter")
int handler(const void *ctx)
{
@@ -71,6 +73,8 @@ int handler(const void *ctx)
out_mostly_var = read_mostly_var;
+ huge_arr[sizeof(huge_arr) - 1] = 123;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
index 06f300d06dbd..b596479a9ebe 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
@@ -11,6 +11,8 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <sys/socket.h>
@@ -115,6 +117,19 @@ static bool bpf_fwd(void)
return test < TCP_IP4_RT_FWD;
}
+static __u8 get_proto(void)
+{
+ switch (test) {
+ case UDP_IP4:
+ case UDP_IP6:
+ case UDP_IP4_RT_FWD:
+ case UDP_IP6_RT_FWD:
+ return IPPROTO_UDP;
+ default:
+ return IPPROTO_TCP;
+ }
+}
+
/* -1: parse error: TC_ACT_SHOT
* 0: not testing traffic: TC_ACT_OK
* >0: first byte is the inet_proto, second byte has the netns
@@ -122,11 +137,16 @@ static bool bpf_fwd(void)
*/
static int skb_get_type(struct __sk_buff *skb)
{
+ __u16 dst_ns_port = __bpf_htons(50000 + test);
void *data_end = ctx_ptr(skb->data_end);
void *data = ctx_ptr(skb->data);
__u8 inet_proto = 0, ns = 0;
struct ipv6hdr *ip6h;
+ __u16 sport, dport;
struct iphdr *iph;
+ struct tcphdr *th;
+ struct udphdr *uh;
+ void *trans;
switch (skb->protocol) {
case __bpf_htons(ETH_P_IP):
@@ -138,6 +158,7 @@ static int skb_get_type(struct __sk_buff *skb)
else if (iph->saddr == ip4_dst)
ns = DST_NS;
inet_proto = iph->protocol;
+ trans = iph + 1;
break;
case __bpf_htons(ETH_P_IPV6):
ip6h = data + sizeof(struct ethhdr);
@@ -148,15 +169,43 @@ static int skb_get_type(struct __sk_buff *skb)
else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst))
ns = DST_NS;
inet_proto = ip6h->nexthdr;
+ trans = ip6h + 1;
break;
default:
return 0;
}
- if ((inet_proto != IPPROTO_TCP && inet_proto != IPPROTO_UDP) || !ns)
+ /* skb is not from src_ns or dst_ns.
+ * skb is not the testing IPPROTO.
+ */
+ if (!ns || inet_proto != get_proto())
return 0;
- return (ns << 8 | inet_proto);
+ switch (inet_proto) {
+ case IPPROTO_TCP:
+ th = trans;
+ if (th + 1 > data_end)
+ return -1;
+ sport = th->source;
+ dport = th->dest;
+ break;
+ case IPPROTO_UDP:
+ uh = trans;
+ if (uh + 1 > data_end)
+ return -1;
+ sport = uh->source;
+ dport = uh->dest;
+ break;
+ default:
+ return 0;
+ }
+
+ /* The skb is the testing traffic */
+ if ((ns == SRC_NS && dport == dst_ns_port) ||
+ (ns == DST_NS && sport == dst_ns_port))
+ return (ns << 8 | inet_proto);
+
+ return 0;
}
/* format: direction@iface@netns
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index 17f2f325b3f3..df0673c4ecbe 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -14,15 +14,24 @@
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/icmp.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/pkt_cls.h>
#include <linux/erspan.h>
+#include <linux/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret)
+#define VXLAN_UDP_PORT 4789
+
+/* Only IPv4 address assigned to veth1.
+ * 172.16.1.200
+ */
+#define ASSIGNED_ADDR_VETH1 0xac1001c8
+
struct geneve_opt {
__be16 opt_class;
__u8 type;
@@ -33,6 +42,11 @@ struct geneve_opt {
__u8 opt_data[8]; /* hard-coded to 8 byte */
};
+struct vxlanhdr {
+ __be32 vx_flags;
+ __be32 vx_vni;
+} __attribute__((packed));
+
struct vxlan_metadata {
__u32 gbp;
};
@@ -369,14 +383,8 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb)
int ret;
struct bpf_tunnel_key key;
struct vxlan_metadata md;
+ __u32 orig_daddr;
__u32 index = 0;
- __u32 *local_ip = NULL;
-
- local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
- if (!local_ip) {
- log_err(ret);
- return TC_ACT_SHOT;
- }
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) {
@@ -390,11 +398,10 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- if (key.local_ipv4 != *local_ip || md.gbp != 0x800FF) {
+ if (key.local_ipv4 != ASSIGNED_ADDR_VETH1 || md.gbp != 0x800FF) {
bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n",
key.tunnel_id, key.local_ipv4,
key.remote_ipv4, md.gbp);
- bpf_printk("local_ip 0x%x\n", *local_ip);
log_err(ret);
return TC_ACT_SHOT;
}
@@ -403,6 +410,61 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb)
}
SEC("tc")
+int veth_set_outer_dst(struct __sk_buff *skb)
+{
+ struct ethhdr *eth = (struct ethhdr *)(long)skb->data;
+ __u32 assigned_ip = bpf_htonl(ASSIGNED_ADDR_VETH1);
+ void *data_end = (void *)(long)skb->data_end;
+ struct udphdr *udph;
+ struct iphdr *iph;
+ __u32 index = 0;
+ int ret = 0;
+ int shrink;
+ __s64 csum;
+
+ if ((void *)eth + sizeof(*eth) > data_end) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ if (eth->h_proto != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ iph = (struct iphdr *)(eth + 1);
+ if ((void *)iph + sizeof(*iph) > data_end) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+ if (iph->protocol != IPPROTO_UDP)
+ return TC_ACT_OK;
+
+ udph = (struct udphdr *)(iph + 1);
+ if ((void *)udph + sizeof(*udph) > data_end) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+ if (udph->dest != bpf_htons(VXLAN_UDP_PORT))
+ return TC_ACT_OK;
+
+ if (iph->daddr != assigned_ip) {
+ csum = bpf_csum_diff(&iph->daddr, sizeof(__u32), &assigned_ip,
+ sizeof(__u32), 0);
+ if (bpf_skb_store_bytes(skb, ETH_HLEN + offsetof(struct iphdr, daddr),
+ &assigned_ip, sizeof(__u32), 0) < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+ if (bpf_l3_csum_replace(skb, ETH_HLEN + offsetof(struct iphdr, check),
+ 0, csum, 0) < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+ bpf_skb_change_type(skb, PACKET_HOST);
+ }
+ return TC_ACT_OK;
+}
+
+SEC("tc")
int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
index 913acdffd90f..3987ff174f1f 100644
--- a/tools/testing/selftests/bpf/progs/test_varlen.c
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -41,20 +41,20 @@ int handler64_unsigned(void *regs)
{
int pid = bpf_get_current_pid_tgid() >> 32;
void *payload = payload1;
- u64 len;
+ long len;
/* ignore irrelevant invocations */
if (test_pid != pid || !capture)
return 0;
len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
- if (len <= MAX_LEN) {
+ if (len >= 0) {
payload += len;
payload1_len1 = len;
}
len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
- if (len <= MAX_LEN) {
+ if (len >= 0) {
payload += len;
payload1_len2 = len;
}
@@ -123,7 +123,7 @@ int handler32_signed(void *regs)
{
int pid = bpf_get_current_pid_tgid() >> 32;
void *payload = payload4;
- int len;
+ long len;
/* ignore irrelevant invocations */
if (test_pid != pid || !capture)
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 125d872d7981..ba48fcb98ab2 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -239,7 +239,7 @@ bool parse_udp(void *data, void *data_end,
udp = data + off;
if (udp + 1 > data_end)
- return 0;
+ return false;
if (!is_icmp) {
pckt->flow.port16[0] = udp->source;
pckt->flow.port16[1] = udp->dest;
@@ -247,7 +247,7 @@ bool parse_udp(void *data, void *data_end,
pckt->flow.port16[0] = udp->dest;
pckt->flow.port16[1] = udp->source;
}
- return 1;
+ return true;
}
static __attribute__ ((noinline))
@@ -261,7 +261,7 @@ bool parse_tcp(void *data, void *data_end,
tcp = data + off;
if (tcp + 1 > data_end)
- return 0;
+ return false;
if (tcp->syn)
pckt->flags |= (1 << 1);
if (!is_icmp) {
@@ -271,7 +271,7 @@ bool parse_tcp(void *data, void *data_end,
pckt->flow.port16[0] = tcp->dest;
pckt->flow.port16[1] = tcp->source;
}
- return 1;
+ return true;
}
static __attribute__ ((noinline))
@@ -287,7 +287,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
void *data;
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
- return 0;
+ return false;
data = (void *)(long)xdp->data;
data_end = (void *)(long)xdp->data_end;
new_eth = data;
@@ -295,7 +295,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
old_eth = data + sizeof(struct ipv6hdr);
if (new_eth + 1 > data_end ||
old_eth + 1 > data_end || ip6h + 1 > data_end)
- return 0;
+ return false;
memcpy(new_eth->eth_dest, cval->mac, 6);
memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
new_eth->eth_proto = 56710;
@@ -314,7 +314,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
ip6h->saddr.in6_u.u6_addr32[2] = 3;
ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
- return 1;
+ return true;
}
static __attribute__ ((noinline))
@@ -335,7 +335,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
ip_suffix <<= 15;
ip_suffix ^= pckt->flow.src;
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
- return 0;
+ return false;
data = (void *)(long)xdp->data;
data_end = (void *)(long)xdp->data_end;
new_eth = data;
@@ -343,7 +343,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
old_eth = data + sizeof(struct iphdr);
if (new_eth + 1 > data_end ||
old_eth + 1 > data_end || iph + 1 > data_end)
- return 0;
+ return false;
memcpy(new_eth->eth_dest, cval->mac, 6);
memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
new_eth->eth_proto = 8;
@@ -367,8 +367,8 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
csum += *next_iph_u16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
- return 0;
- return 1;
+ return false;
+ return true;
}
static __attribute__ ((noinline))
@@ -386,10 +386,10 @@ bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
else
new_eth->eth_proto = 56710;
if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
- return 0;
+ return false;
*data = (void *)(long)xdp->data;
*data_end = (void *)(long)xdp->data_end;
- return 1;
+ return true;
}
static __attribute__ ((noinline))
@@ -404,10 +404,10 @@ bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
new_eth->eth_proto = 8;
if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
- return 0;
+ return false;
*data = (void *)(long)xdp->data;
*data_end = (void *)(long)xdp->data_end;
- return 1;
+ return true;
}
static __attribute__ ((noinline))
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
new file mode 100644
index 000000000000..736686e903f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -0,0 +1,843 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <asm/errno.h>
+
+#define TC_ACT_OK 0
+#define TC_ACT_SHOT 2
+
+#define NSEC_PER_SEC 1000000000L
+
+#define ETH_ALEN 6
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+
+#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3])
+
+#define IP_DF 0x4000
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1fff
+
+#define NEXTHDR_TCP 6
+
+#define TCPOPT_NOP 1
+#define TCPOPT_EOL 0
+#define TCPOPT_MSS 2
+#define TCPOPT_WINDOW 3
+#define TCPOPT_SACK_PERM 4
+#define TCPOPT_TIMESTAMP 8
+
+#define TCPOLEN_MSS 4
+#define TCPOLEN_WINDOW 3
+#define TCPOLEN_SACK_PERM 2
+#define TCPOLEN_TIMESTAMP 10
+
+#define TCP_TS_HZ 1000
+#define TS_OPT_WSCALE_MASK 0xf
+#define TS_OPT_SACK (1 << 4)
+#define TS_OPT_ECN (1 << 5)
+#define TSBITS 6
+#define TSMASK (((__u32)1 << TSBITS) - 1)
+#define TCP_MAX_WSCALE 14U
+
+#define IPV4_MAXLEN 60
+#define TCP_MAXLEN 60
+
+#define DEFAULT_MSS4 1460
+#define DEFAULT_MSS6 1440
+#define DEFAULT_WSCALE 7
+#define DEFAULT_TTL 64
+#define MAX_ALLOWED_PORTS 8
+
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+#define __get_unaligned_t(type, ptr) ({ \
+ const struct { type x; } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \
+ __pptr->x; \
+})
+
+#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(max_entries, 2);
+} values SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u16);
+ __uint(max_entries, MAX_ALLOWED_PORTS);
+} allowed_ports SEC(".maps");
+
+/* Some symbols defined in net/netfilter/nf_conntrack_bpf.c are unavailable in
+ * vmlinux.h if CONFIG_NF_CONNTRACK=m, so they are redefined locally.
+ */
+
+struct bpf_ct_opts___local {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 dir;
+ u8 reserved[2];
+} __attribute__((preserve_access_index));
+
+#define BPF_F_CURRENT_NETNS (-1)
+
+extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx,
+ struct bpf_sock_tuple *bpf_tuple,
+ __u32 len_tuple,
+ struct bpf_ct_opts___local *opts,
+ __u32 len_opts) __ksym;
+
+extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx,
+ struct bpf_sock_tuple *bpf_tuple,
+ u32 len_tuple,
+ struct bpf_ct_opts___local *opts,
+ u32 len_opts) __ksym;
+
+extern void bpf_ct_release(struct nf_conn *ct) __ksym;
+
+static __always_inline void swap_eth_addr(__u8 *a, __u8 *b)
+{
+ __u8 tmp[ETH_ALEN];
+
+ __builtin_memcpy(tmp, a, ETH_ALEN);
+ __builtin_memcpy(a, b, ETH_ALEN);
+ __builtin_memcpy(b, tmp, ETH_ALEN);
+}
+
+static __always_inline __u16 csum_fold(__u32 csum)
+{
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ return (__u16)~csum;
+}
+
+static __always_inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto,
+ __u32 csum)
+{
+ __u64 s = csum;
+
+ s += (__u32)saddr;
+ s += (__u32)daddr;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ s += proto + len;
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ s += (proto + len) << 8;
+#else
+#error Unknown endian
+#endif
+ s = (s & 0xffffffff) + (s >> 32);
+ s = (s & 0xffffffff) + (s >> 32);
+
+ return csum_fold((__u32)s);
+}
+
+static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __u32 csum)
+{
+ __u64 sum = csum;
+ int i;
+
+#pragma unroll
+ for (i = 0; i < 4; i++)
+ sum += (__u32)saddr->in6_u.u6_addr32[i];
+
+#pragma unroll
+ for (i = 0; i < 4; i++)
+ sum += (__u32)daddr->in6_u.u6_addr32[i];
+
+ /* Don't combine additions to avoid 32-bit overflow. */
+ sum += bpf_htonl(len);
+ sum += bpf_htonl(proto);
+
+ sum = (sum & 0xffffffff) + (sum >> 32);
+ sum = (sum & 0xffffffff) + (sum >> 32);
+
+ return csum_fold((__u32)sum);
+}
+
+static __always_inline __u64 tcp_clock_ns(void)
+{
+ return bpf_ktime_get_ns();
+}
+
+static __always_inline __u32 tcp_ns_to_ts(__u64 ns)
+{
+ return ns / (NSEC_PER_SEC / TCP_TS_HZ);
+}
+
+static __always_inline __u32 tcp_time_stamp_raw(void)
+{
+ return tcp_ns_to_ts(tcp_clock_ns());
+}
+
+struct tcpopt_context {
+ __u8 *ptr;
+ __u8 *end;
+ void *data_end;
+ __be32 *tsecr;
+ __u8 wscale;
+ bool option_timestamp;
+ bool option_sack;
+};
+
+static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+{
+ __u8 opcode, opsize;
+
+ if (ctx->ptr >= ctx->end)
+ return 1;
+ if (ctx->ptr >= ctx->data_end)
+ return 1;
+
+ opcode = ctx->ptr[0];
+
+ if (opcode == TCPOPT_EOL)
+ return 1;
+ if (opcode == TCPOPT_NOP) {
+ ++ctx->ptr;
+ return 0;
+ }
+
+ if (ctx->ptr + 1 >= ctx->end)
+ return 1;
+ if (ctx->ptr + 1 >= ctx->data_end)
+ return 1;
+ opsize = ctx->ptr[1];
+ if (opsize < 2)
+ return 1;
+
+ if (ctx->ptr + opsize > ctx->end)
+ return 1;
+
+ switch (opcode) {
+ case TCPOPT_WINDOW:
+ if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end)
+ ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE;
+ break;
+ case TCPOPT_TIMESTAMP:
+ if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) {
+ ctx->option_timestamp = true;
+ /* Client's tsval becomes our tsecr. */
+ *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2));
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if (opsize == TCPOLEN_SACK_PERM)
+ ctx->option_sack = true;
+ break;
+ }
+
+ ctx->ptr += opsize;
+
+ return 0;
+}
+
+static int tscookie_tcpopt_parse_batch(__u32 index, void *context)
+{
+ int i;
+
+ for (i = 0; i < 7; i++)
+ if (tscookie_tcpopt_parse(context))
+ return 1;
+ return 0;
+}
+
+static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
+ __u16 tcp_len, __be32 *tsval,
+ __be32 *tsecr, void *data_end)
+{
+ struct tcpopt_context loop_ctx = {
+ .ptr = (__u8 *)(tcp_header + 1),
+ .end = (__u8 *)tcp_header + tcp_len,
+ .data_end = data_end,
+ .tsecr = tsecr,
+ .wscale = TS_OPT_WSCALE_MASK,
+ .option_timestamp = false,
+ .option_sack = false,
+ };
+ u32 cookie;
+
+ bpf_loop(6, tscookie_tcpopt_parse_batch, &loop_ctx, 0);
+
+ if (!loop_ctx.option_timestamp)
+ return false;
+
+ cookie = tcp_time_stamp_raw() & ~TSMASK;
+ cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK;
+ if (loop_ctx.option_sack)
+ cookie |= TS_OPT_SACK;
+ if (tcp_header->ece && tcp_header->cwr)
+ cookie |= TS_OPT_ECN;
+ *tsval = bpf_htonl(cookie);
+
+ return true;
+}
+
+static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale,
+ __u8 *ttl, bool ipv6)
+{
+ __u32 key = 0;
+ __u64 *value;
+
+ value = bpf_map_lookup_elem(&values, &key);
+ if (value && *value != 0) {
+ if (ipv6)
+ *mss = (*value >> 32) & 0xffff;
+ else
+ *mss = *value & 0xffff;
+ *wscale = (*value >> 16) & 0xf;
+ *ttl = (*value >> 24) & 0xff;
+ return;
+ }
+
+ *mss = ipv6 ? DEFAULT_MSS6 : DEFAULT_MSS4;
+ *wscale = DEFAULT_WSCALE;
+ *ttl = DEFAULT_TTL;
+}
+
+static __always_inline void values_inc_synacks(void)
+{
+ __u32 key = 1;
+ __u32 *value;
+
+ value = bpf_map_lookup_elem(&values, &key);
+ if (value)
+ __sync_fetch_and_add(value, 1);
+}
+
+static __always_inline bool check_port_allowed(__u16 port)
+{
+ __u32 i;
+
+ for (i = 0; i < MAX_ALLOWED_PORTS; i++) {
+ __u32 key = i;
+ __u16 *value;
+
+ value = bpf_map_lookup_elem(&allowed_ports, &key);
+
+ if (!value)
+ break;
+ /* 0 is a terminator value. Check it first to avoid matching on
+ * a forbidden port == 0 and returning true.
+ */
+ if (*value == 0)
+ break;
+
+ if (*value == port)
+ return true;
+ }
+
+ return false;
+}
+
+struct header_pointers {
+ struct ethhdr *eth;
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ struct tcphdr *tcp;
+ __u16 tcp_len;
+};
+
+static __always_inline int tcp_dissect(void *data, void *data_end,
+ struct header_pointers *hdr)
+{
+ hdr->eth = data;
+ if (hdr->eth + 1 > data_end)
+ return XDP_DROP;
+
+ switch (bpf_ntohs(hdr->eth->h_proto)) {
+ case ETH_P_IP:
+ hdr->ipv6 = NULL;
+
+ hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth);
+ if (hdr->ipv4 + 1 > data_end)
+ return XDP_DROP;
+ if (hdr->ipv4->ihl * 4 < sizeof(*hdr->ipv4))
+ return XDP_DROP;
+ if (hdr->ipv4->version != 4)
+ return XDP_DROP;
+
+ if (hdr->ipv4->protocol != IPPROTO_TCP)
+ return XDP_PASS;
+
+ hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4;
+ break;
+ case ETH_P_IPV6:
+ hdr->ipv4 = NULL;
+
+ hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth);
+ if (hdr->ipv6 + 1 > data_end)
+ return XDP_DROP;
+ if (hdr->ipv6->version != 6)
+ return XDP_DROP;
+
+ /* XXX: Extension headers are not supported and could circumvent
+ * XDP SYN flood protection.
+ */
+ if (hdr->ipv6->nexthdr != NEXTHDR_TCP)
+ return XDP_PASS;
+
+ hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6);
+ break;
+ default:
+ /* XXX: VLANs will circumvent XDP SYN flood protection. */
+ return XDP_PASS;
+ }
+
+ if (hdr->tcp + 1 > data_end)
+ return XDP_DROP;
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ if (hdr->tcp_len < sizeof(*hdr->tcp))
+ return XDP_DROP;
+
+ return XDP_TX;
+}
+
+static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp)
+{
+ struct bpf_ct_opts___local ct_lookup_opts = {
+ .netns_id = BPF_F_CURRENT_NETNS,
+ .l4proto = IPPROTO_TCP,
+ };
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+ __u32 tup_size;
+
+ if (hdr->ipv4) {
+ /* TCP doesn't normally use fragments, and XDP can't reassemble
+ * them.
+ */
+ if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF))
+ return XDP_DROP;
+
+ tup.ipv4.saddr = hdr->ipv4->saddr;
+ tup.ipv4.daddr = hdr->ipv4->daddr;
+ tup.ipv4.sport = hdr->tcp->source;
+ tup.ipv4.dport = hdr->tcp->dest;
+ tup_size = sizeof(tup.ipv4);
+ } else if (hdr->ipv6) {
+ __builtin_memcpy(tup.ipv6.saddr, &hdr->ipv6->saddr, sizeof(tup.ipv6.saddr));
+ __builtin_memcpy(tup.ipv6.daddr, &hdr->ipv6->daddr, sizeof(tup.ipv6.daddr));
+ tup.ipv6.sport = hdr->tcp->source;
+ tup.ipv6.dport = hdr->tcp->dest;
+ tup_size = sizeof(tup.ipv6);
+ } else {
+ /* The verifier can't track that either ipv4 or ipv6 is not
+ * NULL.
+ */
+ return XDP_ABORTED;
+ }
+ if (xdp)
+ ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts));
+ else
+ ct = bpf_skb_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts));
+ if (ct) {
+ unsigned long status = ct->status;
+
+ bpf_ct_release(ct);
+ if (status & IPS_CONFIRMED_BIT)
+ return XDP_PASS;
+ } else if (ct_lookup_opts.error != -ENOENT) {
+ return XDP_ABORTED;
+ }
+
+ /* error == -ENOENT || !(status & IPS_CONFIRMED_BIT) */
+ return XDP_TX;
+}
+
+static __always_inline __u8 tcp_mkoptions(__be32 *buf, __be32 *tsopt, __u16 mss,
+ __u8 wscale)
+{
+ __be32 *start = buf;
+
+ *buf++ = bpf_htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
+
+ if (!tsopt)
+ return buf - start;
+
+ if (tsopt[0] & bpf_htonl(1 << 4))
+ *buf++ = bpf_htonl((TCPOPT_SACK_PERM << 24) |
+ (TCPOLEN_SACK_PERM << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ else
+ *buf++ = bpf_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ *buf++ = tsopt[0];
+ *buf++ = tsopt[1];
+
+ if ((tsopt[0] & bpf_htonl(0xf)) != bpf_htonl(0xf))
+ *buf++ = bpf_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_WINDOW << 16) |
+ (TCPOLEN_WINDOW << 8) |
+ wscale);
+
+ return buf - start;
+}
+
+static __always_inline void tcp_gen_synack(struct tcphdr *tcp_header,
+ __u32 cookie, __be32 *tsopt,
+ __u16 mss, __u8 wscale)
+{
+ void *tcp_options;
+
+ tcp_flag_word(tcp_header) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (tsopt && (tsopt[0] & bpf_htonl(1 << 5)))
+ tcp_flag_word(tcp_header) |= TCP_FLAG_ECE;
+ tcp_header->doff = 5; /* doff is part of tcp_flag_word. */
+ swap(tcp_header->source, tcp_header->dest);
+ tcp_header->ack_seq = bpf_htonl(bpf_ntohl(tcp_header->seq) + 1);
+ tcp_header->seq = bpf_htonl(cookie);
+ tcp_header->window = 0;
+ tcp_header->urg_ptr = 0;
+ tcp_header->check = 0; /* Calculate checksum later. */
+
+ tcp_options = (void *)(tcp_header + 1);
+ tcp_header->doff += tcp_mkoptions(tcp_options, tsopt, mss, wscale);
+}
+
+static __always_inline void tcpv4_gen_synack(struct header_pointers *hdr,
+ __u32 cookie, __be32 *tsopt)
+{
+ __u8 wscale;
+ __u16 mss;
+ __u8 ttl;
+
+ values_get_tcpipopts(&mss, &wscale, &ttl, false);
+
+ swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest);
+
+ swap(hdr->ipv4->saddr, hdr->ipv4->daddr);
+ hdr->ipv4->check = 0; /* Calculate checksum later. */
+ hdr->ipv4->tos = 0;
+ hdr->ipv4->id = 0;
+ hdr->ipv4->ttl = ttl;
+
+ tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale);
+
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ hdr->ipv4->tot_len = bpf_htons(sizeof(*hdr->ipv4) + hdr->tcp_len);
+}
+
+static __always_inline void tcpv6_gen_synack(struct header_pointers *hdr,
+ __u32 cookie, __be32 *tsopt)
+{
+ __u8 wscale;
+ __u16 mss;
+ __u8 ttl;
+
+ values_get_tcpipopts(&mss, &wscale, &ttl, true);
+
+ swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest);
+
+ swap(hdr->ipv6->saddr, hdr->ipv6->daddr);
+ *(__be32 *)hdr->ipv6 = bpf_htonl(0x60000000);
+ hdr->ipv6->hop_limit = ttl;
+
+ tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale);
+
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ hdr->ipv6->payload_len = bpf_htons(hdr->tcp_len);
+}
+
+static __always_inline int syncookie_handle_syn(struct header_pointers *hdr,
+ void *ctx,
+ void *data, void *data_end,
+ bool xdp)
+{
+ __u32 old_pkt_size, new_pkt_size;
+ /* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the
+ * BPF verifier if tsopt is not volatile. Volatile forces it to store
+ * the pointer value and use it directly, otherwise tcp_mkoptions is
+ * (mis)compiled like this:
+ * if (!tsopt)
+ * return buf - start;
+ * reg = stored_return_value_of_tscookie_init;
+ * if (reg)
+ * tsopt = tsopt_buf;
+ * else
+ * tsopt = NULL;
+ * ...
+ * *buf++ = tsopt[1];
+ * It creates a dead branch where tsopt is assigned NULL, but the
+ * verifier can't prove it's dead and blocks the program.
+ */
+ __be32 * volatile tsopt = NULL;
+ __be32 tsopt_buf[2] = {};
+ __u16 ip_len;
+ __u32 cookie;
+ __s64 value;
+
+ /* Checksum is not yet verified, but both checksum failure and TCP
+ * header checks return XDP_DROP, so the order doesn't matter.
+ */
+ if (hdr->tcp->fin || hdr->tcp->rst)
+ return XDP_DROP;
+
+ /* Issue SYN cookies on allowed ports, drop SYN packets on blocked
+ * ports.
+ */
+ if (!check_port_allowed(bpf_ntohs(hdr->tcp->dest)))
+ return XDP_DROP;
+
+ if (hdr->ipv4) {
+ /* Check the IPv4 and TCP checksums before creating a SYNACK. */
+ value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, hdr->ipv4->ihl * 4, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (csum_fold(value) != 0)
+ return XDP_DROP; /* Bad IPv4 checksum. */
+
+ value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (csum_tcpudp_magic(hdr->ipv4->saddr, hdr->ipv4->daddr,
+ hdr->tcp_len, IPPROTO_TCP, value) != 0)
+ return XDP_DROP; /* Bad TCP checksum. */
+
+ ip_len = sizeof(*hdr->ipv4);
+
+ value = bpf_tcp_raw_gen_syncookie_ipv4(hdr->ipv4, hdr->tcp,
+ hdr->tcp_len);
+ } else if (hdr->ipv6) {
+ /* Check the TCP checksum before creating a SYNACK. */
+ value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (csum_ipv6_magic(&hdr->ipv6->saddr, &hdr->ipv6->daddr,
+ hdr->tcp_len, IPPROTO_TCP, value) != 0)
+ return XDP_DROP; /* Bad TCP checksum. */
+
+ ip_len = sizeof(*hdr->ipv6);
+
+ value = bpf_tcp_raw_gen_syncookie_ipv6(hdr->ipv6, hdr->tcp,
+ hdr->tcp_len);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ if (value < 0)
+ return XDP_ABORTED;
+ cookie = (__u32)value;
+
+ if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
+ &tsopt_buf[0], &tsopt_buf[1], data_end))
+ tsopt = tsopt_buf;
+
+ /* Check that there is enough space for a SYNACK. It also covers
+ * the check that the destination of the __builtin_memmove below
+ * doesn't overflow.
+ */
+ if (data + sizeof(*hdr->eth) + ip_len + TCP_MAXLEN > data_end)
+ return XDP_ABORTED;
+
+ if (hdr->ipv4) {
+ if (hdr->ipv4->ihl * 4 > sizeof(*hdr->ipv4)) {
+ struct tcphdr *new_tcp_header;
+
+ new_tcp_header = data + sizeof(*hdr->eth) + sizeof(*hdr->ipv4);
+ __builtin_memmove(new_tcp_header, hdr->tcp, sizeof(*hdr->tcp));
+ hdr->tcp = new_tcp_header;
+
+ hdr->ipv4->ihl = sizeof(*hdr->ipv4) / 4;
+ }
+
+ tcpv4_gen_synack(hdr, cookie, tsopt);
+ } else if (hdr->ipv6) {
+ tcpv6_gen_synack(hdr, cookie, tsopt);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ /* Recalculate checksums. */
+ hdr->tcp->check = 0;
+ value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (hdr->ipv4) {
+ hdr->tcp->check = csum_tcpudp_magic(hdr->ipv4->saddr,
+ hdr->ipv4->daddr,
+ hdr->tcp_len,
+ IPPROTO_TCP,
+ value);
+
+ hdr->ipv4->check = 0;
+ value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, sizeof(*hdr->ipv4), 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ hdr->ipv4->check = csum_fold(value);
+ } else if (hdr->ipv6) {
+ hdr->tcp->check = csum_ipv6_magic(&hdr->ipv6->saddr,
+ &hdr->ipv6->daddr,
+ hdr->tcp_len,
+ IPPROTO_TCP,
+ value);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ /* Set the new packet size. */
+ old_pkt_size = data_end - data;
+ new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4;
+ if (xdp) {
+ if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size))
+ return XDP_ABORTED;
+ } else {
+ if (bpf_skb_change_tail(ctx, new_pkt_size, 0))
+ return XDP_ABORTED;
+ }
+
+ values_inc_synacks();
+
+ return XDP_TX;
+}
+
+static __always_inline int syncookie_handle_ack(struct header_pointers *hdr)
+{
+ int err;
+
+ if (hdr->tcp->rst)
+ return XDP_DROP;
+
+ if (hdr->ipv4)
+ err = bpf_tcp_raw_check_syncookie_ipv4(hdr->ipv4, hdr->tcp);
+ else if (hdr->ipv6)
+ err = bpf_tcp_raw_check_syncookie_ipv6(hdr->ipv6, hdr->tcp);
+ else
+ return XDP_ABORTED;
+ if (err)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end,
+ struct header_pointers *hdr, bool xdp)
+{
+ int ret;
+
+ ret = tcp_dissect(data, data_end, hdr);
+ if (ret != XDP_TX)
+ return ret;
+
+ ret = tcp_lookup(ctx, hdr, xdp);
+ if (ret != XDP_TX)
+ return ret;
+
+ /* Packet is TCP and doesn't belong to an established connection. */
+
+ if ((hdr->tcp->syn ^ hdr->tcp->ack) != 1)
+ return XDP_DROP;
+
+ /* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr->tcp_len
+ * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier.
+ */
+ if (xdp) {
+ if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr->tcp_len))
+ return XDP_ABORTED;
+ } else {
+ /* Without volatile the verifier throws this error:
+ * R9 32-bit pointer arithmetic prohibited
+ */
+ volatile u64 old_len = data_end - data;
+
+ if (bpf_skb_change_tail(ctx, old_len + TCP_MAXLEN - hdr->tcp_len, 0))
+ return XDP_ABORTED;
+ }
+
+ return XDP_TX;
+}
+
+static __always_inline int syncookie_part2(void *ctx, void *data, void *data_end,
+ struct header_pointers *hdr, bool xdp)
+{
+ if (hdr->ipv4) {
+ hdr->eth = data;
+ hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth);
+ /* IPV4_MAXLEN is needed when calculating checksum.
+ * At least sizeof(struct iphdr) is needed here to access ihl.
+ */
+ if ((void *)hdr->ipv4 + IPV4_MAXLEN > data_end)
+ return XDP_ABORTED;
+ hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4;
+ } else if (hdr->ipv6) {
+ hdr->eth = data;
+ hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth);
+ hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ if ((void *)hdr->tcp + TCP_MAXLEN > data_end)
+ return XDP_ABORTED;
+
+ /* We run out of registers, tcp_len gets spilled to the stack, and the
+ * verifier forgets its min and max values checked above in tcp_dissect.
+ */
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ if (hdr->tcp_len < sizeof(*hdr->tcp))
+ return XDP_ABORTED;
+
+ return hdr->tcp->syn ? syncookie_handle_syn(hdr, ctx, data, data_end, xdp) :
+ syncookie_handle_ack(hdr);
+}
+
+SEC("xdp")
+int syncookie_xdp(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct header_pointers hdr;
+ int ret;
+
+ ret = syncookie_part1(ctx, data, data_end, &hdr, true);
+ if (ret != XDP_TX)
+ return ret;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ return syncookie_part2(ctx, data, data_end, &hdr, true);
+}
+
+SEC("tc")
+int syncookie_tc(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct header_pointers hdr;
+ int ret;
+
+ ret = syncookie_part1(skb, data, data_end, &hdr, false);
+ if (ret != XDP_TX)
+ return ret == XDP_PASS ? TC_ACT_OK : TC_ACT_SHOT;
+
+ data_end = (void *)(long)skb->data_end;
+ data = (void *)(long)skb->data;
+
+ ret = syncookie_part2(skb, data, data_end, &hdr, false);
+ switch (ret) {
+ case XDP_PASS:
+ return TC_ACT_OK;
+ case XDP_TX:
+ return bpf_redirect(skb->ifindex, 0);
+ default:
+ return TC_ACT_SHOT;
+ }
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index c0e7acd698ed..a6410bebe603 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -58,7 +58,7 @@ class BlockParser(object):
class ArrayParser(BlockParser):
"""
- A parser for extracting dicionaries of values from some BPF-related arrays.
+ A parser for extracting a set of values from some BPF-related arrays.
@reader: a pointer to the open file to parse
@array_name: name of the array to parse
"""
@@ -66,7 +66,7 @@ class ArrayParser(BlockParser):
def __init__(self, reader, array_name):
self.array_name = array_name
- self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n')
+ self.start_marker = re.compile(f'(static )?const bool {self.array_name}\[.*\] = {{\n')
super().__init__(reader)
def search_block(self):
@@ -80,15 +80,15 @@ class ArrayParser(BlockParser):
Parse a block and return data as a dictionary. Items to extract must be
on separate lines in the file.
"""
- pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$')
- entries = {}
+ pattern = re.compile('\[(BPF_\w*)\]\s*= (true|false),?$')
+ entries = set()
while True:
line = self.reader.readline()
if line == '' or re.match(self.end_marker, line):
break
capture = pattern.search(line)
if capture:
- entries[capture.group(1)] = capture.group(2)
+ entries |= {capture.group(1)}
return entries
class InlineListParser(BlockParser):
@@ -115,7 +115,7 @@ class InlineListParser(BlockParser):
class FileExtractor(object):
"""
A generic reader for extracting data from a given file. This class contains
- several helper methods that wrap arround parser objects to extract values
+ several helper methods that wrap around parser objects to extract values
from different structures.
This class does not offer a way to set a filename, which is expected to be
defined in children classes.
@@ -139,21 +139,19 @@ class FileExtractor(object):
def get_types_from_array(self, array_name):
"""
- Search for and parse an array associating names to BPF_* enum members,
- for example:
+ Search for and parse a list of allowed BPF_* enum members, for example:
- const char * const prog_type_name[] = {
- [BPF_PROG_TYPE_UNSPEC] = "unspec",
- [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
- [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ const bool prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = true,
+ [BPF_PROG_TYPE_SOCKET_FILTER] = true,
+ [BPF_PROG_TYPE_KPROBE] = true,
};
- Return a dictionary with the enum member names as keys and the
- associated names as values, for example:
+ Return a set of the enum members, for example:
- {'BPF_PROG_TYPE_UNSPEC': 'unspec',
- 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter',
- 'BPF_PROG_TYPE_KPROBE': 'kprobe'}
+ {'BPF_PROG_TYPE_UNSPEC',
+ 'BPF_PROG_TYPE_SOCKET_FILTER',
+ 'BPF_PROG_TYPE_KPROBE'}
@array_name: name of the array to parse
"""
@@ -186,6 +184,27 @@ class FileExtractor(object):
parser.search_block(start_marker)
return parser.parse(pattern, end_marker)
+ def make_enum_map(self, names, enum_prefix):
+ """
+ Search for and parse an enum containing BPF_* members, just as get_enum
+ does. However, instead of just returning a set of the variant names,
+ also generate a textual representation from them by (assuming and)
+ removing a provided prefix and lowercasing the remainder. Then return a
+ dict mapping from name to textual representation.
+
+ @enum_values: a set of enum values; e.g., as retrieved by get_enum
+ @enum_prefix: the prefix to remove from each of the variants to infer
+ textual representation
+ """
+ mapping = {}
+ for name in names:
+ if not name.startswith(enum_prefix):
+ raise Exception(f"enum variant {name} does not start with {enum_prefix}")
+ text = name[len(enum_prefix):].lower()
+ mapping[name] = text
+
+ return mapping
+
def __get_description_list(self, start_marker, pattern, end_marker):
parser = InlineListParser(self.reader)
parser.search_block(start_marker)
@@ -333,11 +352,9 @@ class ProgFileExtractor(SourceFileExtractor):
"""
filename = os.path.join(BPFTOOL_DIR, 'prog.c')
- def get_prog_types(self):
- return self.get_types_from_array('prog_type_name')
-
def get_attach_types(self):
- return self.get_types_from_array('attach_type_strings')
+ types = self.get_types_from_array('attach_types')
+ return self.make_enum_map(types, 'BPF_')
def get_prog_attach_help(self):
return self.get_help_list('ATTACH_TYPE')
@@ -348,9 +365,6 @@ class MapFileExtractor(SourceFileExtractor):
"""
filename = os.path.join(BPFTOOL_DIR, 'map.c')
- def get_map_types(self):
- return self.get_types_from_array('map_type_name')
-
def get_map_help(self):
return self.get_help_list('TYPE')
@@ -363,30 +377,6 @@ class CgroupFileExtractor(SourceFileExtractor):
def get_prog_attach_help(self):
return self.get_help_list('ATTACH_TYPE')
-class CommonFileExtractor(SourceFileExtractor):
- """
- An extractor for bpftool's common.c.
- """
- filename = os.path.join(BPFTOOL_DIR, 'common.c')
-
- def __init__(self):
- super().__init__()
- self.attach_types = {}
-
- def get_attach_types(self):
- if not self.attach_types:
- self.attach_types = self.get_types_from_array('attach_type_name')
- return self.attach_types
-
- def get_cgroup_attach_types(self):
- if not self.attach_types:
- self.get_attach_types()
- cgroup_types = {}
- for (key, value) in self.attach_types.items():
- if key.find('BPF_CGROUP') != -1:
- cgroup_types[key] = value
- return cgroup_types
-
class GenericSourceExtractor(SourceFileExtractor):
"""
An extractor for generic source code files.
@@ -403,14 +393,28 @@ class BpfHeaderExtractor(FileExtractor):
"""
filename = os.path.join(INCLUDE_DIR, 'uapi/linux/bpf.h')
+ def __init__(self):
+ super().__init__()
+ self.attach_types = {}
+
def get_prog_types(self):
return self.get_enum('bpf_prog_type')
- def get_map_types(self):
- return self.get_enum('bpf_map_type')
+ def get_map_type_map(self):
+ names = self.get_enum('bpf_map_type')
+ return self.make_enum_map(names, 'BPF_MAP_TYPE_')
- def get_attach_types(self):
- return self.get_enum('bpf_attach_type')
+ def get_attach_type_map(self):
+ if not self.attach_types:
+ names = self.get_enum('bpf_attach_type')
+ self.attach_types = self.make_enum_map(names, 'BPF_')
+ return self.attach_types
+
+ def get_cgroup_attach_type_map(self):
+ if not self.attach_types:
+ self.get_attach_type_map()
+ return {name: text for name, text in self.attach_types.items()
+ if name.startswith('BPF_CGROUP')}
class ManPageExtractor(FileExtractor):
"""
@@ -467,12 +471,6 @@ class BashcompExtractor(FileExtractor):
def get_prog_attach_types(self):
return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
- def get_map_types(self):
- return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES')
-
- def get_cgroup_attach_types(self):
- return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES')
-
def verify(first_set, second_set, message):
"""
Print all values that differ between two sets.
@@ -495,21 +493,12 @@ def main():
""")
args = argParser.parse_args()
- # Map types (enum)
-
bpf_info = BpfHeaderExtractor()
- ref = bpf_info.get_map_types()
-
- map_info = MapFileExtractor()
- source_map_items = map_info.get_map_types()
- map_types_enum = set(source_map_items.keys())
-
- verify(ref, map_types_enum,
- f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):')
# Map types (names)
- source_map_types = set(source_map_items.values())
+ map_info = MapFileExtractor()
+ source_map_types = set(bpf_info.get_map_type_map().values())
source_map_types.discard('unspec')
help_map_types = map_info.get_map_help()
@@ -521,41 +510,16 @@ def main():
man_map_types = man_map_info.get_map_types()
man_map_info.close()
- bashcomp_info = BashcompExtractor()
- bashcomp_map_types = bashcomp_info.get_map_types()
-
verify(source_map_types, help_map_types,
- f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):')
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {MapFileExtractor.filename} (do_help() TYPE):')
verify(source_map_types, man_map_types,
- f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):')
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {ManMapExtractor.filename} (TYPE):')
verify(help_map_options, man_map_options,
f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
- verify(source_map_types, bashcomp_map_types,
- f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):')
-
- # Program types (enum)
-
- ref = bpf_info.get_prog_types()
-
- prog_info = ProgFileExtractor()
- prog_types = set(prog_info.get_prog_types().keys())
-
- verify(ref, prog_types,
- f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):')
-
- # Attach types (enum)
-
- ref = bpf_info.get_attach_types()
- bpf_info.close()
-
- common_info = CommonFileExtractor()
- attach_types = common_info.get_attach_types()
-
- verify(ref, attach_types,
- f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):')
# Attach types (names)
+ prog_info = ProgFileExtractor()
source_prog_attach_types = set(prog_info.get_attach_types().values())
help_prog_attach_types = prog_info.get_prog_attach_help()
@@ -567,22 +531,23 @@ def main():
man_prog_attach_types = man_prog_info.get_attach_types()
man_prog_info.close()
- bashcomp_info.reset_read() # We stopped at map types, rewind
+
+ bashcomp_info = BashcompExtractor()
bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
+ bashcomp_info.close()
verify(source_prog_attach_types, help_prog_attach_types,
- f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
verify(source_prog_attach_types, man_prog_attach_types,
- f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):')
+ f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ManProgExtractor.filename} (ATTACH_TYPE):')
verify(help_prog_options, man_prog_options,
f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):')
verify(source_prog_attach_types, bashcomp_prog_attach_types,
- f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
+ f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
# Cgroup attach types
-
- source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values())
- common_info.close()
+ source_cgroup_attach_types = set(bpf_info.get_cgroup_attach_type_map().values())
+ bpf_info.close()
cgroup_info = CgroupFileExtractor()
help_cgroup_attach_types = cgroup_info.get_prog_attach_help()
@@ -594,17 +559,12 @@ def main():
man_cgroup_attach_types = man_cgroup_info.get_attach_types()
man_cgroup_info.close()
- bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types()
- bashcomp_info.close()
-
verify(source_cgroup_attach_types, help_cgroup_attach_types,
- f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
verify(source_cgroup_attach_types, man_cgroup_attach_types,
- f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
verify(help_cgroup_options, man_cgroup_options,
f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
- verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types,
- f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):')
# Options for remaining commands
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index 128989bed8b7..fb4f4714eeb4 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -4,6 +4,8 @@
#ifndef _TEST_BTF_H
#define _TEST_BTF_H
+#define BTF_END_RAW 0xdeadbeef
+
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
@@ -39,6 +41,7 @@
#define BTF_MEMBER_ENC(name, type, bits_offset) \
(name), (type), (bits_offset)
#define BTF_ENUM_ENC(name, val) (name), (val)
+#define BTF_ENUM64_ENC(name, val_lo32, val_hi32) (name), (val_lo32), (val_hi32)
#define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \
((bitfield_size) << 24 | (bits_offset))
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index c639f2e56fc5..3561c97701f2 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1604,11 +1604,8 @@ int main(int argc, char **argv)
struct prog_test_def *test = &prog_test_defs[i];
test->test_num = i + 1;
- if (should_run(&env.test_selector,
- test->test_num, test->test_name))
- test->should_run = true;
- else
- test->should_run = false;
+ test->should_run = should_run(&env.test_selector,
+ test->test_num, test->test_name);
if ((test->run_test == NULL && test->run_serial_test == NULL) ||
(test->run_test != NULL && test->run_serial_test != NULL)) {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 372579c9f45e..f9d553fbf68a 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -51,12 +51,24 @@
#endif
#define MAX_INSNS BPF_MAXINSNS
+#define MAX_EXPECTED_INSNS 32
+#define MAX_UNEXPECTED_INSNS 32
#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
#define MAX_NR_MAPS 23
#define MAX_TEST_RUNS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
+#define MAX_FUNC_INFOS 8
+#define MAX_BTF_STRINGS 256
+#define MAX_BTF_TYPES 256
+
+#define INSN_OFF_MASK ((__s16)0xFFFF)
+#define INSN_IMM_MASK ((__s32)0xFFFFFFFF)
+#define SKIP_INSNS() BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef)
+
+#define DEFAULT_LIBBPF_LOG_LEVEL 4
+#define VERBOSE_LIBBPF_LOG_LEVEL 1
#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
@@ -79,6 +91,23 @@ struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
struct bpf_insn *fill_insns;
+ /* If specified, test engine looks for this sequence of
+ * instructions in the BPF program after loading. Allows to
+ * test rewrites applied by verifier. Use values
+ * INSN_OFF_MASK and INSN_IMM_MASK to mask `off` and `imm`
+ * fields if content does not matter. The test case fails if
+ * specified instructions are not found.
+ *
+ * The sequence could be split into sub-sequences by adding
+ * SKIP_INSNS instruction at the end of each sub-sequence. In
+ * such case sub-sequences are searched for one after another.
+ */
+ struct bpf_insn expected_insns[MAX_EXPECTED_INSNS];
+ /* If specified, test engine applies same pattern matching
+ * logic as for `expected_insns`. If the specified pattern is
+ * matched test case is marked as failed.
+ */
+ struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS];
int fixup_map_hash_8b[MAX_FIXUPS];
int fixup_map_hash_48b[MAX_FIXUPS];
int fixup_map_hash_16b[MAX_FIXUPS];
@@ -135,6 +164,14 @@ struct bpf_test {
};
enum bpf_attach_type expected_attach_type;
const char *kfunc;
+ struct bpf_func_info func_info[MAX_FUNC_INFOS];
+ int func_info_cnt;
+ char btf_strings[MAX_BTF_STRINGS];
+ /* A set of BTF types to load when specified,
+ * use macro definitions from test_btf.h,
+ * must end with BTF_END_RAW
+ */
+ __u32 btf_types[MAX_BTF_TYPES];
};
/* Note we want this to be 64 bit aligned so that the end of our array is
@@ -388,6 +425,45 @@ static void bpf_fill_torturous_jumps(struct bpf_test *self)
}
}
+static void bpf_fill_big_prog_with_loop_1(struct bpf_test *self)
+{
+ struct bpf_insn *insn = self->fill_insns;
+ /* This test was added to catch a specific use after free
+ * error, which happened upon BPF program reallocation.
+ * Reallocation is handled by core.c:bpf_prog_realloc, which
+ * reuses old memory if page boundary is not crossed. The
+ * value of `len` is chosen to cross this boundary on bpf_loop
+ * patching.
+ */
+ const int len = getpagesize() - 25;
+ int callback_load_idx;
+ int callback_idx;
+ int i = 0;
+
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1);
+ callback_load_idx = i;
+ insn[i++] = BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW,
+ BPF_REG_2, BPF_PSEUDO_FUNC, 0,
+ 777 /* filled below */);
+ insn[i++] = BPF_RAW_INSN(0, 0, 0, 0, 0);
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0);
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0);
+ insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop);
+
+ while (i < len - 3)
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+ insn[i++] = BPF_EXIT_INSN();
+
+ callback_idx = i;
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+ insn[i++] = BPF_EXIT_INSN();
+
+ insn[callback_load_idx].imm = callback_idx - callback_load_idx - 1;
+ self->func_info[1].insn_off = callback_idx;
+ self->prog_len = i;
+ assert(i == len);
+}
+
/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
#define BPF_SK_LOOKUP(func) \
/* struct bpf_sock_tuple tuple = {} */ \
@@ -664,34 +740,66 @@ static __u32 btf_raw_types[] = {
BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */
};
-static int load_btf(void)
+static char bpf_vlog[UINT_MAX >> 8];
+
+static int load_btf_spec(__u32 *types, int types_len,
+ const char *strings, int strings_len)
{
struct btf_header hdr = {
.magic = BTF_MAGIC,
.version = BTF_VERSION,
.hdr_len = sizeof(struct btf_header),
- .type_len = sizeof(btf_raw_types),
- .str_off = sizeof(btf_raw_types),
- .str_len = sizeof(btf_str_sec),
+ .type_len = types_len,
+ .str_off = types_len,
+ .str_len = strings_len,
};
void *ptr, *raw_btf;
int btf_fd;
+ LIBBPF_OPTS(bpf_btf_load_opts, opts,
+ .log_buf = bpf_vlog,
+ .log_size = sizeof(bpf_vlog),
+ .log_level = (verbose
+ ? VERBOSE_LIBBPF_LOG_LEVEL
+ : DEFAULT_LIBBPF_LOG_LEVEL),
+ );
- ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) +
- sizeof(btf_str_sec));
+ raw_btf = malloc(sizeof(hdr) + types_len + strings_len);
+ ptr = raw_btf;
memcpy(ptr, &hdr, sizeof(hdr));
ptr += sizeof(hdr);
- memcpy(ptr, btf_raw_types, hdr.type_len);
+ memcpy(ptr, types, hdr.type_len);
ptr += hdr.type_len;
- memcpy(ptr, btf_str_sec, hdr.str_len);
+ memcpy(ptr, strings, hdr.str_len);
ptr += hdr.str_len;
- btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, NULL);
- free(raw_btf);
+ btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, &opts);
if (btf_fd < 0)
- return -1;
- return btf_fd;
+ printf("Failed to load BTF spec: '%s'\n", strerror(errno));
+
+ free(raw_btf);
+
+ return btf_fd < 0 ? -1 : btf_fd;
+}
+
+static int load_btf(void)
+{
+ return load_btf_spec(btf_raw_types, sizeof(btf_raw_types),
+ btf_str_sec, sizeof(btf_str_sec));
+}
+
+static int load_btf_for_test(struct bpf_test *test)
+{
+ int types_num = 0;
+
+ while (types_num < MAX_BTF_TYPES &&
+ test->btf_types[types_num] != BTF_END_RAW)
+ ++types_num;
+
+ int types_len = types_num * sizeof(test->btf_types[0]);
+
+ return load_btf_spec(test->btf_types, types_len,
+ test->btf_strings, sizeof(test->btf_strings));
}
static int create_map_spin_lock(void)
@@ -770,8 +878,6 @@ static int create_map_kptr(void)
return fd;
}
-static char bpf_vlog[UINT_MAX >> 8];
-
static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
struct bpf_insn *prog, int *map_fds)
{
@@ -1126,10 +1232,218 @@ static bool cmp_str_seq(const char *log, const char *exp)
return true;
}
+static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ __u32 xlated_prog_len;
+ __u32 buf_element_size = sizeof(struct bpf_insn);
+
+ if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("bpf_obj_get_info_by_fd failed");
+ return -1;
+ }
+
+ xlated_prog_len = info.xlated_prog_len;
+ if (xlated_prog_len % buf_element_size) {
+ printf("Program length %d is not multiple of %d\n",
+ xlated_prog_len, buf_element_size);
+ return -1;
+ }
+
+ *cnt = xlated_prog_len / buf_element_size;
+ *buf = calloc(*cnt, buf_element_size);
+ if (!buf) {
+ perror("can't allocate xlated program buffer");
+ return -ENOMEM;
+ }
+
+ bzero(&info, sizeof(info));
+ info.xlated_prog_len = xlated_prog_len;
+ info.xlated_prog_insns = (__u64)*buf;
+ if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("second bpf_obj_get_info_by_fd failed");
+ goto out_free_buf;
+ }
+
+ return 0;
+
+out_free_buf:
+ free(*buf);
+ return -1;
+}
+
+static bool is_null_insn(struct bpf_insn *insn)
+{
+ struct bpf_insn null_insn = {};
+
+ return memcmp(insn, &null_insn, sizeof(null_insn)) == 0;
+}
+
+static bool is_skip_insn(struct bpf_insn *insn)
+{
+ struct bpf_insn skip_insn = SKIP_INSNS();
+
+ return memcmp(insn, &skip_insn, sizeof(skip_insn)) == 0;
+}
+
+static int null_terminated_insn_len(struct bpf_insn *seq, int max_len)
+{
+ int i;
+
+ for (i = 0; i < max_len; ++i) {
+ if (is_null_insn(&seq[i]))
+ return i;
+ }
+ return max_len;
+}
+
+static bool compare_masked_insn(struct bpf_insn *orig, struct bpf_insn *masked)
+{
+ struct bpf_insn orig_masked;
+
+ memcpy(&orig_masked, orig, sizeof(orig_masked));
+ if (masked->imm == INSN_IMM_MASK)
+ orig_masked.imm = INSN_IMM_MASK;
+ if (masked->off == INSN_OFF_MASK)
+ orig_masked.off = INSN_OFF_MASK;
+
+ return memcmp(&orig_masked, masked, sizeof(orig_masked)) == 0;
+}
+
+static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq,
+ int seq_len, int subseq_len)
+{
+ int i, j;
+
+ if (subseq_len > seq_len)
+ return -1;
+
+ for (i = 0; i < seq_len - subseq_len + 1; ++i) {
+ bool found = true;
+
+ for (j = 0; j < subseq_len; ++j) {
+ if (!compare_masked_insn(&seq[i + j], &subseq[j])) {
+ found = false;
+ break;
+ }
+ }
+ if (found)
+ return i;
+ }
+
+ return -1;
+}
+
+static int find_skip_insn_marker(struct bpf_insn *seq, int len)
+{
+ int i;
+
+ for (i = 0; i < len; ++i)
+ if (is_skip_insn(&seq[i]))
+ return i;
+
+ return -1;
+}
+
+/* Return true if all sub-sequences in `subseqs` could be found in
+ * `seq` one after another. Sub-sequences are separated by a single
+ * nil instruction.
+ */
+static bool find_all_insn_subseqs(struct bpf_insn *seq, struct bpf_insn *subseqs,
+ int seq_len, int max_subseqs_len)
+{
+ int subseqs_len = null_terminated_insn_len(subseqs, max_subseqs_len);
+
+ while (subseqs_len > 0) {
+ int skip_idx = find_skip_insn_marker(subseqs, subseqs_len);
+ int cur_subseq_len = skip_idx < 0 ? subseqs_len : skip_idx;
+ int subseq_idx = find_insn_subseq(seq, subseqs,
+ seq_len, cur_subseq_len);
+
+ if (subseq_idx < 0)
+ return false;
+ seq += subseq_idx + cur_subseq_len;
+ seq_len -= subseq_idx + cur_subseq_len;
+ subseqs += cur_subseq_len + 1;
+ subseqs_len -= cur_subseq_len + 1;
+ }
+
+ return true;
+}
+
+static void print_insn(struct bpf_insn *buf, int cnt)
+{
+ int i;
+
+ printf(" addr op d s off imm\n");
+ for (i = 0; i < cnt; ++i) {
+ struct bpf_insn *insn = &buf[i];
+
+ if (is_null_insn(insn))
+ break;
+
+ if (is_skip_insn(insn))
+ printf(" ...\n");
+ else
+ printf(" %04x: %02x %1x %x %04hx %08x\n",
+ i, insn->code, insn->dst_reg,
+ insn->src_reg, insn->off, insn->imm);
+ }
+}
+
+static bool check_xlated_program(struct bpf_test *test, int fd_prog)
+{
+ struct bpf_insn *buf;
+ int cnt;
+ bool result = true;
+ bool check_expected = !is_null_insn(test->expected_insns);
+ bool check_unexpected = !is_null_insn(test->unexpected_insns);
+
+ if (!check_expected && !check_unexpected)
+ goto out;
+
+ if (get_xlated_program(fd_prog, &buf, &cnt)) {
+ printf("FAIL: can't get xlated program\n");
+ result = false;
+ goto out;
+ }
+
+ if (check_expected &&
+ !find_all_insn_subseqs(buf, test->expected_insns,
+ cnt, MAX_EXPECTED_INSNS)) {
+ printf("FAIL: can't find expected subsequence of instructions\n");
+ result = false;
+ if (verbose) {
+ printf("Program:\n");
+ print_insn(buf, cnt);
+ printf("Expected subsequence:\n");
+ print_insn(test->expected_insns, MAX_EXPECTED_INSNS);
+ }
+ }
+
+ if (check_unexpected &&
+ find_all_insn_subseqs(buf, test->unexpected_insns,
+ cnt, MAX_UNEXPECTED_INSNS)) {
+ printf("FAIL: found unexpected subsequence of instructions\n");
+ result = false;
+ if (verbose) {
+ printf("Program:\n");
+ print_insn(buf, cnt);
+ printf("Un-expected subsequence:\n");
+ print_insn(test->unexpected_insns, MAX_UNEXPECTED_INSNS);
+ }
+ }
+
+ free(buf);
+ out:
+ return result;
+}
+
static void do_test_single(struct bpf_test *test, bool unpriv,
int *passes, int *errors)
{
- int fd_prog, expected_ret, alignment_prevented_execution;
+ int fd_prog, btf_fd, expected_ret, alignment_prevented_execution;
int prog_len, prog_type = test->prog_type;
struct bpf_insn *prog = test->insns;
LIBBPF_OPTS(bpf_prog_load_opts, opts);
@@ -1141,8 +1455,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
__u32 pflags;
int i, err;
+ fd_prog = -1;
for (i = 0; i < MAX_NR_MAPS; i++)
map_fds[i] = -1;
+ btf_fd = -1;
if (!prog_type)
prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
@@ -1175,11 +1491,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
opts.expected_attach_type = test->expected_attach_type;
if (verbose)
- opts.log_level = 1;
+ opts.log_level = VERBOSE_LIBBPF_LOG_LEVEL;
else if (expected_ret == VERBOSE_ACCEPT)
opts.log_level = 2;
else
- opts.log_level = 4;
+ opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
opts.prog_flags = pflags;
if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) {
@@ -1197,6 +1513,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
opts.attach_btf_id = attach_btf_id;
}
+ if (test->btf_types[0] != 0) {
+ btf_fd = load_btf_for_test(test);
+ if (btf_fd < 0)
+ goto fail_log;
+ opts.prog_btf_fd = btf_fd;
+ }
+
+ if (test->func_info_cnt != 0) {
+ opts.func_info = test->func_info;
+ opts.func_info_cnt = test->func_info_cnt;
+ opts.func_info_rec_size = sizeof(test->func_info[0]);
+ }
+
opts.log_buf = bpf_vlog;
opts.log_size = sizeof(bpf_vlog);
fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts);
@@ -1262,6 +1591,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (verbose)
printf(", verifier log:\n%s", bpf_vlog);
+ if (!check_xlated_program(test, fd_prog))
+ goto fail_log;
+
run_errs = 0;
run_successes = 0;
if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
@@ -1305,6 +1637,7 @@ close_fds:
if (test->fill_insns)
free(test->fill_insns);
close(fd_prog);
+ close(btf_fd);
for (i = 0; i < MAX_NR_MAPS; i++)
close(map_fds[i]);
sched_yield();
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
index 392d28cc4e58..49936c4c8567 100755
--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -106,9 +106,9 @@ bpftool prog loadall \
bpftool map update pinned $BPF_DIR/maps/tx_port key 0 0 0 0 value 122 0 0 0
bpftool map update pinned $BPF_DIR/maps/tx_port key 1 0 0 0 value 133 0 0 0
bpftool map update pinned $BPF_DIR/maps/tx_port key 2 0 0 0 value 111 0 0 0
-ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0
-ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
-ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
+ip link set dev veth1 xdp pinned $BPF_DIR/progs/xdp_redirect_map_0
+ip link set dev veth2 xdp pinned $BPF_DIR/progs/xdp_redirect_map_1
+ip link set dev veth3 xdp pinned $BPF_DIR/progs/xdp_redirect_map_2
ip -n ${NS1} link set dev veth11 xdp obj xdp_dummy.o sec xdp
ip -n ${NS2} link set dev veth22 xdp obj xdp_tx.o sec xdp
diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh
index c2f0ddb45531..c3d82e0a7378 100755
--- a/tools/testing/selftests/bpf/test_xdping.sh
+++ b/tools/testing/selftests/bpf/test_xdping.sh
@@ -95,5 +95,9 @@ for server_args in "" "-I veth0 -s -S" ; do
test "$client_args" "$server_args"
done
+# Test drv mode
+test "-I veth1 -N" "-I veth0 -s -N"
+test "-I veth1 -N -c 10" "-I veth0 -s -N"
+
echo "OK. All tests passed"
exit 0
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 567500299231..096a957594cd 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -47,7 +47,7 @@
# conflict with any existing interface
# * tests the veth and xsk layers of the topology
#
-# See the source xdpxceiver.c for information on each test
+# See the source xskxceiver.c for information on each test
#
# Kernel configuration:
# ---------------------
@@ -160,14 +160,14 @@ statusList=()
TEST_NAME="XSK_SELFTESTS_SOFTIRQ"
-execxdpxceiver
+exec_xskxceiver
cleanup_exit ${VETH0} ${VETH1} ${NS1}
TEST_NAME="XSK_SELFTESTS_BUSY_POLL"
busy_poll=1
setup_vethPairs
-execxdpxceiver
+exec_xskxceiver
## END TESTS
diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
new file mode 100644
index 000000000000..a535d41dc20d
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
@@ -0,0 +1,264 @@
+#define BTF_TYPES \
+ .btf_strings = "\0int\0i\0ctx\0callback\0main\0", \
+ .btf_types = { \
+ /* 1: int */ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), \
+ /* 2: int* */ BTF_PTR_ENC(1), \
+ /* 3: void* */ BTF_PTR_ENC(0), \
+ /* 4: int __(void*) */ BTF_FUNC_PROTO_ENC(1, 1), \
+ BTF_FUNC_PROTO_ARG_ENC(7, 3), \
+ /* 5: int __(int, int*) */ BTF_FUNC_PROTO_ENC(1, 2), \
+ BTF_FUNC_PROTO_ARG_ENC(5, 1), \
+ BTF_FUNC_PROTO_ARG_ENC(7, 2), \
+ /* 6: main */ BTF_FUNC_ENC(20, 4), \
+ /* 7: callback */ BTF_FUNC_ENC(11, 5), \
+ BTF_END_RAW \
+ }
+
+#define MAIN_TYPE 6
+#define CALLBACK_TYPE 7
+
+/* can't use BPF_CALL_REL, jit_subprogs adjusts IMM & OFF
+ * fields for pseudo calls
+ */
+#define PSEUDO_CALL_INSN() \
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, \
+ INSN_OFF_MASK, INSN_IMM_MASK)
+
+/* can't use BPF_FUNC_loop constant,
+ * do_mix_fixups adjusts the IMM field
+ */
+#define HELPER_CALL_INSN() \
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, INSN_OFF_MASK, INSN_IMM_MASK)
+
+{
+ "inline simple bpf_loop call",
+ .insns = {
+ /* main */
+ /* force verifier state branching to verify logic on first and
+ * subsequent bpf_loop insn processing steps
+ */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 2),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { PSEUDO_CALL_INSN() },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } },
+ .func_info_cnt = 2,
+ BTF_TYPES
+},
+{
+ "don't inline bpf_loop call, flags non-zero",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 9),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 7),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -10),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { HELPER_CALL_INSN() },
+ .unexpected_insns = { PSEUDO_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
+ .func_info_cnt = 2,
+ BTF_TYPES
+},
+{
+ "don't inline bpf_loop call, callback non-constant",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 4), /* pick a random callback */
+
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 10),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* callback #2 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { HELPER_CALL_INSN() },
+ .unexpected_insns = { PSEUDO_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = {
+ { 0, MAIN_TYPE },
+ { 14, CALLBACK_TYPE },
+ { 16, CALLBACK_TYPE }
+ },
+ .func_info_cnt = 3,
+ BTF_TYPES
+},
+{
+ "bpf_loop_inline and a dead func",
+ .insns = {
+ /* main */
+
+ /* A reference to callback #1 to make verifier count it as a func.
+ * This reference is overwritten below and callback #1 is dead.
+ */
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 9),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* callback #2 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { PSEUDO_CALL_INSN() },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = {
+ { 0, MAIN_TYPE },
+ { 10, CALLBACK_TYPE },
+ { 12, CALLBACK_TYPE }
+ },
+ .func_info_cnt = 3,
+ BTF_TYPES
+},
+{
+ "bpf_loop_inline stack locations for loop vars",
+ .insns = {
+ /* main */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77),
+ /* bpf_loop call #1 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 22),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ /* bpf_loop call #2 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 16),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ /* call func and exit */
+ BPF_CALL_REL(2),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* func */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77),
+ SKIP_INSNS(),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24),
+ SKIP_INSNS(),
+ /* offsets are the same as in the first call */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24),
+ SKIP_INSNS(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55),
+ SKIP_INSNS(),
+ /* offsets differ from main because of different offset
+ * in BPF_ST_MEM instruction
+ */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -40),
+ },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ .func_info = {
+ { 0, MAIN_TYPE },
+ { 16, MAIN_TYPE },
+ { 25, CALLBACK_TYPE },
+ },
+ .func_info_cnt = 3,
+ BTF_TYPES
+},
+{
+ "inline bpf_loop call in a big program",
+ .insns = {},
+ .fill_helper = bpf_fill_big_prog_with_loop_1,
+ .expected_insns = { PSEUDO_CALL_INSN() },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
+ .func_info_cnt = 2,
+ BTF_TYPES
+},
+
+#undef HELPER_CALL_INSN
+#undef PSEUDO_CALL_INSN
+#undef CALLBACK_TYPE
+#undef MAIN_TYPE
+#undef BTF_TYPES
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 743ed34c1238..3fb4f69b1962 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -219,6 +219,59 @@
.errstr = "variable ptr_ access var_off=(0x0; 0x7) disallowed",
},
{
+ "calls: invalid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_ref", 8 },
+ { "bpf_kfunc_call_test_ref", 10 },
+ },
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr = "R1 must be referenced",
+},
+{
+ "calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_ref", 8 },
+ { "bpf_kfunc_call_test_release", 10 },
+ },
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+},
+{
"calls: basic sanity",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index 6ddc418fdfaf..1a27a6210554 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -864,3 +864,24 @@
.result = ACCEPT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "jeq32/jne32: bounds checking",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_6, 563),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
+ BPF_ALU32_REG(BPF_OR, BPF_REG_2, BPF_REG_6),
+ BPF_JMP32_IMM(BPF_JNE, BPF_REG_2, 8, 5),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_2, 500, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+},
diff --git a/tools/testing/selftests/bpf/verifier/jump.c b/tools/testing/selftests/bpf/verifier/jump.c
index 6f951d1ff0a4..497fe17d2eaf 100644
--- a/tools/testing/selftests/bpf/verifier/jump.c
+++ b/tools/testing/selftests/bpf/verifier/jump.c
@@ -373,3 +373,25 @@
.result = ACCEPT,
.retval = 3,
},
+{
+ "jump & dead code elimination",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_3, 32767),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_3, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0x8000, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -32767),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 0, 1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+},
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index e0bb04a97e10..b86ae4a2e5c5 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -30,8 +30,7 @@ DEFAULT_COMMAND="./test_progs"
MOUNT_DIR="mnt"
ROOTFS_IMAGE="root.img"
OUTPUT_DIR="$HOME/.bpf_selftests"
-KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/config-latest.${ARCH}"
-KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/config-latest.${ARCH}"
+KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}")
INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX"
NUM_COMPILE_JOBS="$(nproc)"
LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
@@ -269,26 +268,42 @@ is_rel_path()
[[ ${path:0:1} != "/" ]]
}
+do_update_kconfig()
+{
+ local kernel_checkout="$1"
+ local kconfig_file="$2"
+
+ rm -f "$kconfig_file" 2> /dev/null
+
+ for config in "${KCONFIG_REL_PATHS[@]}"; do
+ local kconfig_src="${kernel_checkout}/${config}"
+ cat "$kconfig_src" >> "$kconfig_file"
+ done
+}
+
update_kconfig()
{
- local kconfig_file="$1"
- local update_command="curl -sLf ${KCONFIG_URL} -o ${kconfig_file}"
- # Github does not return the "last-modified" header when retrieving the
- # raw contents of the file. Use the API call to get the last-modified
- # time of the kernel config and only update the config if it has been
- # updated after the previously cached config was created. This avoids
- # unnecessarily compiling the kernel and selftests.
- if [[ -f "${kconfig_file}" ]]; then
- local last_modified_date="$(curl -sL -D - "${KCONFIG_API_URL}" -o /dev/null | \
- grep "last-modified" | awk -F ': ' '{print $2}')"
- local remote_modified_timestamp="$(date -d "${last_modified_date}" +"%s")"
- local local_creation_timestamp="$(stat -c %Y "${kconfig_file}")"
+ local kernel_checkout="$1"
+ local kconfig_file="$2"
- if [[ "${remote_modified_timestamp}" -gt "${local_creation_timestamp}" ]]; then
- ${update_command}
- fi
+ if [[ -f "${kconfig_file}" ]]; then
+ local local_modified="$(stat -c %Y "${kconfig_file}")"
+
+ for config in "${KCONFIG_REL_PATHS[@]}"; do
+ local kconfig_src="${kernel_checkout}/${config}"
+ local src_modified="$(stat -c %Y "${kconfig_src}")"
+ # Only update the config if it has been updated after the
+ # previously cached config was created. This avoids
+ # unnecessarily compiling the kernel and selftests.
+ if [[ "${src_modified}" -gt "${local_modified}" ]]; then
+ do_update_kconfig "$kernel_checkout" "$kconfig_file"
+ # Once we have found one outdated configuration
+ # there is no need to check other ones.
+ break
+ fi
+ done
else
- ${update_command}
+ do_update_kconfig "$kernel_checkout" "$kconfig_file"
fi
}
@@ -372,7 +387,7 @@ main()
mkdir -p "${OUTPUT_DIR}"
mkdir -p "${mount_dir}"
- update_kconfig "${kconfig_file}"
+ update_kconfig "${kernel_checkout}" "${kconfig_file}"
recompile_kernel "${kernel_checkout}" "${make_command}"
diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c
new file mode 100644
index 000000000000..d874ddfb39c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_synproxy.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <stdnoreturn.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include <linux/limits.h>
+
+static unsigned int ifindex;
+static __u32 attached_prog_id;
+static bool attached_tc;
+
+static void noreturn cleanup(int sig)
+{
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ int prog_fd;
+ int err;
+
+ if (attached_prog_id == 0)
+ exit(0);
+
+ if (attached_tc) {
+ LIBBPF_OPTS(bpf_tc_hook, hook,
+ .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS);
+
+ err = bpf_tc_hook_destroy(&hook);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_tc_hook_destroy: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to destroy the TC hook\n");
+ exit(1);
+ }
+ exit(0);
+ }
+
+ prog_fd = bpf_prog_get_fd_by_id(attached_prog_id);
+ if (prog_fd < 0) {
+ fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd));
+ err = bpf_xdp_attach(ifindex, -1, 0, NULL);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to detach XDP program\n");
+ exit(1);
+ }
+ } else {
+ opts.old_prog_fd = prog_fd;
+ err = bpf_xdp_attach(ifindex, -1, XDP_FLAGS_REPLACE, &opts);
+ close(prog_fd);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_set_link_xdp_fd_opts: %s\n", strerror(-err));
+ /* Not an error if already replaced by someone else. */
+ if (err != -EEXIST) {
+ fprintf(stderr, "Failed to detach XDP program\n");
+ exit(1);
+ }
+ }
+ }
+ exit(0);
+}
+
+static noreturn void usage(const char *progname)
+{
+ fprintf(stderr, "Usage: %s [--iface <iface>|--prog <prog_id>] [--mss4 <mss ipv4> --mss6 <mss ipv6> --wscale <wscale> --ttl <ttl>] [--ports <port1>,<port2>,...] [--single] [--tc]\n",
+ progname);
+ exit(1);
+}
+
+static unsigned long parse_arg_ul(const char *progname, const char *arg, unsigned long limit)
+{
+ unsigned long res;
+ char *endptr;
+
+ errno = 0;
+ res = strtoul(arg, &endptr, 10);
+ if (errno != 0 || *endptr != '\0' || arg[0] == '\0' || res > limit)
+ usage(progname);
+
+ return res;
+}
+
+static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *prog_id,
+ __u64 *tcpipopts, char **ports, bool *single, bool *tc)
+{
+ static struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "iface", required_argument, NULL, 'i' },
+ { "prog", required_argument, NULL, 'x' },
+ { "mss4", required_argument, NULL, 4 },
+ { "mss6", required_argument, NULL, 6 },
+ { "wscale", required_argument, NULL, 'w' },
+ { "ttl", required_argument, NULL, 't' },
+ { "ports", required_argument, NULL, 'p' },
+ { "single", no_argument, NULL, 's' },
+ { "tc", no_argument, NULL, 'c' },
+ { NULL, 0, NULL, 0 },
+ };
+ unsigned long mss4, mss6, wscale, ttl;
+ unsigned int tcpipopts_mask = 0;
+
+ if (argc < 2)
+ usage(argv[0]);
+
+ *ifindex = 0;
+ *prog_id = 0;
+ *tcpipopts = 0;
+ *ports = NULL;
+ *single = false;
+
+ while (true) {
+ int opt;
+
+ opt = getopt_long(argc, argv, "", long_options, NULL);
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'h':
+ usage(argv[0]);
+ break;
+ case 'i':
+ *ifindex = if_nametoindex(optarg);
+ if (*ifindex == 0)
+ usage(argv[0]);
+ break;
+ case 'x':
+ *prog_id = parse_arg_ul(argv[0], optarg, UINT32_MAX);
+ if (*prog_id == 0)
+ usage(argv[0]);
+ break;
+ case 4:
+ mss4 = parse_arg_ul(argv[0], optarg, UINT16_MAX);
+ tcpipopts_mask |= 1 << 0;
+ break;
+ case 6:
+ mss6 = parse_arg_ul(argv[0], optarg, UINT16_MAX);
+ tcpipopts_mask |= 1 << 1;
+ break;
+ case 'w':
+ wscale = parse_arg_ul(argv[0], optarg, 14);
+ tcpipopts_mask |= 1 << 2;
+ break;
+ case 't':
+ ttl = parse_arg_ul(argv[0], optarg, UINT8_MAX);
+ tcpipopts_mask |= 1 << 3;
+ break;
+ case 'p':
+ *ports = optarg;
+ break;
+ case 's':
+ *single = true;
+ break;
+ case 'c':
+ *tc = true;
+ break;
+ default:
+ usage(argv[0]);
+ }
+ }
+ if (optind < argc)
+ usage(argv[0]);
+
+ if (tcpipopts_mask == 0xf) {
+ if (mss4 == 0 || mss6 == 0 || wscale == 0 || ttl == 0)
+ usage(argv[0]);
+ *tcpipopts = (mss6 << 32) | (ttl << 24) | (wscale << 16) | mss4;
+ } else if (tcpipopts_mask != 0) {
+ usage(argv[0]);
+ }
+
+ if (*ifindex != 0 && *prog_id != 0)
+ usage(argv[0]);
+ if (*ifindex == 0 && *prog_id == 0)
+ usage(argv[0]);
+}
+
+static int syncookie_attach(const char *argv0, unsigned int ifindex, bool tc)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ char xdp_filename[PATH_MAX];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int prog_fd;
+ int err;
+
+ snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv0);
+ obj = bpf_object__open_file(xdp_filename, NULL);
+ err = libbpf_get_error(obj);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err));
+ return err;
+ }
+
+ err = bpf_object__load(obj);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err));
+ return err;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, tc ? "syncookie_tc" : "syncookie_xdp");
+ if (!prog) {
+ fprintf(stderr, "Error: bpf_object__find_program_by_name: program was not found\n");
+ return -ENOENT;
+ }
+
+ prog_fd = bpf_program__fd(prog);
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
+ goto out;
+ }
+ attached_tc = tc;
+ attached_prog_id = info.id;
+ signal(SIGINT, cleanup);
+ signal(SIGTERM, cleanup);
+ if (tc) {
+ LIBBPF_OPTS(bpf_tc_hook, hook,
+ .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, opts,
+ .handle = 1,
+ .priority = 1,
+ .prog_fd = prog_fd);
+
+ err = bpf_tc_hook_create(&hook);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_tc_hook_create: %s\n",
+ strerror(-err));
+ goto fail;
+ }
+ err = bpf_tc_attach(&hook, &opts);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_tc_attach: %s\n",
+ strerror(-err));
+ goto fail;
+ }
+
+ } else {
+ err = bpf_xdp_attach(ifindex, prog_fd,
+ XDP_FLAGS_UPDATE_IF_NOEXIST, NULL);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n",
+ strerror(-err));
+ goto fail;
+ }
+ }
+ err = 0;
+out:
+ bpf_object__close(obj);
+ return err;
+fail:
+ signal(SIGINT, SIG_DFL);
+ signal(SIGTERM, SIG_DFL);
+ attached_prog_id = 0;
+ goto out;
+}
+
+static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports_map_fd)
+{
+ struct bpf_prog_info prog_info;
+ __u32 map_ids[8];
+ __u32 info_len;
+ int prog_fd;
+ int err;
+ int i;
+
+ *values_map_fd = -1;
+ *ports_map_fd = -1;
+
+ prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (prog_fd < 0) {
+ fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd));
+ return prog_fd;
+ }
+
+ prog_info = (struct bpf_prog_info) {
+ .nr_map_ids = 8,
+ .map_ids = (__u64)map_ids,
+ };
+ info_len = sizeof(prog_info);
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
+ goto out;
+ }
+
+ if (prog_info.nr_map_ids < 2) {
+ fprintf(stderr, "Error: Found %u BPF maps, expected at least 2\n",
+ prog_info.nr_map_ids);
+ err = -ENOENT;
+ goto out;
+ }
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ struct bpf_map_info map_info = {};
+ int map_fd;
+
+ err = bpf_map_get_fd_by_id(map_ids[i]);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_map_get_fd_by_id: %s\n", strerror(-err));
+ goto err_close_map_fds;
+ }
+ map_fd = err;
+
+ info_len = sizeof(map_info);
+ err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
+ close(map_fd);
+ goto err_close_map_fds;
+ }
+ if (strcmp(map_info.name, "values") == 0) {
+ *values_map_fd = map_fd;
+ continue;
+ }
+ if (strcmp(map_info.name, "allowed_ports") == 0) {
+ *ports_map_fd = map_fd;
+ continue;
+ }
+ close(map_fd);
+ }
+
+ if (*values_map_fd != -1 && *ports_map_fd != -1) {
+ err = 0;
+ goto out;
+ }
+
+ err = -ENOENT;
+
+err_close_map_fds:
+ if (*values_map_fd != -1)
+ close(*values_map_fd);
+ if (*ports_map_fd != -1)
+ close(*ports_map_fd);
+ *values_map_fd = -1;
+ *ports_map_fd = -1;
+
+out:
+ close(prog_fd);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ int values_map_fd, ports_map_fd;
+ __u64 tcpipopts;
+ bool firstiter;
+ __u64 prevcnt;
+ __u32 prog_id;
+ char *ports;
+ bool single;
+ int err = 0;
+ bool tc;
+
+ parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports,
+ &single, &tc);
+
+ if (prog_id == 0) {
+ if (!tc) {
+ err = bpf_xdp_query_id(ifindex, 0, &prog_id);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_get_link_xdp_id: %s\n",
+ strerror(-err));
+ goto out;
+ }
+ }
+ if (prog_id == 0) {
+ err = syncookie_attach(argv[0], ifindex, tc);
+ if (err < 0)
+ goto out;
+ prog_id = attached_prog_id;
+ }
+ }
+
+ err = syncookie_open_bpf_maps(prog_id, &values_map_fd, &ports_map_fd);
+ if (err < 0)
+ goto out;
+
+ if (ports) {
+ __u16 port_last = 0;
+ __u32 port_idx = 0;
+ char *p = ports;
+
+ fprintf(stderr, "Replacing allowed ports\n");
+
+ while (p && *p != '\0') {
+ char *token = strsep(&p, ",");
+ __u16 port;
+
+ port = parse_arg_ul(argv[0], token, UINT16_MAX);
+ err = bpf_map_update_elem(ports_map_fd, &port_idx, &port, BPF_ANY);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to add port %u (index %u)\n",
+ port, port_idx);
+ goto out_close_maps;
+ }
+ fprintf(stderr, "Added port %u\n", port);
+ port_idx++;
+ }
+ err = bpf_map_update_elem(ports_map_fd, &port_idx, &port_last, BPF_ANY);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to add the terminator value 0 (index %u)\n",
+ port_idx);
+ goto out_close_maps;
+ }
+ }
+
+ if (tcpipopts) {
+ __u32 key = 0;
+
+ fprintf(stderr, "Replacing TCP/IP options\n");
+
+ err = bpf_map_update_elem(values_map_fd, &key, &tcpipopts, BPF_ANY);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
+ goto out_close_maps;
+ }
+ }
+
+ if ((ports || tcpipopts) && attached_prog_id == 0 && !single)
+ goto out_close_maps;
+
+ prevcnt = 0;
+ firstiter = true;
+ while (true) {
+ __u32 key = 1;
+ __u64 value;
+
+ err = bpf_map_lookup_elem(values_map_fd, &key, &value);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_lookup_elem: %s\n", strerror(-err));
+ goto out_close_maps;
+ }
+ if (firstiter) {
+ prevcnt = value;
+ firstiter = false;
+ }
+ if (single) {
+ printf("Total SYNACKs generated: %llu\n", value);
+ break;
+ }
+ printf("SYNACKs generated: %llu (total %llu)\n", value - prevcnt, value);
+ prevcnt = value;
+ sleep(1);
+ }
+
+out_close_maps:
+ close(values_map_fd);
+ close(ports_map_fd);
+out:
+ return err == 0 ? 0 : 1;
+}
diff --git a/tools/lib/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index af136f73b09d..f2721a4ae7c5 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -30,16 +30,10 @@
#include <sys/types.h>
#include <linux/if_link.h>
-#include "bpf.h"
-#include "libbpf.h"
-#include "libbpf_internal.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "xsk.h"
-/* entire xsk.h and xsk.c is going away in libbpf 1.0, so ignore all internal
- * uses of deprecated APIs
- */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
#ifndef SOL_XDP
#define SOL_XDP 283
#endif
@@ -52,6 +46,8 @@
#define PF_XDP AF_XDP
#endif
+#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
+
enum xsk_prog {
XSK_PROG_FALLBACK,
XSK_PROG_REDIRECT_FLAGS,
@@ -286,11 +282,10 @@ out_mmap:
return err;
}
-DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
-int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
- __u64 size, struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp,
- const struct xsk_umem_config *usr_config)
+int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
+ __u64 size, struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *usr_config)
{
struct xdp_umem_reg mr;
struct xsk_umem *umem;
@@ -351,25 +346,9 @@ struct xsk_umem_config_v1 {
__u32 frame_headroom;
};
-COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
-int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
- __u64 size, struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp,
- const struct xsk_umem_config *usr_config)
-{
- struct xsk_umem_config config;
-
- memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1));
- config.flags = 0;
-
- return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
- &config);
-}
-
static enum xsk_prog get_xsk_prog(void)
{
enum xsk_prog detected = XSK_PROG_FALLBACK;
- __u32 size_out, retval, duration;
char data_in = 0, data_out;
struct bpf_insn insns[] = {
BPF_LD_MAP_FD(BPF_REG_1, 0),
@@ -378,6 +357,12 @@ static enum xsk_prog get_xsk_prog(void)
BPF_EMIT_CALL(BPF_FUNC_redirect_map),
BPF_EXIT_INSN(),
};
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data_in,
+ .data_size_in = 1,
+ .data_out = &data_out,
+ );
+
int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns);
map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL);
@@ -392,8 +377,8 @@ static enum xsk_prog get_xsk_prog(void)
return detected;
}
- ret = bpf_prog_test_run(prog_fd, 0, &data_in, 1, &data_out, &size_out, &retval, &duration);
- if (!ret && retval == XDP_PASS)
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ if (!ret && opts.retval == XDP_PASS)
detected = XSK_PROG_REDIRECT_FLAGS;
close(prog_fd);
close(map_fd);
@@ -510,7 +495,7 @@ static int xsk_create_bpf_link(struct xsk_socket *xsk)
int link_fd;
int err;
- err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
+ err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
if (err) {
pr_warn("getting XDP prog id failed\n");
return err;
@@ -536,6 +521,25 @@ static int xsk_create_bpf_link(struct xsk_socket *xsk)
return 0;
}
+/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst
+ * is zero-terminated string no matter what (unless sz == 0, in which case
+ * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs
+ * in what is returned. Given this is internal helper, it's trivial to extend
+ * this, when necessary. Use this instead of strncpy inside libbpf source code.
+ */
+static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz)
+{
+ size_t i;
+
+ if (sz == 0)
+ return;
+
+ sz--;
+ for (i = 0; i < sz && src[i]; i++)
+ dst[i] = src[i];
+ dst[i] = '\0';
+}
+
static int xsk_get_max_queues(struct xsk_socket *xsk)
{
struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
@@ -792,8 +796,8 @@ static int xsk_init_xdp_res(struct xsk_socket *xsk,
if (ctx->has_bpf_link)
err = xsk_create_bpf_link(xsk);
else
- err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd,
- xsk->config.xdp_flags);
+ err = bpf_xdp_attach(xsk->ctx->ifindex, ctx->prog_fd,
+ xsk->config.xdp_flags, NULL);
if (err)
goto err_attach_xdp_prog;
@@ -811,7 +815,7 @@ err_set_bpf_maps:
if (ctx->has_bpf_link)
close(ctx->link_fd);
else
- bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
+ bpf_xdp_detach(ctx->ifindex, 0, NULL);
err_attach_xdp_prog:
close(ctx->prog_fd);
err_load_xdp_prog:
@@ -862,7 +866,7 @@ static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
if (ctx->has_bpf_link)
err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
else
- err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
+ err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
if (err)
return err;
@@ -876,6 +880,11 @@ static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
return err;
}
+int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd)
+{
+ return __xsk_setup_xdp_prog(xsk, xsks_map_fd);
+}
+
static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
__u32 queue_id)
{
@@ -954,6 +963,7 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
ctx->fill = fill;
ctx->comp = comp;
list_add(&ctx->list, &umem->ctx_list);
+ ctx->has_bpf_link = xsk_probe_bpf_link();
return ctx;
}
@@ -1055,7 +1065,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
}
}
xsk->ctx = ctx;
- xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
if (rx && !rx_setup_done) {
err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
@@ -1147,8 +1156,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
goto out_mmap_tx;
}
- ctx->prog_fd = -1;
-
if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
err = __xsk_setup_xdp_prog(xsk, NULL);
if (err)
@@ -1229,7 +1236,10 @@ void xsk_socket__delete(struct xsk_socket *xsk)
ctx = xsk->ctx;
umem = ctx->umem;
- if (ctx->prog_fd != -1) {
+
+ xsk_put_ctx(ctx, true);
+
+ if (!ctx->refcount) {
xsk_delete_bpf_maps(xsk);
close(ctx->prog_fd);
if (ctx->has_bpf_link)
@@ -1248,8 +1258,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
}
}
- xsk_put_ctx(ctx, true);
-
umem->refcount--;
/* Do not close an fd that also has an associated umem connected
* to it.
diff --git a/tools/lib/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 64e9c57fd792..997723b0bfb2 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -9,15 +9,15 @@
* Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
*/
-#ifndef __LIBBPF_XSK_H
-#define __LIBBPF_XSK_H
+#ifndef __XSK_H
+#define __XSK_H
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <linux/if_xdp.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#ifdef __cplusplus
extern "C" {
@@ -251,9 +251,7 @@ static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
}
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
int xsk_umem__fd(const struct xsk_umem *umem);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
int xsk_socket__fd(const struct xsk_socket *xsk);
#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048
@@ -271,9 +269,8 @@ struct xsk_umem_config {
__u32 flags;
};
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd);
int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd);
/* Flags for the libbpf_flags field. */
@@ -288,32 +285,17 @@ struct xsk_socket_config {
};
/* Set config to NULL to get the default configuration. */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
int xsk_umem__create(struct xsk_umem **umem,
void *umem_area, __u64 size,
struct xsk_ring_prod *fill,
struct xsk_ring_cons *comp,
const struct xsk_umem_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__create_v0_0_2(struct xsk_umem **umem,
- void *umem_area, __u64 size,
- struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp,
- const struct xsk_umem_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__create_v0_0_4(struct xsk_umem **umem,
- void *umem_area, __u64 size,
- struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp,
- const struct xsk_umem_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
int xsk_socket__create(struct xsk_socket **xsk,
const char *ifname, __u32 queue_id,
struct xsk_umem *umem,
struct xsk_ring_cons *rx,
struct xsk_ring_prod *tx,
const struct xsk_socket_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
const char *ifname,
__u32 queue_id, struct xsk_umem *umem,
@@ -324,13 +306,11 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
const struct xsk_socket_config *config);
/* Returns 0 for success and -EBUSY if the umem is still in use. */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
int xsk_umem__delete(struct xsk_umem *umem);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
void xsk_socket__delete(struct xsk_socket *xsk);
#ifdef __cplusplus
} /* extern "C" */
#endif
-#endif /* __LIBBPF_XSK_H */
+#endif /* __XSK_H */
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 684e813803ec..a0b71723a818 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -8,7 +8,7 @@ ksft_xfail=2
ksft_xpass=3
ksft_skip=4
-XSKOBJ=xdpxceiver
+XSKOBJ=xskxceiver
validate_root_exec()
{
@@ -77,7 +77,7 @@ validate_ip_utility()
[ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip; }
}
-execxdpxceiver()
+exec_xskxceiver()
{
if [[ $busy_poll -eq 1 ]]; then
ARGS+="-b "
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index e5992a6b5e09..74d56d971baf 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -97,12 +97,12 @@
#include <time.h>
#include <unistd.h>
#include <stdatomic.h>
-#include <bpf/xsk.h>
-#include "xdpxceiver.h"
+#include "xsk.h"
+#include "xskxceiver.h"
#include "../kselftest.h"
/* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf.
- * Until xdpxceiver is either moved or re-writed into libxdp, suppress
+ * Until xskxceiver is either moved or re-writed into libxdp, suppress
* deprecation warnings in this file
*/
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
@@ -1085,6 +1085,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
{
u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
int ret, ifindex;
void *bufs;
u32 i;
@@ -1130,10 +1131,26 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
if (!ifindex)
exit_with_error(errno);
- ret = xsk_setup_xdp_prog(ifindex, &ifobject->xsk_map_fd);
+ ret = xsk_setup_xdp_prog_xsk(ifobject->xsk->xsk, &ifobject->xsk_map_fd);
if (ret)
exit_with_error(-ret);
+ ret = bpf_xdp_query(ifindex, ifobject->xdp_flags, &opts);
+ if (ret)
+ exit_with_error(-ret);
+
+ if (ifobject->xdp_flags & XDP_FLAGS_SKB_MODE) {
+ if (opts.attach_mode != XDP_ATTACHED_SKB) {
+ ksft_print_msg("ERROR: [%s] XDP prog not in SKB mode\n");
+ exit_with_error(-EINVAL);
+ }
+ } else if (ifobject->xdp_flags & XDP_FLAGS_DRV_MODE) {
+ if (opts.attach_mode != XDP_ATTACHED_DRV) {
+ ksft_print_msg("ERROR: [%s] XDP prog not in DRV mode\n");
+ exit_with_error(-EINVAL);
+ }
+ }
+
ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd);
if (ret)
exit_with_error(-ret);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 8f672b0fe0e1..3d17053f98e5 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -2,8 +2,8 @@
* Copyright(c) 2020 Intel Corporation.
*/
-#ifndef XDPXCEIVER_H_
-#define XDPXCEIVER_H_
+#ifndef XSKXCEIVER_H_
+#define XSKXCEIVER_H_
#ifndef SOL_XDP
#define SOL_XDP 283
@@ -169,4 +169,4 @@ pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER;
int pkts_in_flight;
-#endif /* XDPXCEIVER_H */
+#endif /* XSKXCEIVER_H_ */
diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh
index 0189db81550b..0328ac0b5a5e 100644
--- a/tools/testing/selftests/damon/_chk_dependency.sh
+++ b/tools/testing/selftests/damon/_chk_dependency.sh
@@ -26,3 +26,13 @@ do
exit 1
fi
done
+
+permission_error="Operation not permitted"
+for f in attrs target_ids monitor_on
+do
+ status=$( cat "$DBGFS/$f" 2>&1 )
+ if [ "${status#*$permission_error}" != "$status" ]; then
+ echo "Permission for reading $DBGFS/$f denied; maybe secureboot enabled?"
+ exit $ksft_skip
+ fi
+done
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
index de1c4e6de0b2..c812080e304e 100644
--- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c
+++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
@@ -32,7 +32,8 @@ int main(int argc, char *argv[])
devfd = open("/dev/udmabuf", O_RDWR);
if (devfd < 0) {
- printf("%s: [skip,no-udmabuf]\n", TEST_PREFIX);
+ printf("%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
+ TEST_PREFIX);
exit(77);
}
diff --git a/tools/testing/selftests/drivers/gpu/drm_mm.sh b/tools/testing/selftests/drivers/gpu/drm_mm.sh
index b789dc8257e6..09c76cd7661d 100755
--- a/tools/testing/selftests/drivers/gpu/drm_mm.sh
+++ b/tools/testing/selftests/drivers/gpu/drm_mm.sh
@@ -3,7 +3,7 @@
# Runs API tests for struct drm_mm (DRM range manager)
if ! /sbin/modprobe -n -q test-drm_mm; then
- echo "drivers/gpu/drm_mm: [skip]"
+ echo "drivers/gpu/drm_mm: module test-drm_mm is not found in /lib/modules/`uname -r` [skip]"
exit 77
fi
@@ -11,6 +11,6 @@ if /sbin/modprobe -q test-drm_mm; then
/sbin/modprobe -q -r test-drm_mm
echo "drivers/gpu/drm_mm: ok"
else
- echo "drivers/gpu/drm_mm: [FAIL]"
+ echo "drivers/gpu/drm_mm: module test-drm_mm could not be removed [FAIL]"
exit 1
fi
diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile
new file mode 100644
index 000000000000..2a731d5c6d85
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = bridge_locked_port.sh \
+ bridge_mdb.sh \
+ bridge_mld.sh \
+ bridge_vlan_aware.sh \
+ bridge_vlan_mcast.sh \
+ bridge_vlan_unaware.sh \
+ local_termination.sh \
+ no_forwarding.sh \
+ test_bridge_fdb_stress.sh
+
+TEST_PROGS_EXTENDED := lib.sh
+
+TEST_FILES := forwarding.config
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
index 08a922d8b86a..224ca3695c89 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
@@ -84,6 +84,13 @@ lc_wait_until_port_count_is()
busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc"
}
+lc_nested_devlink_dev_get()
+{
+ local lc=$1
+
+ devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].nested_devlink"
+}
+
PROV_UNPROV_TIMEOUT=8000 # ms
POST_PROV_ACT_TIMEOUT=2000 # ms
PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms
@@ -191,12 +198,30 @@ ports_check()
check_err $? "Unexpected port count linecard $lc (got $port_count, expected $expected_port_count)"
}
+lc_dev_info_provisioned_check()
+{
+ local lc=$1
+ local nested_devlink_dev=$2
+ local fixed_hw_revision
+ local running_ini_version
+
+ fixed_hw_revision=$(devlink dev info $nested_devlink_dev -j | \
+ jq -e -r '.[][].versions.fixed."hw.revision"')
+ check_err $? "Failed to get linecard $lc fixed.hw.revision"
+ log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\""
+ running_ini_version=$(devlink dev info $nested_devlink_dev -j | \
+ jq -e -r '.[][].versions.running."ini.version"')
+ check_err $? "Failed to get linecard $lc running.ini.version"
+ log_info "Linecard $lc running.ini.version: \"$running_ini_version\""
+}
+
provision_test()
{
RET=0
local lc
local type
local state
+ local nested_devlink_dev
lc=$LC_SLOT
supported_types_check $lc
@@ -207,6 +232,11 @@ provision_test()
fi
provision_one $lc $LC_16X100G_TYPE
ports_check $lc $LC_16X100G_PORT_COUNT
+
+ nested_devlink_dev=$(lc_nested_devlink_dev_get $lc)
+ check_err $? "Failed to get nested devlink handle of linecard $lc"
+ lc_dev_info_provisioned_check $lc $nested_devlink_dev
+
log_test "Provision"
}
@@ -220,12 +250,32 @@ interface_check()
setup_wait
}
+lc_dev_info_active_check()
+{
+ local lc=$1
+ local nested_devlink_dev=$2
+ local fixed_device_fw_psid
+ local running_device_fw
+
+ fixed_device_fw_psid=$(devlink dev info $nested_devlink_dev -j | \
+ jq -e -r ".[][].versions.fixed" | \
+ jq -e -r '."fw.psid"')
+ check_err $? "Failed to get linecard $lc fixed fw PSID"
+ log_info "Linecard $lc fixed.fw.psid: \"$fixed_device_fw_psid\""
+
+ running_device_fw=$(devlink dev info $nested_devlink_dev -j | \
+ jq -e -r ".[][].versions.running.fw")
+ check_err $? "Failed to get linecard $lc running.fw.version"
+ log_info "Linecard $lc running.fw: \"$running_device_fw\""
+}
+
activation_16x100G_test()
{
RET=0
local lc
local type
local state
+ local nested_devlink_dev
lc=$LC_SLOT
type=$LC_16X100G_TYPE
@@ -238,6 +288,10 @@ activation_16x100G_test()
interface_check
+ nested_devlink_dev=$(lc_nested_devlink_dev_get $lc)
+ check_err $? "Failed to get nested devlink handle of linecard $lc"
+ lc_dev_info_active_check $lc $nested_devlink_dev
+
log_test "Activation 16x100G"
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh
new file mode 100644
index 000000000000..a43a9926e690
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0
+
+RIF_COUNTER_NUM_NETIFS=2
+
+rif_counter_addr4()
+{
+ local i=$1; shift
+ local p=$1; shift
+
+ printf 192.0.%d.%d $((i / 64)) $(((4 * i % 256) + p))
+}
+
+rif_counter_addr4pfx()
+{
+ rif_counter_addr4 $@
+ printf /30
+}
+
+rif_counter_h1_create()
+{
+ simple_if_init $h1
+}
+
+rif_counter_h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+rif_counter_h2_create()
+{
+ simple_if_init $h2
+}
+
+rif_counter_h2_destroy()
+{
+ simple_if_fini $h2
+}
+
+rif_counter_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ vrf_prepare
+
+ rif_counter_h1_create
+ rif_counter_h2_create
+}
+
+rif_counter_cleanup()
+{
+ local count=$1; shift
+
+ pre_cleanup
+
+ for ((i = 1; i <= count; i++)); do
+ vlan_destroy $h2 $i
+ done
+
+ rif_counter_h2_destroy
+ rif_counter_h1_destroy
+
+ vrf_cleanup
+
+ if [[ -v RIF_COUNTER_BATCH_FILE ]]; then
+ rm -f $RIF_COUNTER_BATCH_FILE
+ fi
+}
+
+
+rif_counter_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ RIF_COUNTER_BATCH_FILE="$(mktemp)"
+
+ for ((i = 1; i <= count; i++)); do
+ vlan_create $h2 $i v$h2 $(rif_counter_addr4pfx $i 2)
+ done
+ for ((i = 1; i <= count; i++)); do
+ cat >> $RIF_COUNTER_BATCH_FILE <<-EOF
+ stats set dev $h2.$i l3_stats on
+ EOF
+ done
+
+ ip -b $RIF_COUNTER_BATCH_FILE
+ check_err_fail $should_fail $? "RIF counter enablement"
+}
+
+rif_counter_traffic_test()
+{
+ local count=$1; shift
+ local i;
+
+ for ((i = count; i > 0; i /= 2)); do
+ $MZ $h1 -Q $i -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \
+ -A $(rif_counter_addr4 $i 1) \
+ -B $(rif_counter_addr4 $i 2) \
+ -q -t udp sp=54321,dp=12345
+ done
+ for ((i = count; i > 0; i /= 2)); do
+ busywait "$TC_HIT_TIMEOUT" until_counter_is "== 1" \
+ hw_stats_get l3_stats $h2.$i rx packets > /dev/null
+ check_err $? "Traffic not seen at RIF $h2.$i"
+ done
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index e9f65bd2e299..688338bbeb97 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -25,7 +25,16 @@ cleanup()
trap cleanup EXIT
-ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile"
+ALL_TESTS="
+ router
+ tc_flower
+ mirror_gre
+ tc_police
+ port
+ rif_mac_profile
+ rif_counter
+"
+
for current_test in ${TESTS:-$ALL_TESTS}; do
RET_FIN=0
source ${current_test}_scale.sh
@@ -36,16 +45,32 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
for should_fail in 0 1; do
RET=0
target=$(${current_test}_get_target "$should_fail")
+ if ((target == 0)); then
+ log_test_skip "'$current_test' should_fail=$should_fail test"
+ continue
+ fi
+
${current_test}_setup_prepare
setup_wait $num_netifs
+ # Update target in case occupancy of a certain resource changed
+ # following the test setup.
+ target=$(${current_test}_get_target "$should_fail")
${current_test}_test "$target" "$should_fail"
- ${current_test}_cleanup
- devlink_reload
if [[ "$should_fail" -eq 0 ]]; then
log_test "'$current_test' $target"
+
+ if ((!RET)); then
+ tt=${current_test}_traffic_test
+ if [[ $(type -t $tt) == "function" ]]; then
+ $tt "$target"
+ log_test "'$current_test' $target traffic test"
+ fi
+ fi
else
log_test "'$current_test' overflow $target"
fi
+ ${current_test}_cleanup $target
+ devlink_reload
RET_FIN=$(( RET_FIN || RET ))
done
done
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh
new file mode 120000
index 000000000000..1f5752e8ffc0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh
@@ -0,0 +1 @@
+../spectrum/rif_counter_scale.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
index efd798a85931..4444bbace1a9 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
@@ -4,17 +4,22 @@ source ../tc_flower_scale.sh
tc_flower_get_target()
{
local should_fail=$1; shift
+ local max_cnts
# The driver associates a counter with each tc filter, which means the
# number of supported filters is bounded by the number of available
# counters.
- # Currently, the driver supports 30K (30,720) flow counters and six of
- # these are used for multicast routing.
- local target=30714
+ max_cnts=$(devlink_resource_size_get counters flow)
+
+ # Remove already allocated counters.
+ ((max_cnts -= $(devlink_resource_occ_get counters flow)))
+
+ # Each rule uses two counters, for packets and bytes.
+ ((max_cnts /= 2))
if ((! should_fail)); then
- echo $target
+ echo $max_cnts
else
- echo $((target + 1))
+ echo $((max_cnts + 1))
fi
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index dea33dc93790..95d9f710a630 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -22,7 +22,16 @@ cleanup()
devlink_sp_read_kvd_defaults
trap cleanup EXIT
-ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile"
+ALL_TESTS="
+ router
+ tc_flower
+ mirror_gre
+ tc_police
+ port
+ rif_mac_profile
+ rif_counter
+"
+
for current_test in ${TESTS:-$ALL_TESTS}; do
RET_FIN=0
source ${current_test}_scale.sh
@@ -41,15 +50,31 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
for should_fail in 0 1; do
RET=0
target=$(${current_test}_get_target "$should_fail")
+ if ((target == 0)); then
+ log_test_skip "'$current_test' [$profile] should_fail=$should_fail test"
+ continue
+ fi
${current_test}_setup_prepare
setup_wait $num_netifs
+ # Update target in case occupancy of a certain resource
+ # changed following the test setup.
+ target=$(${current_test}_get_target "$should_fail")
${current_test}_test "$target" "$should_fail"
- ${current_test}_cleanup
if [[ "$should_fail" -eq 0 ]]; then
log_test "'$current_test' [$profile] $target"
+
+ if ((!RET)); then
+ tt=${current_test}_traffic_test
+ if [[ $(type -t $tt) == "function" ]]
+ then
+ $tt "$target"
+ log_test "'$current_test' [$profile] $target traffic test"
+ fi
+ fi
else
log_test "'$current_test' [$profile] overflow $target"
fi
+ ${current_test}_cleanup $target
RET_FIN=$(( RET_FIN || RET ))
done
done
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh
new file mode 100644
index 000000000000..d44536276e8a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_counter_scale.sh
+
+rif_counter_get_target()
+{
+ local should_fail=$1; shift
+ local max_cnts
+ local max_rifs
+ local target
+
+ max_rifs=$(devlink_resource_size_get rifs)
+ max_cnts=$(devlink_resource_size_get counters rif)
+
+ # Remove already allocated RIFs.
+ ((max_rifs -= $(devlink_resource_occ_get rifs)))
+
+ # 10 KVD slots per counter, ingress+egress counters per RIF
+ ((max_cnts /= 20))
+
+ # Pointless to run the overflow test if we don't have enough RIFs to
+ # host all the counters.
+ if ((max_cnts > max_rifs && should_fail)); then
+ echo 0
+ return
+ fi
+
+ target=$((max_rifs < max_cnts ? max_rifs : max_cnts))
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
index aa74be9f47c8..d3d9e60d6ddf 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -77,6 +77,7 @@ tc_flower_rules_create()
filter add dev $h2 ingress \
prot ipv6 \
pref 1000 \
+ handle 42$i \
flower $tcflags dst_ip $(tc_flower_addr $i) \
action drop
EOF
@@ -121,3 +122,19 @@ tc_flower_test()
tcflags="skip_sw"
__tc_flower_test $count $should_fail
}
+
+tc_flower_traffic_test()
+{
+ local count=$1; shift
+ local i;
+
+ for ((i = count - 1; i > 0; i /= 2)); do
+ $MZ -6 $h1 -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \
+ -A $(tc_flower_addr 0) -B $(tc_flower_addr $i) \
+ -q -t udp sp=54321,dp=12345
+ done
+ for ((i = count - 1; i > 0; i /= 2)); do
+ tc_check_packets "dev $h2 ingress" 42$i 1
+ check_err $? "Traffic not seen at rule #$i"
+ done
+}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
index fc794cd30389..6800de816e8b 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
@@ -16,6 +16,7 @@ ALL_TESTS="
ipv4_replay
ipv4_flush
ipv4_error_path
+ ipv4_delete_fail
ipv6_add
ipv6_metric
ipv6_append_single
@@ -29,11 +30,13 @@ ALL_TESTS="
ipv6_replay_single
ipv6_replay_multipath
ipv6_error_path
+ ipv6_delete_fail
"
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
NUM_NETIFS=0
source $lib_dir/lib.sh
source $lib_dir/fib_offload_lib.sh
@@ -157,6 +160,27 @@ ipv4_error_path()
ipv4_error_path_replay
}
+ipv4_delete_fail()
+{
+ RET=0
+
+ echo "y" > $DEBUGFS_DIR/fib/fail_route_delete
+
+ ip -n testns1 link add name dummy1 type dummy
+ ip -n testns1 link set dev dummy1 up
+
+ ip -n testns1 route add 192.0.2.0/24 dev dummy1
+ ip -n testns1 route del 192.0.2.0/24 dev dummy1 &> /dev/null
+
+ # We should not be able to delete the netdev if we are leaking a
+ # reference.
+ ip -n testns1 link del dev dummy1
+
+ log_test "IPv4 route delete failure"
+
+ echo "n" > $DEBUGFS_DIR/fib/fail_route_delete
+}
+
ipv6_add()
{
fib_ipv6_add_test "testns1"
@@ -304,6 +328,27 @@ ipv6_error_path()
ipv6_error_path_replay
}
+ipv6_delete_fail()
+{
+ RET=0
+
+ echo "y" > $DEBUGFS_DIR/fib/fail_route_delete
+
+ ip -n testns1 link add name dummy1 type dummy
+ ip -n testns1 link set dev dummy1 up
+
+ ip -n testns1 route add 2001:db8:1::/64 dev dummy1
+ ip -n testns1 route del 2001:db8:1::/64 dev dummy1 &> /dev/null
+
+ # We should not be able to delete the netdev if we are leaking a
+ # reference.
+ ip -n testns1 link del dev dummy1
+
+ log_test "IPv6 route delete failure"
+
+ echo "n" > $DEBUGFS_DIR/fib/fail_route_delete
+}
+
fib_notify_on_flag_change_set()
{
local notify=$1; shift
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/Makefile b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile
index 5e701d2708d4..891215a7dc8a 100644
--- a/tools/testing/selftests/drivers/s390x/uvdevice/Makefile
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile
@@ -11,7 +11,6 @@ else
TEST_GEN_PROGS := test_uvdevice
top_srcdir ?= ../../../../../..
-KSFT_KHDR_INSTALL := 1
khdr_dir = $(top_srcdir)/usr/include
LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
diff --git a/tools/testing/selftests/filesystems/binderfs/config b/tools/testing/selftests/filesystems/binderfs/config
index 02dd6cc9cf99..7b4fc6ee6205 100644
--- a/tools/testing/selftests/filesystems/binderfs/config
+++ b/tools/testing/selftests/filesystems/binderfs/config
@@ -1,3 +1,2 @@
-CONFIG_ANDROID=y
CONFIG_ANDROID_BINDERFS=y
CONFIG_ANDROID_BINDER_IPC=y
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index b8152c573e8a..732149011692 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -22,7 +22,6 @@ TEST_GEN_FILES := \
TEST_PROGS := run.sh
top_srcdir = ../../../../..
-KSFT_KHDR_INSTALL := 1
DEFAULT_INSTALL_HDR_PATH := 1
include ../../lib.mk
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 71b306602368..616ed4019655 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -3,6 +3,6 @@
TEST_PROGS := gpio-mockup.sh gpio-sim.sh
TEST_FILES := gpio-mockup-sysfs.sh
TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name
-CFLAGS += -O2 -g -Wall -I../../../../usr/include/
+CFLAGS += -O2 -g -Wall -I../../../../usr/include/ $(KHDR_INCLUDES)
include ../lib.mk
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
index 6ea7b9f37a41..25110c7c0b3e 100644
--- a/tools/testing/selftests/kcmp/kcmp_test.c
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -88,6 +88,9 @@ int main(int argc, char **argv)
int pid2 = getpid();
int ret;
+ ksft_print_header();
+ ksft_set_plan(3);
+
fd2 = open(kpath, O_RDWR, 0644);
if (fd2 < 0) {
perror("Can't open file");
@@ -152,7 +155,6 @@ int main(int argc, char **argv)
ksft_inc_pass_cnt();
}
- ksft_print_cnts();
if (ret)
ksft_exit_fail();
@@ -162,5 +164,5 @@ int main(int argc, char **argv)
waitpid(pid2, &status, P_ALL);
- return ksft_exit_pass();
+ return 0;
}
diff --git a/tools/testing/selftests/kexec/kexec_common_lib.sh b/tools/testing/selftests/kexec/kexec_common_lib.sh
index 0e114b34d5d7..641ef05863b2 100755
--- a/tools/testing/selftests/kexec/kexec_common_lib.sh
+++ b/tools/testing/selftests/kexec/kexec_common_lib.sh
@@ -65,32 +65,6 @@ get_efivarfs_secureboot_mode()
return 0;
}
-get_efi_var_secureboot_mode()
-{
- local efi_vars
- local secure_boot_file
- local setup_mode_file
- local secureboot_mode
- local setup_mode
-
- if [ ! -d "$efi_vars" ]; then
- log_skip "efi_vars is not enabled\n"
- fi
- secure_boot_file=$(find "$efi_vars" -name SecureBoot-* 2>/dev/null)
- setup_mode_file=$(find "$efi_vars" -name SetupMode-* 2>/dev/null)
- if [ -f "$secure_boot_file/data" ] && \
- [ -f "$setup_mode_file/data" ]; then
- secureboot_mode=`od -An -t u1 "$secure_boot_file/data"`
- setup_mode=`od -An -t u1 "$setup_mode_file/data"`
-
- if [ $secureboot_mode -eq 1 ] && [ $setup_mode -eq 0 ]; then
- log_info "secure boot mode enabled (CONFIG_EFI_VARS)"
- return 1;
- fi
- fi
- return 0;
-}
-
# On powerpc platform, check device-tree property
# /proc/device-tree/ibm,secureboot/os-secureboot-enforcing
# to detect secureboot state.
@@ -113,9 +87,8 @@ get_arch()
}
# Check efivar SecureBoot-$(the UUID) and SetupMode-$(the UUID).
-# The secure boot mode can be accessed either as the last integer
-# of "od -An -t u1 /sys/firmware/efi/efivars/SecureBoot-*" or from
-# "od -An -t u1 /sys/firmware/efi/vars/SecureBoot-*/data". The efi
+# The secure boot mode can be accessed as the last integer of
+# "od -An -t u1 /sys/firmware/efi/efivars/SecureBoot-*". The efi
# SetupMode can be similarly accessed.
# Return 1 for SecureBoot mode enabled and SetupMode mode disabled.
get_secureboot_mode()
@@ -129,11 +102,6 @@ get_secureboot_mode()
else
get_efivarfs_secureboot_mode
secureboot_mode=$?
- # fallback to using the efi_var files
- if [ $secureboot_mode -eq 0 ]; then
- get_efi_var_secureboot_mode
- secureboot_mode=$?
- fi
fi
if [ $secureboot_mode -eq 0 ]; then
diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh
index 00e60d6eb16b..708cb5429633 100755
--- a/tools/testing/selftests/kselftest_deps.sh
+++ b/tools/testing/selftests/kselftest_deps.sh
@@ -26,7 +26,7 @@ echo " main Makefile when optional -p is specified."
echo "- Prints pass/fail dependency check for each tests/sub-test."
echo "- Prints pass/fail targets and libraries."
echo "- Default: runs dependency checks on all tests."
-echo "- Optional test name can be specified to check dependencies for it."
+echo "- Optional: test name can be specified to check dependencies for it."
exit 1
}
diff --git a/tools/testing/selftests/kselftest_module.h b/tools/testing/selftests/kselftest_module.h
index e2ea41de3f35..63cd7487373f 100644
--- a/tools/testing/selftests/kselftest_module.h
+++ b/tools/testing/selftests/kselftest_module.h
@@ -3,6 +3,7 @@
#define __KSELFTEST_MODULE_H
#include <linux/module.h>
+#include <linux/panic.h>
/*
* Test framework for writing test modules to be loaded by kselftest.
@@ -41,6 +42,7 @@ static inline int kstm_report(unsigned int total_tests, unsigned int failed_test
static int __init __module##_init(void) \
{ \
pr_info("loaded.\n"); \
+ add_taint(TAINT_TEST, LOCKDEP_STILL_OK); \
selftest(); \
return kstm_report(total_tests, failed_tests, skipped_tests); \
} \
@@ -51,4 +53,6 @@ static void __exit __module##_exit(void) \
module_init(__module##_init); \
module_exit(__module##_exit)
+MODULE_INFO(test, "Y");
+
#endif /* __KSELFTEST_MODULE_H */
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 4509a3a7eeae..d625a3f83780 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -25,8 +25,10 @@
/x86_64/hyperv_cpuid
/x86_64/hyperv_features
/x86_64/hyperv_svm_test
+/x86_64/max_vcpuid_cap_test
/x86_64/mmio_warning_test
-/x86_64/mmu_role_test
+/x86_64/monitor_mwait_test
+/x86_64/nx_huge_pages_test
/x86_64/platform_info_test
/x86_64/pmu_event_filter_test
/x86_64/set_boot_cpu_id
@@ -36,9 +38,11 @@
/x86_64/state_test
/x86_64/svm_vmcall_test
/x86_64/svm_int_ctl_test
-/x86_64/tsc_scaling_sync
+/x86_64/svm_nested_soft_inject_test
/x86_64/sync_regs_test
/x86_64/tsc_msrs_test
+/x86_64/tsc_scaling_sync
+/x86_64/ucna_injection_test
/x86_64/userspace_io_test
/x86_64/userspace_msr_exit_test
/x86_64/vmx_apic_access_test
@@ -46,6 +50,7 @@
/x86_64/vmx_dirty_log_test
/x86_64/vmx_exception_with_invalid_guest_state
/x86_64/vmx_invalid_nested_guest_state
+/x86_64/vmx_msrs_test
/x86_64/vmx_preemption_timer_test
/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
@@ -56,6 +61,7 @@
/x86_64/xen_vmcall_test
/x86_64/xss_msr_test
/x86_64/vmx_pmu_caps_test
+/x86_64/triple_fault_event_test
/access_tracking_perf_test
/demand_paging_test
/dirty_log_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 22423c871ed6..c7f47429d6cd 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -4,7 +4,6 @@ include ../../../build/Build.include
all:
top_srcdir = ../../../..
-KSFT_KHDR_INSTALL := 1
# For cross-builds to work, UNAME_M has to map to ARCH and arch specific
# directories and targets in this Makefile. "uname -m" doesn't map to
@@ -70,6 +69,10 @@ LIBKVM_s390x += lib/s390x/ucall.c
LIBKVM_riscv += lib/riscv/processor.c
LIBKVM_riscv += lib/riscv/ucall.c
+# Non-compiled test targets
+TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
+
+# Compiled test targets
TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
@@ -83,7 +86,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
-TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test
+TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
@@ -93,14 +96,17 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test
TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test
TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
@@ -115,6 +121,8 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
TEST_GEN_PROGS_x86_64 += x86_64/amx_test
+TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
+TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
@@ -131,6 +139,9 @@ TEST_GEN_PROGS_x86_64 += steal_time
TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
TEST_GEN_PROGS_x86_64 += system_counter_offset_test
+# Compiled outputs used by test targets
+TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
+
TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
@@ -170,7 +181,9 @@ TEST_GEN_PROGS_riscv += kvm_page_table_test
TEST_GEN_PROGS_riscv += set_memory_region_test
TEST_GEN_PROGS_riscv += kvm_binary_stats_test
+TEST_PROGS += $(TEST_PROGS_$(UNAME_M))
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
+TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
INSTALL_HDR_PATH = $(top_srcdir)/usr
@@ -217,6 +230,7 @@ $(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
+$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
cscope:
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index 3b940a101bc0..574eb73f0e90 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -76,13 +76,8 @@ struct test_vcpu_shared_data {
uint64_t xcnt;
};
-struct test_vcpu {
- uint32_t vcpuid;
- pthread_t pt_vcpu_run;
- struct kvm_vm *vm;
-};
-
-static struct test_vcpu test_vcpu[KVM_MAX_VCPUS];
+static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+static pthread_t pt_vcpu_run[KVM_MAX_VCPUS];
static struct test_vcpu_shared_data vcpu_shared_data[KVM_MAX_VCPUS];
static int vtimer_irq, ptimer_irq;
@@ -217,29 +212,32 @@ static void guest_code(void)
static void *test_vcpu_run(void *arg)
{
+ unsigned int vcpu_idx = (unsigned long)arg;
struct ucall uc;
- struct test_vcpu *vcpu = arg;
+ struct kvm_vcpu *vcpu = vcpus[vcpu_idx];
struct kvm_vm *vm = vcpu->vm;
- uint32_t vcpuid = vcpu->vcpuid;
- struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpuid];
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpu_idx];
- vcpu_run(vm, vcpuid);
+ vcpu_run(vcpu);
/* Currently, any exit from guest is an indication of completion */
pthread_mutex_lock(&vcpu_done_map_lock);
- set_bit(vcpuid, vcpu_done_map);
+ set_bit(vcpu_idx, vcpu_done_map);
pthread_mutex_unlock(&vcpu_done_map_lock);
- switch (get_ucall(vm, vcpuid, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
case UCALL_DONE:
break;
case UCALL_ABORT:
sync_global_from_guest(vm, *shared_data);
- TEST_FAIL("%s at %s:%ld\n\tvalues: %lu, %lu; %lu, vcpu: %u; stage: %u; iter: %u",
- (const char *)uc.args[0], __FILE__, uc.args[1],
- uc.args[2], uc.args[3], uc.args[4], vcpuid,
- shared_data->guest_stage, shared_data->nr_iter);
+ REPORT_GUEST_ASSERT_N(uc, "values: %lu, %lu; %lu, vcpu %u; stage; %u; iter: %u",
+ GUEST_ASSERT_ARG(uc, 0),
+ GUEST_ASSERT_ARG(uc, 1),
+ GUEST_ASSERT_ARG(uc, 2),
+ vcpu_idx,
+ shared_data->guest_stage,
+ shared_data->nr_iter);
break;
default:
TEST_FAIL("Unexpected guest exit\n");
@@ -265,7 +263,7 @@ static uint32_t test_get_pcpu(void)
return pcpu;
}
-static int test_migrate_vcpu(struct test_vcpu *vcpu)
+static int test_migrate_vcpu(unsigned int vcpu_idx)
{
int ret;
cpu_set_t cpuset;
@@ -274,15 +272,15 @@ static int test_migrate_vcpu(struct test_vcpu *vcpu)
CPU_ZERO(&cpuset);
CPU_SET(new_pcpu, &cpuset);
- pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu->vcpuid, new_pcpu);
+ pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu);
- ret = pthread_setaffinity_np(vcpu->pt_vcpu_run,
- sizeof(cpuset), &cpuset);
+ ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx],
+ sizeof(cpuset), &cpuset);
/* Allow the error where the vCPU thread is already finished */
TEST_ASSERT(ret == 0 || ret == ESRCH,
- "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d\n",
- vcpu->vcpuid, new_pcpu, ret);
+ "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d\n",
+ vcpu_idx, new_pcpu, ret);
return ret;
}
@@ -305,7 +303,7 @@ static void *test_vcpu_migration(void *arg)
continue;
}
- test_migrate_vcpu(&test_vcpu[i]);
+ test_migrate_vcpu(i);
}
} while (test_args.nr_vcpus != n_done);
@@ -314,16 +312,17 @@ static void *test_vcpu_migration(void *arg)
static void test_run(struct kvm_vm *vm)
{
- int i, ret;
pthread_t pt_vcpu_migration;
+ unsigned int i;
+ int ret;
pthread_mutex_init(&vcpu_done_map_lock, NULL);
vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus);
TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap\n");
- for (i = 0; i < test_args.nr_vcpus; i++) {
- ret = pthread_create(&test_vcpu[i].pt_vcpu_run, NULL,
- test_vcpu_run, &test_vcpu[i]);
+ for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) {
+ ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run,
+ (void *)(unsigned long)i);
TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread\n", i);
}
@@ -338,7 +337,7 @@ static void test_run(struct kvm_vm *vm)
for (i = 0; i < test_args.nr_vcpus; i++)
- pthread_join(test_vcpu[i].pt_vcpu_run, NULL);
+ pthread_join(pt_vcpu_run[i], NULL);
if (test_args.migration_freq_ms)
pthread_join(pt_vcpu_migration, NULL);
@@ -349,12 +348,10 @@ static void test_run(struct kvm_vm *vm)
static void test_init_timer_irq(struct kvm_vm *vm)
{
/* Timer initid should be same for all the vCPUs, so query only vCPU-0 */
- int vcpu0_fd = vcpu_get_fd(vm, 0);
-
- kvm_device_access(vcpu0_fd, KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq, false);
- kvm_device_access(vcpu0_fd, KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq, false);
+ vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
+ vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
sync_global_to_guest(vm, ptimer_irq);
sync_global_to_guest(vm, vtimer_irq);
@@ -370,25 +367,18 @@ static struct kvm_vm *test_vm_create(void)
unsigned int i;
int nr_vcpus = test_args.nr_vcpus;
- vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
vm_init_descriptor_tables(vm);
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
- for (i = 0; i < nr_vcpus; i++) {
- vcpu_init_descriptor_tables(vm, i);
-
- test_vcpu[i].vcpuid = i;
- test_vcpu[i].vm = vm;
- }
+ for (i = 0; i < nr_vcpus; i++)
+ vcpu_init_descriptor_tables(vcpus[i]);
ucall_init(vm, NULL);
test_init_timer_irq(vm);
gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
- if (gic_fd < 0) {
- print_skip("Failed to create vgic-v3");
- exit(KSFT_SKIP);
- }
+ __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
/* Make all the test's cmdline args visible to the guest */
sync_global_to_guest(vm, test_args);
@@ -478,10 +468,8 @@ int main(int argc, char *argv[])
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
- if (test_args.migration_freq_ms && get_nprocs() < 2) {
- print_skip("At least two physical CPUs needed for vCPU migration");
- exit(KSFT_SKIP);
- }
+ __TEST_REQUIRE(!test_args.migration_freq_ms || get_nprocs() >= 2,
+ "At least two physical CPUs needed for vCPU migration");
vm = test_vm_create();
test_run(vm);
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 63b2178210c4..2ee35cf9801e 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -3,8 +3,6 @@
#include <kvm_util.h>
#include <processor.h>
-#define VCPU_ID 0
-
#define MDSCR_KDE (1 << 13)
#define MDSCR_MDE (1 << 15)
#define MDSCR_SS (1 << 0)
@@ -240,31 +238,29 @@ static void guest_svc_handler(struct ex_regs *regs)
svc_addr = regs->pc;
}
-static int debug_version(struct kvm_vm *vm)
+static int debug_version(struct kvm_vcpu *vcpu)
{
uint64_t id_aa64dfr0;
- get_reg(vm, VCPU_ID, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0);
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0);
return id_aa64dfr0 & 0xf;
}
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
int stage;
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
ucall_init(vm, NULL);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vcpu_init_descriptor_tables(vcpu);
- if (debug_version(vm) < 6) {
- print_skip("Armv8 debug architecture not supported.");
- kvm_vm_free(vm);
- exit(KSFT_SKIP);
- }
+ __TEST_REQUIRE(debug_version(vcpu) >= 6,
+ "Armv8 debug architecture not supported.");
vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
ESR_EC_BRK_INS, guest_sw_bp_handler);
@@ -278,18 +274,16 @@ int main(int argc, char *argv[])
ESR_EC_SVC64, guest_svc_handler);
for (stage = 0; stage < 11; stage++) {
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
TEST_ASSERT(uc.args[1] == stage,
"Stage %d: Unexpected sync ucall, got %lx",
stage, (ulong)uc.args[1]);
break;
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx",
- (const char *)uc.args[0],
- __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
break;
case UCALL_DONE:
goto done;
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index d3a7dbfcbb3d..d287dd2cac0a 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -377,7 +377,7 @@ static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init)
init->features[s->feature / 32] |= 1 << (s->feature % 32);
}
-static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid, struct vcpu_config *c)
+static void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_config *c)
{
struct reg_sublist *s;
int feature;
@@ -385,7 +385,7 @@ static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid, struct vcpu_config
for_each_sublist(c, s) {
if (s->finalize) {
feature = s->feature;
- vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+ vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
}
}
}
@@ -395,10 +395,12 @@ static void check_supported(struct vcpu_config *c)
struct reg_sublist *s;
for_each_sublist(c, s) {
- if (s->capability && !kvm_check_cap(s->capability)) {
- fprintf(stderr, "%s: %s not available, skipping tests\n", config_name(c), s->name);
- exit(KSFT_SKIP);
- }
+ if (!s->capability)
+ continue;
+
+ __TEST_REQUIRE(kvm_has_cap(s->capability),
+ "%s: %s not available, skipping tests\n",
+ config_name(c), s->name);
}
}
@@ -411,17 +413,19 @@ static void run_test(struct vcpu_config *c)
struct kvm_vcpu_init init = { .target = -1, };
int new_regs = 0, missing_regs = 0, i, n;
int failed_get = 0, failed_set = 0, failed_reject = 0;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct reg_sublist *s;
check_supported(c);
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ vm = vm_create_barebones();
prepare_vcpu_init(c, &init);
- aarch64_vcpu_add_default(vm, 0, &init, NULL);
- finalize_vcpu(vm, 0, c);
+ vcpu = __vm_vcpu_add(vm, 0);
+ aarch64_vcpu_setup(vcpu, &init);
+ finalize_vcpu(vcpu, c);
- reg_list = vcpu_get_reg_list(vm, 0);
+ reg_list = vcpu_get_reg_list(vcpu);
if (fixup_core_regs)
core_reg_fixup();
@@ -457,7 +461,7 @@ static void run_test(struct vcpu_config *c)
bool reject_reg = false;
int ret;
- ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
+ ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr);
if (ret) {
printf("%s: Failed to get ", config_name(c));
print_reg(c, reg.id);
@@ -469,7 +473,7 @@ static void run_test(struct vcpu_config *c)
for_each_sublist(c, s) {
if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
reject_reg = true;
- ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
if (ret != -1 || errno != EPERM) {
printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
print_reg(c, reg.id);
@@ -481,7 +485,7 @@ static void run_test(struct vcpu_config *c)
}
if (!reject_reg) {
- ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
if (ret) {
printf("%s: Failed to set ", config_name(c));
print_reg(c, reg.id);
diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c
index 41e0210b7a5e..a39da3fe4952 100644
--- a/tools/testing/selftests/kvm/aarch64/hypercalls.c
+++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c
@@ -141,26 +141,6 @@ static void guest_code(void)
GUEST_DONE();
}
-static int set_fw_reg(struct kvm_vm *vm, uint64_t id, uint64_t val)
-{
- struct kvm_one_reg reg = {
- .id = id,
- .addr = (uint64_t)&val,
- };
-
- return _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
-}
-
-static void get_fw_reg(struct kvm_vm *vm, uint64_t id, uint64_t *addr)
-{
- struct kvm_one_reg reg = {
- .id = id,
- .addr = (uint64_t)addr,
- };
-
- vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
-}
-
struct st_time {
uint32_t rev;
uint32_t attr;
@@ -170,23 +150,19 @@ struct st_time {
#define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63)
#define ST_GPA_BASE (1 << 30)
-static void steal_time_init(struct kvm_vm *vm)
+static void steal_time_init(struct kvm_vcpu *vcpu)
{
uint64_t st_ipa = (ulong)ST_GPA_BASE;
unsigned int gpages;
- struct kvm_device_attr dev = {
- .group = KVM_ARM_VCPU_PVTIME_CTRL,
- .attr = KVM_ARM_VCPU_PVTIME_IPA,
- .addr = (uint64_t)&st_ipa,
- };
gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
+ vm_userspace_mem_region_add(vcpu->vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
- vcpu_ioctl(vm, 0, KVM_SET_DEVICE_ATTR, &dev);
+ vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PVTIME_CTRL,
+ KVM_ARM_VCPU_PVTIME_IPA, &st_ipa);
}
-static void test_fw_regs_before_vm_start(struct kvm_vm *vm)
+static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
{
uint64_t val;
unsigned int i;
@@ -196,18 +172,18 @@ static void test_fw_regs_before_vm_start(struct kvm_vm *vm)
const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
/* First 'read' should be an upper limit of the features supported */
- get_fw_reg(vm, reg_info->reg, &val);
+ vcpu_get_reg(vcpu, reg_info->reg, &val);
TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
"Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx\n",
reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
/* Test a 'write' by disabling all the features of the register map */
- ret = set_fw_reg(vm, reg_info->reg, 0);
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
TEST_ASSERT(ret == 0,
"Failed to clear all the features of reg: 0x%lx; ret: %d\n",
reg_info->reg, errno);
- get_fw_reg(vm, reg_info->reg, &val);
+ vcpu_get_reg(vcpu, reg_info->reg, &val);
TEST_ASSERT(val == 0,
"Expected all the features to be cleared for reg: 0x%lx\n", reg_info->reg);
@@ -216,7 +192,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vm *vm)
* Avoid this check if all the bits are occupied.
*/
if (reg_info->max_feat_bit < 63) {
- ret = set_fw_reg(vm, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
TEST_ASSERT(ret != 0 && errno == EINVAL,
"Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx\n",
errno, reg_info->reg);
@@ -224,7 +200,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vm *vm)
}
}
-static void test_fw_regs_after_vm_start(struct kvm_vm *vm)
+static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
{
uint64_t val;
unsigned int i;
@@ -237,7 +213,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vm *vm)
* Before starting the VM, the test clears all the bits.
* Check if that's still the case.
*/
- get_fw_reg(vm, reg_info->reg, &val);
+ vcpu_get_reg(vcpu, reg_info->reg, &val);
TEST_ASSERT(val == 0,
"Expected all the features to be cleared for reg: 0x%lx\n",
reg_info->reg);
@@ -247,77 +223,78 @@ static void test_fw_regs_after_vm_start(struct kvm_vm *vm)
* the registers and should return EBUSY. Set the registers and check for
* the expected errno.
*/
- ret = set_fw_reg(vm, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
TEST_ASSERT(ret != 0 && errno == EBUSY,
"Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx\n",
errno, reg_info->reg);
}
}
-static struct kvm_vm *test_vm_create(void)
+static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu)
{
struct kvm_vm *vm;
- vm = vm_create_default(0, 0, guest_code);
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
ucall_init(vm, NULL);
- steal_time_init(vm);
+ steal_time_init(*vcpu);
return vm;
}
-static struct kvm_vm *test_guest_stage(struct kvm_vm *vm)
+static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu)
{
- struct kvm_vm *ret_vm = vm;
+ int prev_stage = stage;
+
+ pr_debug("Stage: %d\n", prev_stage);
- pr_debug("Stage: %d\n", stage);
+ /* Sync the stage early, the VM might be freed below. */
+ stage++;
+ sync_global_to_guest(*vm, stage);
- switch (stage) {
+ switch (prev_stage) {
case TEST_STAGE_REG_IFACE:
- test_fw_regs_after_vm_start(vm);
+ test_fw_regs_after_vm_start(*vcpu);
break;
case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
/* Start a new VM so that all the features are now enabled by default */
- kvm_vm_free(vm);
- ret_vm = test_vm_create();
+ kvm_vm_free(*vm);
+ *vm = test_vm_create(vcpu);
break;
case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
case TEST_STAGE_HVC_IFACE_FALSE_INFO:
break;
default:
- TEST_FAIL("Unknown test stage: %d\n", stage);
+ TEST_FAIL("Unknown test stage: %d\n", prev_stage);
}
-
- stage++;
- sync_global_to_guest(vm, stage);
-
- return ret_vm;
}
static void test_run(void)
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
bool guest_done = false;
- vm = test_vm_create();
+ vm = test_vm_create(&vcpu);
- test_fw_regs_before_vm_start(vm);
+ test_fw_regs_before_vm_start(vcpu);
while (!guest_done) {
- vcpu_run(vm, 0);
+ vcpu_run(vcpu);
- switch (get_ucall(vm, 0, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
- vm = test_guest_stage(vm);
+ test_guest_stage(&vm, &vcpu);
break;
case UCALL_DONE:
guest_done = true;
break;
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld\n\tvalues: 0x%lx, 0x%lx; 0x%lx, stage: %u",
- (const char *)uc.args[0], __FILE__, uc.args[1],
- uc.args[2], uc.args[3], uc.args[4], stage);
+ REPORT_GUEST_ASSERT_N(uc, "values: 0x%lx, 0x%lx; 0x%lx, stage: %u",
+ GUEST_ASSERT_ARG(uc, 0),
+ GUEST_ASSERT_ARG(uc, 1),
+ GUEST_ASSERT_ARG(uc, 2), stage);
break;
default:
TEST_FAIL("Unexpected guest exit\n");
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
index 88541de21c41..f7621f6e938e 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -17,9 +17,6 @@
#include "processor.h"
#include "test_util.h"
-#define VCPU_ID_SOURCE 0
-#define VCPU_ID_TARGET 1
-
#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
#define CPU_ON_CONTEXT_ID 0xdeadc0deul
@@ -64,49 +61,48 @@ static uint64_t psci_features(uint32_t func_id)
return res.a0;
}
-static void vcpu_power_off(struct kvm_vm *vm, uint32_t vcpuid)
+static void vcpu_power_off(struct kvm_vcpu *vcpu)
{
struct kvm_mp_state mp_state = {
.mp_state = KVM_MP_STATE_STOPPED,
};
- vcpu_set_mp_state(vm, vcpuid, &mp_state);
+ vcpu_mp_state_set(vcpu, &mp_state);
}
-static struct kvm_vm *setup_vm(void *guest_code)
+static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
+ struct kvm_vcpu **target)
{
struct kvm_vcpu_init init;
struct kvm_vm *vm;
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name);
+ vm = vm_create(2);
ucall_init(vm, NULL);
vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
- aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_code);
- aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_code);
+ *source = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ *target = aarch64_vcpu_add(vm, 1, &init, guest_code);
return vm;
}
-static void enter_guest(struct kvm_vm *vm, uint32_t vcpuid)
+static void enter_guest(struct kvm_vcpu *vcpu)
{
struct ucall uc;
- vcpu_run(vm, vcpuid);
- if (get_ucall(vm, vcpuid, &uc) == UCALL_ABORT)
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__,
- uc.args[1]);
+ vcpu_run(vcpu);
+ if (get_ucall(vcpu, &uc) == UCALL_ABORT)
+ REPORT_GUEST_ASSERT(uc);
}
-static void assert_vcpu_reset(struct kvm_vm *vm, uint32_t vcpuid)
+static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
{
uint64_t obs_pc, obs_x0;
- get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &obs_pc);
- get_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &obs_pc);
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
"unexpected target cpu pc: %lx (expected: %lx)",
@@ -134,37 +130,29 @@ static void guest_test_cpu_on(uint64_t target_cpu)
static void host_test_cpu_on(void)
{
+ struct kvm_vcpu *source, *target;
uint64_t target_mpidr;
struct kvm_vm *vm;
struct ucall uc;
- vm = setup_vm(guest_test_cpu_on);
+ vm = setup_vm(guest_test_cpu_on, &source, &target);
/*
* make sure the target is already off when executing the test.
*/
- vcpu_power_off(vm, VCPU_ID_TARGET);
+ vcpu_power_off(target);
- get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
- vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK);
- enter_guest(vm, VCPU_ID_SOURCE);
+ vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
+ vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK);
+ enter_guest(source);
- if (get_ucall(vm, VCPU_ID_SOURCE, &uc) != UCALL_DONE)
+ if (get_ucall(source, &uc) != UCALL_DONE)
TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
- assert_vcpu_reset(vm, VCPU_ID_TARGET);
+ assert_vcpu_reset(target);
kvm_vm_free(vm);
}
-static void enable_system_suspend(struct kvm_vm *vm)
-{
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_ARM_SYSTEM_SUSPEND,
- };
-
- vm_enable_cap(vm, &cap);
-}
-
static void guest_test_system_suspend(void)
{
uint64_t ret;
@@ -179,16 +167,17 @@ static void guest_test_system_suspend(void)
static void host_test_system_suspend(void)
{
+ struct kvm_vcpu *source, *target;
struct kvm_run *run;
struct kvm_vm *vm;
- vm = setup_vm(guest_test_system_suspend);
- enable_system_suspend(vm);
+ vm = setup_vm(guest_test_system_suspend, &source, &target);
+ vm_enable_cap(vm, KVM_CAP_ARM_SYSTEM_SUSPEND, 0);
- vcpu_power_off(vm, VCPU_ID_TARGET);
- run = vcpu_state(vm, VCPU_ID_SOURCE);
+ vcpu_power_off(target);
+ run = source->run;
- enter_guest(vm, VCPU_ID_SOURCE);
+ enter_guest(source);
TEST_ASSERT(run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
"Unhandled exit reason: %u (%s)",
@@ -202,10 +191,7 @@ static void host_test_system_suspend(void)
int main(void)
{
- if (!kvm_check_cap(KVM_CAP_ARM_SYSTEM_SUSPEND)) {
- print_skip("KVM_CAP_ARM_SYSTEM_SUSPEND not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND));
host_test_cpu_on();
host_test_system_suspend();
diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
index 6e9402679229..80b74c6f152b 100644
--- a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
+++ b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
@@ -15,24 +15,25 @@
/*
- * Add a vCPU, run KVM_ARM_VCPU_INIT with @init1, and then
- * add another vCPU, and run KVM_ARM_VCPU_INIT with @init2.
+ * Add a vCPU, run KVM_ARM_VCPU_INIT with @init0, and then
+ * add another vCPU, and run KVM_ARM_VCPU_INIT with @init1.
*/
-static int add_init_2vcpus(struct kvm_vcpu_init *init1,
- struct kvm_vcpu_init *init2)
+static int add_init_2vcpus(struct kvm_vcpu_init *init0,
+ struct kvm_vcpu_init *init1)
{
+ struct kvm_vcpu *vcpu0, *vcpu1;
struct kvm_vm *vm;
int ret;
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ vm = vm_create_barebones();
- vm_vcpu_add(vm, 0);
- ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1);
+ vcpu0 = __vm_vcpu_add(vm, 0);
+ ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
if (ret)
goto free_exit;
- vm_vcpu_add(vm, 1);
- ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2);
+ vcpu1 = __vm_vcpu_add(vm, 1);
+ ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
free_exit:
kvm_vm_free(vm);
@@ -40,25 +41,26 @@ free_exit:
}
/*
- * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init1,
- * and run KVM_ARM_VCPU_INIT for another vCPU with @init2.
+ * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init0,
+ * and run KVM_ARM_VCPU_INIT for another vCPU with @init1.
*/
-static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init1,
- struct kvm_vcpu_init *init2)
+static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init0,
+ struct kvm_vcpu_init *init1)
{
+ struct kvm_vcpu *vcpu0, *vcpu1;
struct kvm_vm *vm;
int ret;
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ vm = vm_create_barebones();
- vm_vcpu_add(vm, 0);
- vm_vcpu_add(vm, 1);
+ vcpu0 = __vm_vcpu_add(vm, 0);
+ vcpu1 = __vm_vcpu_add(vm, 1);
- ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1);
+ ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
if (ret)
goto free_exit;
- ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2);
+ ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
free_exit:
kvm_vm_free(vm);
@@ -76,45 +78,42 @@ free_exit:
*/
int main(void)
{
- struct kvm_vcpu_init init1, init2;
+ struct kvm_vcpu_init init0, init1;
struct kvm_vm *vm;
int ret;
- if (!kvm_check_cap(KVM_CAP_ARM_EL1_32BIT)) {
- print_skip("KVM_CAP_ARM_EL1_32BIT is not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_EL1_32BIT));
- /* Get the preferred target type and copy that to init2 for later use */
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init1);
+ /* Get the preferred target type and copy that to init1 for later use */
+ vm = vm_create_barebones();
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init0);
kvm_vm_free(vm);
- init2 = init1;
+ init1 = init0;
/* Test with 64bit vCPUs */
- ret = add_init_2vcpus(&init1, &init1);
+ ret = add_init_2vcpus(&init0, &init0);
TEST_ASSERT(ret == 0,
"Configuring 64bit EL1 vCPUs failed unexpectedly");
- ret = add_2vcpus_init_2vcpus(&init1, &init1);
+ ret = add_2vcpus_init_2vcpus(&init0, &init0);
TEST_ASSERT(ret == 0,
"Configuring 64bit EL1 vCPUs failed unexpectedly");
/* Test with 32bit vCPUs */
- init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
- ret = add_init_2vcpus(&init1, &init1);
+ init0.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+ ret = add_init_2vcpus(&init0, &init0);
TEST_ASSERT(ret == 0,
"Configuring 32bit EL1 vCPUs failed unexpectedly");
- ret = add_2vcpus_init_2vcpus(&init1, &init1);
+ ret = add_2vcpus_init_2vcpus(&init0, &init0);
TEST_ASSERT(ret == 0,
"Configuring 32bit EL1 vCPUs failed unexpectedly");
/* Test with mixed-width vCPUs */
- init1.features[0] = 0;
- init2.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
- ret = add_init_2vcpus(&init1, &init2);
+ init0.features[0] = 0;
+ init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+ ret = add_init_2vcpus(&init0, &init1);
TEST_ASSERT(ret != 0,
"Configuring mixed-width vCPUs worked unexpectedly");
- ret = add_2vcpus_init_2vcpus(&init1, &init2);
+ ret = add_2vcpus_init_2vcpus(&init0, &init1);
TEST_ASSERT(ret != 0,
"Configuring mixed-width vCPUs worked unexpectedly");
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c
index 34379c98d2f4..e05ecb31823f 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_init.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c
@@ -32,14 +32,28 @@ struct vm_gic {
static uint64_t max_phys_size;
-/* helper to access a redistributor register */
-static int access_v3_redist_reg(int gicv3_fd, int vcpu, int offset,
- uint32_t *val, bool write)
+/*
+ * Helpers to access a redistributor register and verify the ioctl() failed or
+ * succeeded as expected, and provided the correct value on success.
+ */
+static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset,
+ int want, const char *msg)
{
- uint64_t attr = REG_OFFSET(vcpu, offset);
+ uint32_t ignored_val;
+ int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+ REG_OFFSET(vcpu, offset), &ignored_val);
- return _kvm_device_access(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
- attr, val, write);
+ TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want);
+}
+
+static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want,
+ const char *msg)
+{
+ uint32_t val;
+
+ kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+ REG_OFFSET(vcpu, offset), &val);
+ TEST_ASSERT(val == want, "%s; want '0x%x', got '0x%x'", msg, want, val);
}
/* dummy guest code */
@@ -52,22 +66,22 @@ static void guest_code(void)
}
/* we don't want to assert on run execution, hence that helper */
-static int run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+static int run_vcpu(struct kvm_vcpu *vcpu)
{
- ucall_init(vm, NULL);
- int ret = _vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL);
- if (ret)
- return -errno;
- return 0;
+ ucall_init(vcpu->vm, NULL);
+
+ return __vcpu_run(vcpu) ? -errno : 0;
}
-static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, uint32_t nr_vcpus)
+static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type,
+ uint32_t nr_vcpus,
+ struct kvm_vcpu *vcpus[])
{
struct vm_gic v;
v.gic_dev_type = gic_dev_type;
- v.vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
- v.gic_fd = kvm_create_device(v.vm, gic_dev_type, false);
+ v.vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
return v;
}
@@ -129,63 +143,60 @@ static void subtest_dist_rdist(struct vm_gic *v)
: gic_v2_dist_region;
/* Check existing group/attributes */
- kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- dist.attr);
+ kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, dist.attr);
- kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- rdist.attr);
+ kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, rdist.attr);
/* check non existing attribute */
- ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1);
+ ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1);
TEST_ASSERT(ret && errno == ENXIO, "attribute not supported");
/* misaligned DIST and REDIST address settings */
addr = dist.alignment / 0x10;
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- dist.attr, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr);
TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned");
addr = rdist.alignment / 0x10;
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- rdist.attr, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned");
/* out of range address */
addr = max_phys_size;
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- dist.attr, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr);
TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- rdist.attr, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
/* Space for half a rdist (a rdist is: 2 * rdist.alignment). */
addr = max_phys_size - dist.alignment;
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- rdist.attr, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
TEST_ASSERT(ret && errno == E2BIG,
"half of the redist is beyond IPA limit");
/* set REDIST base address @0x0*/
addr = 0x00000;
- kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- rdist.attr, &addr, true);
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
/* Attempt to create a second legacy redistributor region */
addr = 0xE0000;
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- rdist.attr, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again");
- ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_REDIST);
if (!ret) {
/* Attempt to mix legacy and new redistributor regions */
addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION,
- &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == EINVAL,
"attempt to mix GICv3 REDIST and REDIST_REGION");
}
@@ -195,8 +206,8 @@ static void subtest_dist_rdist(struct vm_gic *v)
* on first vcpu run instead.
*/
addr = rdist.size - rdist.alignment;
- kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- dist.attr, &addr, true);
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr);
}
/* Test the new REDIST region API */
@@ -205,71 +216,71 @@ static void subtest_v3_redist_regions(struct vm_gic *v)
uint64_t addr, expected_addr;
int ret;
- ret = kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST);
+ ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST);
TEST_ASSERT(!ret, "Multiple redist regions advertised");
addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0");
addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count== 0");
addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == EINVAL,
"attempt to register the first rdist region with index != 0");
addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address");
addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
- kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index");
addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == EINVAL,
"register an rdist region overlapping with another one");
addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1");
addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
- kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == E2BIG,
"register redist region with base address beyond IPA range");
/* The last redist is above the pa range. */
addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == E2BIG,
"register redist region with top address beyond IPA range");
addr = 0x260000;
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
TEST_ASSERT(ret && errno == EINVAL,
"Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION");
@@ -282,28 +293,28 @@ static void subtest_v3_redist_regions(struct vm_gic *v)
addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0);
expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, false);
+ ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0");
addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1);
expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, false);
+ ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1");
addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, false);
+ ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non existing region");
addr = 0x260000;
- kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true);
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist");
}
@@ -313,18 +324,19 @@ static void subtest_v3_redist_regions(struct vm_gic *v)
*/
static void test_vgic_then_vcpus(uint32_t gic_dev_type)
{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
struct vm_gic v;
int ret, i;
- v = vm_gic_create_with_vcpus(gic_dev_type, 1);
+ v = vm_gic_create_with_vcpus(gic_dev_type, 1, vcpus);
subtest_dist_rdist(&v);
/* Add the rest of the VCPUs */
for (i = 1; i < NR_VCPUS; ++i)
- vm_vcpu_add_default(v.vm, i, guest_code);
+ vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
- ret = run_vcpu(v.vm, 3);
+ ret = run_vcpu(vcpus[3]);
TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
vm_gic_destroy(&v);
@@ -333,14 +345,15 @@ static void test_vgic_then_vcpus(uint32_t gic_dev_type)
/* All the VCPUs are created before the VGIC KVM device gets initialized */
static void test_vcpus_then_vgic(uint32_t gic_dev_type)
{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
struct vm_gic v;
int ret;
- v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS);
+ v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS, vcpus);
subtest_dist_rdist(&v);
- ret = run_vcpu(v.vm, 3);
+ ret = run_vcpu(vcpus[3]);
TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
vm_gic_destroy(&v);
@@ -348,52 +361,53 @@ static void test_vcpus_then_vgic(uint32_t gic_dev_type)
static void test_v3_new_redist_regions(void)
{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
void *dummy = NULL;
struct vm_gic v;
uint64_t addr;
int ret;
- v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS);
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
subtest_v3_redist_regions(&v);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
- ret = run_vcpu(v.vm, 3);
+ ret = run_vcpu(vcpus[3]);
TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists");
vm_gic_destroy(&v);
/* step2 */
- v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS);
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
subtest_v3_redist_regions(&v);
addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- ret = run_vcpu(v.vm, 3);
+ ret = run_vcpu(vcpus[3]);
TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init");
vm_gic_destroy(&v);
/* step 3 */
- v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS);
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
subtest_v3_redist_regions(&v);
- _kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy, true);
+ ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy);
TEST_ASSERT(ret && errno == EFAULT,
"register a third region allowing to cover the 4 vcpus");
addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
- ret = run_vcpu(v.vm, 3);
+ ret = run_vcpu(vcpus[3]);
TEST_ASSERT(!ret, "vcpu run");
vm_gic_destroy(&v);
@@ -403,71 +417,77 @@ static void test_v3_typer_accesses(void)
{
struct vm_gic v;
uint64_t addr;
- uint32_t val;
int ret, i;
- v.vm = vm_create_default(0, 0, guest_code);
+ v.vm = vm_create(NR_VCPUS);
+ (void)vm_vcpu_add(v.vm, 0, guest_code);
- v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+ v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
- vm_vcpu_add_default(v.vm, 3, guest_code);
+ (void)vm_vcpu_add(v.vm, 3, guest_code);
- ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
- TEST_ASSERT(ret && errno == EINVAL, "attempting to read GICR_TYPER of non created vcpu");
+ v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EINVAL,
+ "attempting to read GICR_TYPER of non created vcpu");
- vm_vcpu_add_default(v.vm, 1, guest_code);
+ (void)vm_vcpu_add(v.vm, 1, guest_code);
- ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
- TEST_ASSERT(ret && errno == EBUSY, "read GICR_TYPER before GIC initialized");
+ v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EBUSY,
+ "read GICR_TYPER before GIC initialized");
- vm_vcpu_add_default(v.vm, 2, guest_code);
+ (void)vm_vcpu_add(v.vm, 2, guest_code);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
for (i = 0; i < NR_VCPUS ; i++) {
- ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && !val, "read GICR_TYPER before rdist region setting");
+ v3_redist_reg_get(v.gic_fd, i, GICR_TYPER, i * 0x100,
+ "read GICR_TYPER before rdist region setting");
}
addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
/* The 2 first rdists should be put there (vcpu 0 and 3) */
- ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && !val, "read typer of rdist #0");
-
- ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x0, "read typer of rdist #0");
+ v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #1");
addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1);
- ret = _kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region");
- ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x100,
- "no redist region attached to vcpu #1 yet, last cannot be returned");
-
- ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x200,
- "no redist region attached to vcpu #2, last cannot be returned");
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100,
+ "no redist region attached to vcpu #1 yet, last cannot be returned");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200,
+ "no redist region attached to vcpu #2, last cannot be returned");
addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1");
-
- ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x210,
- "read typer of rdist #1, last properly returned");
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210,
+ "read typer of rdist #1, last properly returned");
vm_gic_destroy(&v);
}
+static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus,
+ uint32_t vcpuids[])
+{
+ struct vm_gic v;
+ int i;
+
+ v.vm = vm_create(nr_vcpus);
+ for (i = 0; i < nr_vcpus; i++)
+ vm_vcpu_add(v.vm, vcpuids[i], guest_code);
+
+ v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+ return v;
+}
+
/**
* Test GICR_TYPER last bit with new redist regions
* rdist regions #1 and #2 are contiguous
@@ -483,45 +503,30 @@ static void test_v3_last_bit_redist_regions(void)
uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
struct vm_gic v;
uint64_t addr;
- uint32_t val;
- int ret;
-
- v.vm = vm_create_default_with_vcpus(6, 0, 0, guest_code, vcpuids);
- v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+ v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
-
- ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0");
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1");
-
- ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x200, "read typer of rdist #2");
-
- ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #3");
-
- ret = access_v3_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #5");
-
- ret = access_v3_redist_reg(v.gic_fd, 4, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x410, "read typer of rdist #4");
+ v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, "read typer of rdist #2");
+ v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #3");
+ v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5");
+ v3_redist_reg_get(v.gic_fd, 4, GICR_TYPER, 0x410, "read typer of rdist #4");
vm_gic_destroy(&v);
}
@@ -532,34 +537,21 @@ static void test_v3_last_bit_single_rdist(void)
uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
struct vm_gic v;
uint64_t addr;
- uint32_t val;
- int ret;
-
- v.vm = vm_create_default_with_vcpus(6, 0, 0, guest_code, vcpuids);
- v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+ v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
addr = 0x10000;
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
-
- ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0");
-
- ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x300, "read typer of rdist #1");
-
- ret = access_v3_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #2");
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
- ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #3");
-
- ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
- TEST_ASSERT(!ret && val == 0x210, "read typer of rdist #3");
+ v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
+ v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x300, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #2");
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #3");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, "read typer of rdist #3");
vm_gic_destroy(&v);
}
@@ -567,30 +559,31 @@ static void test_v3_last_bit_single_rdist(void)
/* Uses the legacy REDIST region API. */
static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
struct vm_gic v;
int ret, i;
uint64_t addr;
- v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1);
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus);
/* Set space for 3 redists, we have 1 vcpu, so this succeeds. */
addr = max_phys_size - (3 * 2 * 0x10000);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
addr = 0x00000;
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
/* Add the rest of the VCPUs */
for (i = 1; i < NR_VCPUS; ++i)
- vm_vcpu_add_default(v.vm, i, guest_code);
+ vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
- kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
/* Attempt to run a vcpu without enough redist space. */
- ret = run_vcpu(v.vm, 2);
+ ret = run_vcpu(vcpus[2]);
TEST_ASSERT(ret && errno == EINVAL,
"redist base+size above PA range detected on 1st vcpu run");
@@ -599,39 +592,40 @@ static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
static void test_v3_its_region(void)
{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
struct vm_gic v;
uint64_t addr;
int its_fd, ret;
- v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS);
- its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS, false);
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
addr = 0x401000;
- ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
TEST_ASSERT(ret && errno == EINVAL,
"ITS region with misaligned address");
addr = max_phys_size;
- ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
TEST_ASSERT(ret && errno == E2BIG,
"register ITS region with base address beyond IPA range");
addr = max_phys_size - 0x10000;
- ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
TEST_ASSERT(ret && errno == E2BIG,
"Half of ITS region is beyond IPA range");
/* This one succeeds setting the ITS base */
addr = 0x400000;
- kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+ kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
addr = 0x300000;
- ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
TEST_ASSERT(ret && errno == EEXIST, "ITS base set again");
close(its_fd);
@@ -643,34 +637,33 @@ static void test_v3_its_region(void)
*/
int test_kvm_device(uint32_t gic_dev_type)
{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
struct vm_gic v;
- int ret, fd;
uint32_t other;
+ int ret;
- v.vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL);
+ v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
/* try to create a non existing KVM device */
- ret = _kvm_create_device(v.vm, 0, true, &fd);
+ ret = __kvm_test_create_device(v.vm, 0);
TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
/* trial mode */
- ret = _kvm_create_device(v.vm, gic_dev_type, true, &fd);
+ ret = __kvm_test_create_device(v.vm, gic_dev_type);
if (ret)
return ret;
- v.gic_fd = kvm_create_device(v.vm, gic_dev_type, false);
-
- ret = _kvm_create_device(v.vm, gic_dev_type, false, &fd);
- TEST_ASSERT(ret && errno == EEXIST, "create GIC device twice");
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
- kvm_create_device(v.vm, gic_dev_type, true);
+ ret = __kvm_create_device(v.vm, gic_dev_type);
+ TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice");
/* try to create the other gic_dev_type */
other = VGIC_DEV_IS_V2(gic_dev_type) ? KVM_DEV_TYPE_ARM_VGIC_V3
: KVM_DEV_TYPE_ARM_VGIC_V2;
- if (!_kvm_create_device(v.vm, other, true, &fd)) {
- ret = _kvm_create_device(v.vm, other, false, &fd);
- TEST_ASSERT(ret && errno == EINVAL,
+ if (!__kvm_test_create_device(v.vm, other)) {
+ ret = __kvm_test_create_device(v.vm, other);
+ TEST_ASSERT(ret && (errno == EINVAL || errno == EEXIST),
"create GIC device while other version exists");
}
@@ -698,6 +691,7 @@ int main(int ac, char **av)
{
int ret;
int pa_bits;
+ int cnt_impl = 0;
pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
max_phys_size = 1ULL << pa_bits;
@@ -706,17 +700,19 @@ int main(int ac, char **av)
if (!ret) {
pr_info("Running GIC_v3 tests.\n");
run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
- return 0;
+ cnt_impl++;
}
ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
if (!ret) {
pr_info("Running GIC_v2 tests.\n");
run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
- return 0;
+ cnt_impl++;
}
- print_skip("No GICv2 nor GICv3 support");
- exit(KSFT_SKIP);
+ if (!cnt_impl) {
+ print_skip("No GICv2 nor GICv3 support");
+ exit(KSFT_SKIP);
+ }
return 0;
}
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index 554ca649d470..17417220a083 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -22,7 +22,6 @@
#define GICD_BASE_GPA 0x08000000ULL
#define GICR_BASE_GPA 0x080A0000ULL
-#define VCPU_ID 0
/*
* Stores the user specified args; it's passed to the guest and to every test
@@ -589,7 +588,8 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
}
static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
- uint32_t vcpu, bool expect_failure)
+ struct kvm_vcpu *vcpu,
+ bool expect_failure)
{
/*
* Ignore this when expecting failure as invalid intids will lead to
@@ -630,8 +630,7 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
fd[f] = eventfd(0, 0);
- TEST_ASSERT(fd[f] != -1,
- "eventfd failed, errno: %i\n", errno);
+ TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
}
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
@@ -647,7 +646,7 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
val = 1;
ret = write(fd[f], &val, sizeof(uint64_t));
TEST_ASSERT(ret == sizeof(uint64_t),
- "Write to KVM_IRQFD failed with ret: %d\n", ret);
+ __KVM_SYSCALL_ERROR("write()", ret));
}
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
@@ -660,15 +659,16 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
(tmp) < (uint64_t)(first) + (uint64_t)(num); \
(tmp)++, (i)++)
-static void run_guest_cmd(struct kvm_vm *vm, int gic_fd,
- struct kvm_inject_args *inject_args,
- struct test_args *test_args)
+static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd,
+ struct kvm_inject_args *inject_args,
+ struct test_args *test_args)
{
kvm_inject_cmd cmd = inject_args->cmd;
uint32_t intid = inject_args->first_intid;
uint32_t num = inject_args->num;
int level = inject_args->level;
bool expect_failure = inject_args->expect_failure;
+ struct kvm_vm *vm = vcpu->vm;
uint64_t tmp;
uint32_t i;
@@ -706,12 +706,12 @@ static void run_guest_cmd(struct kvm_vm *vm, int gic_fd,
break;
case KVM_WRITE_ISPENDR:
for (i = intid; i < intid + num; i++)
- kvm_irq_write_ispendr_check(gic_fd, i,
- VCPU_ID, expect_failure);
+ kvm_irq_write_ispendr_check(gic_fd, i, vcpu,
+ expect_failure);
break;
case KVM_WRITE_ISACTIVER:
for (i = intid; i < intid + num; i++)
- kvm_irq_write_isactiver(gic_fd, i, VCPU_ID);
+ kvm_irq_write_isactiver(gic_fd, i, vcpu);
break;
default:
break;
@@ -740,6 +740,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
{
struct ucall uc;
int gic_fd;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_inject_args inject_args;
vm_vaddr_t args_gva;
@@ -754,39 +755,34 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
print_args(&args);
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
ucall_init(vm, NULL);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vcpu_init_descriptor_tables(vcpu);
/* Setup the guest args page (so it gets the args). */
args_gva = vm_vaddr_alloc_page(vm);
memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
- vcpu_args_set(vm, 0, 1, args_gva);
+ vcpu_args_set(vcpu, 1, args_gva);
gic_fd = vgic_v3_setup(vm, 1, nr_irqs,
GICD_BASE_GPA, GICR_BASE_GPA);
- if (gic_fd < 0) {
- print_skip("Failed to create vgic-v3, skipping");
- exit(KSFT_SKIP);
- }
+ __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
guest_irq_handlers[args.eoi_split][args.level_sensitive]);
while (1) {
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
kvm_inject_get_call(vm, &uc, &inject_args);
- run_guest_cmd(vm, gic_fd, &inject_args, &args);
+ run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
break;
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx",
- (const char *)uc.args[0],
- __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
break;
case UCALL_DONE:
goto done;
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index d8909032317a..1c2749b1481a 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -74,7 +74,7 @@ struct test_params {
uint64_t vcpu_memory_bytes;
/* The number of vCPUs to create in the VM. */
- int vcpus;
+ int nr_vcpus;
};
static uint64_t pread_uint64(int fd, const char *filename, uint64_t index)
@@ -104,10 +104,7 @@ static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva)
return 0;
pfn = entry & PAGEMAP_PFN_MASK;
- if (!pfn) {
- print_skip("Looking up PFNs requires CAP_SYS_ADMIN");
- exit(KSFT_SKIP);
- }
+ __TEST_REQUIRE(pfn, "Looking up PFNs requires CAP_SYS_ADMIN");
return pfn;
}
@@ -127,10 +124,12 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn)
"Set page_idle bits for PFN 0x%" PRIx64, pfn);
}
-static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id)
+static void mark_vcpu_memory_idle(struct kvm_vm *vm,
+ struct perf_test_vcpu_args *vcpu_args)
{
- uint64_t base_gva = perf_test_args.vcpu_args[vcpu_id].gva;
- uint64_t pages = perf_test_args.vcpu_args[vcpu_id].pages;
+ int vcpu_idx = vcpu_args->vcpu_idx;
+ uint64_t base_gva = vcpu_args->gva;
+ uint64_t pages = vcpu_args->pages;
uint64_t page;
uint64_t still_idle = 0;
uint64_t no_pfn = 0;
@@ -138,7 +137,7 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id)
int pagemap_fd;
/* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */
- if (overlap_memory_access && vcpu_id)
+ if (overlap_memory_access && vcpu_idx)
return;
page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
@@ -170,7 +169,7 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id)
*/
TEST_ASSERT(no_pfn < pages / 100,
"vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.",
- vcpu_id, no_pfn, pages);
+ vcpu_idx, no_pfn, pages);
/*
* Test that at least 90% of memory has been marked idle (the rest might
@@ -183,17 +182,16 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id)
TEST_ASSERT(still_idle < pages / 10,
"vCPU%d: Too many pages still idle (%"PRIu64 " out of %"
PRIu64 ").\n",
- vcpu_id, still_idle, pages);
+ vcpu_idx, still_idle, pages);
close(page_idle_fd);
close(pagemap_fd);
}
-static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id,
- uint64_t expected_ucall)
+static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall)
{
struct ucall uc;
- uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc);
+ uint64_t actual_ucall = get_ucall(vcpu, &uc);
TEST_ASSERT(expected_ucall == actual_ucall,
"Guest exited unexpectedly (expected ucall %" PRIu64
@@ -217,28 +215,29 @@ static bool spin_wait_for_next_iteration(int *current_iteration)
static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args)
{
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
struct kvm_vm *vm = perf_test_args.vm;
- int vcpu_id = vcpu_args->vcpu_id;
+ int vcpu_idx = vcpu_args->vcpu_idx;
int current_iteration = 0;
while (spin_wait_for_next_iteration(&current_iteration)) {
switch (READ_ONCE(iteration_work)) {
case ITERATION_ACCESS_MEMORY:
- vcpu_run(vm, vcpu_id);
- assert_ucall(vm, vcpu_id, UCALL_SYNC);
+ vcpu_run(vcpu);
+ assert_ucall(vcpu, UCALL_SYNC);
break;
case ITERATION_MARK_IDLE:
- mark_vcpu_memory_idle(vm, vcpu_id);
+ mark_vcpu_memory_idle(vm, vcpu_args);
break;
};
- vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+ vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
}
}
-static void spin_wait_for_vcpu(int vcpu_id, int target_iteration)
+static void spin_wait_for_vcpu(int vcpu_idx, int target_iteration)
{
- while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) !=
+ while (READ_ONCE(vcpu_last_completed_iteration[vcpu_idx]) !=
target_iteration) {
continue;
}
@@ -250,12 +249,11 @@ enum access_type {
ACCESS_WRITE,
};
-static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description)
+static void run_iteration(struct kvm_vm *vm, int nr_vcpus, const char *description)
{
struct timespec ts_start;
struct timespec ts_elapsed;
- int next_iteration;
- int vcpu_id;
+ int next_iteration, i;
/* Kick off the vCPUs by incrementing iteration. */
next_iteration = ++iteration;
@@ -263,23 +261,23 @@ static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description)
clock_gettime(CLOCK_MONOTONIC, &ts_start);
/* Wait for all vCPUs to finish the iteration. */
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++)
- spin_wait_for_vcpu(vcpu_id, next_iteration);
+ for (i = 0; i < nr_vcpus; i++)
+ spin_wait_for_vcpu(i, next_iteration);
ts_elapsed = timespec_elapsed(ts_start);
pr_info("%-30s: %ld.%09lds\n",
description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec);
}
-static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access,
- const char *description)
+static void access_memory(struct kvm_vm *vm, int nr_vcpus,
+ enum access_type access, const char *description)
{
perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1);
iteration_work = ITERATION_ACCESS_MEMORY;
- run_iteration(vm, vcpus, description);
+ run_iteration(vm, nr_vcpus, description);
}
-static void mark_memory_idle(struct kvm_vm *vm, int vcpus)
+static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus)
{
/*
* Even though this parallelizes the work across vCPUs, this is still a
@@ -289,37 +287,37 @@ static void mark_memory_idle(struct kvm_vm *vm, int vcpus)
*/
pr_debug("Marking VM memory idle (slow)...\n");
iteration_work = ITERATION_MARK_IDLE;
- run_iteration(vm, vcpus, "Mark memory idle");
+ run_iteration(vm, nr_vcpus, "Mark memory idle");
}
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *params = arg;
struct kvm_vm *vm;
- int vcpus = params->vcpus;
+ int nr_vcpus = params->nr_vcpus;
- vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, 1,
+ vm = perf_test_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
params->backing_src, !overlap_memory_access);
- perf_test_start_vcpu_threads(vcpus, vcpu_thread_main);
+ perf_test_start_vcpu_threads(nr_vcpus, vcpu_thread_main);
pr_info("\n");
- access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory");
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory");
/* As a control, read and write to the populated memory first. */
- access_memory(vm, vcpus, ACCESS_WRITE, "Writing to populated memory");
- access_memory(vm, vcpus, ACCESS_READ, "Reading from populated memory");
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory");
+ access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory");
/* Repeat on memory that has been marked as idle. */
- mark_memory_idle(vm, vcpus);
- access_memory(vm, vcpus, ACCESS_WRITE, "Writing to idle memory");
- mark_memory_idle(vm, vcpus);
- access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory");
+ mark_memory_idle(vm, nr_vcpus);
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to idle memory");
+ mark_memory_idle(vm, nr_vcpus);
+ access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from idle memory");
/* Set done to signal the vCPU threads to exit */
done = true;
- perf_test_join_vcpu_threads(vcpus);
+ perf_test_join_vcpu_threads(nr_vcpus);
perf_test_destroy_vm(vm);
}
@@ -347,7 +345,7 @@ int main(int argc, char *argv[])
struct test_params params = {
.backing_src = DEFAULT_VM_MEM_SRC,
.vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
- .vcpus = 1,
+ .nr_vcpus = 1,
};
int page_idle_fd;
int opt;
@@ -363,7 +361,7 @@ int main(int argc, char *argv[])
params.vcpu_memory_bytes = parse_size(optarg);
break;
case 'v':
- params.vcpus = atoi(optarg);
+ params.nr_vcpus = atoi(optarg);
break;
case 'o':
overlap_memory_access = true;
@@ -379,10 +377,8 @@ int main(int argc, char *argv[])
}
page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
- if (page_idle_fd < 0) {
- print_skip("CONFIG_IDLE_PAGE_TRACKING is not enabled");
- exit(KSFT_SKIP);
- }
+ __TEST_REQUIRE(page_idle_fd >= 0,
+ "CONFIG_IDLE_PAGE_TRACKING is not enabled");
close(page_idle_fd);
for_each_guest_mode(run_test, &params);
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 6a719d065599..779ae54f89c4 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -44,28 +44,26 @@ static char *guest_data_prototype;
static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
{
- int ret;
- int vcpu_id = vcpu_args->vcpu_id;
- struct kvm_vm *vm = perf_test_args.vm;
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ int vcpu_idx = vcpu_args->vcpu_idx;
+ struct kvm_run *run = vcpu->run;
struct timespec start;
struct timespec ts_diff;
-
- run = vcpu_state(vm, vcpu_id);
+ int ret;
clock_gettime(CLOCK_MONOTONIC, &start);
/* Let the guest access its memory */
- ret = _vcpu_run(vm, vcpu_id);
+ ret = _vcpu_run(vcpu);
TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
- if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) {
+ if (get_ucall(vcpu, NULL) != UCALL_SYNC) {
TEST_ASSERT(false,
"Invalid guest sync status: exit_reason=%s\n",
exit_reason_str(run->exit_reason));
}
ts_diff = timespec_elapsed(start);
- PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
+ PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_idx,
ts_diff.tv_sec, ts_diff.tv_nsec);
}
@@ -223,6 +221,7 @@ static void setup_demand_paging(struct kvm_vm *vm,
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
+ int ret;
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
is_minor ? "MINOR" : "MISSING",
@@ -242,19 +241,18 @@ static void setup_demand_paging(struct kvm_vm *vm,
}
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
- TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
+ TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd));
uffdio_api.api = UFFD_API;
uffdio_api.features = 0;
- TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
- "ioctl UFFDIO_API failed: %" PRIu64,
- (uint64_t)uffdio_api.api);
+ ret = ioctl(uffd, UFFDIO_API, &uffdio_api);
+ TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret));
uffdio_register.range.start = (uint64_t)hva;
uffdio_register.range.len = len;
uffdio_register.mode = uffd_mode;
- TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
- "ioctl UFFDIO_REGISTER failed");
+ ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register);
+ TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret));
TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
expected_ioctls, "missing userfaultfd ioctls");
@@ -285,8 +283,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct timespec ts_diff;
int *pipefds = NULL;
struct kvm_vm *vm;
- int vcpu_id;
- int r;
+ int r, i;
vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
p->src_type, p->partition_vcpu_memory_access);
@@ -309,12 +306,12 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pipefds = malloc(sizeof(int) * nr_vcpus * 2);
TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ for (i = 0; i < nr_vcpus; i++) {
struct perf_test_vcpu_args *vcpu_args;
void *vcpu_hva;
void *vcpu_alias;
- vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+ vcpu_args = &perf_test_args.vcpu_args[i];
/* Cache the host addresses of the region */
vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
@@ -324,13 +321,13 @@ static void run_test(enum vm_guest_mode mode, void *arg)
* Set up user fault fd to handle demand paging
* requests.
*/
- r = pipe2(&pipefds[vcpu_id * 2],
+ r = pipe2(&pipefds[i * 2],
O_CLOEXEC | O_NONBLOCK);
TEST_ASSERT(!r, "Failed to set up pipefd");
- setup_demand_paging(vm, &uffd_handler_threads[vcpu_id],
- pipefds[vcpu_id * 2], p->uffd_mode,
- p->uffd_delay, &uffd_args[vcpu_id],
+ setup_demand_paging(vm, &uffd_handler_threads[i],
+ pipefds[i * 2], p->uffd_mode,
+ p->uffd_delay, &uffd_args[i],
vcpu_hva, vcpu_alias,
vcpu_args->pages * perf_test_args.guest_page_size);
}
@@ -350,11 +347,11 @@ static void run_test(enum vm_guest_mode mode, void *arg)
char c;
/* Tell the user fault fd handler threads to quit */
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- r = write(pipefds[vcpu_id * 2 + 1], &c, 1);
+ for (i = 0; i < nr_vcpus; i++) {
+ r = write(pipefds[i * 2 + 1], &c, 1);
TEST_ASSERT(r == 1, "Unable to write to pipefd");
- pthread_join(uffd_handler_threads[vcpu_id], NULL);
+ pthread_join(uffd_handler_threads[i], NULL);
}
}
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index d60a34cdfaee..f99e39a672d3 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -59,6 +59,7 @@ static void arch_cleanup_vm(struct kvm_vm *vm)
static int nr_vcpus = 1;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+static bool run_vcpus_while_disabling_dirty_logging;
/* Host variables */
static u64 dirty_log_manual_caps;
@@ -68,54 +69,59 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
{
- int ret;
- struct kvm_vm *vm = perf_test_args.vm;
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ int vcpu_idx = vcpu_args->vcpu_idx;
uint64_t pages_count = 0;
struct kvm_run *run;
struct timespec start;
struct timespec ts_diff;
struct timespec total = (struct timespec){0};
struct timespec avg;
- int vcpu_id = vcpu_args->vcpu_id;
+ int ret;
- run = vcpu_state(vm, vcpu_id);
+ run = vcpu->run;
while (!READ_ONCE(host_quit)) {
int current_iteration = READ_ONCE(iteration);
clock_gettime(CLOCK_MONOTONIC, &start);
- ret = _vcpu_run(vm, vcpu_id);
+ ret = _vcpu_run(vcpu);
ts_diff = timespec_elapsed(start);
TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
- TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC,
+ TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
"Invalid guest sync status: exit_reason=%s\n",
exit_reason_str(run->exit_reason));
- pr_debug("Got sync event from vCPU %d\n", vcpu_id);
- vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+ pr_debug("Got sync event from vCPU %d\n", vcpu_idx);
+ vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
pr_debug("vCPU %d updated last completed iteration to %d\n",
- vcpu_id, vcpu_last_completed_iteration[vcpu_id]);
+ vcpu_idx, vcpu_last_completed_iteration[vcpu_idx]);
if (current_iteration) {
pages_count += vcpu_args->pages;
total = timespec_add(total, ts_diff);
pr_debug("vCPU %d iteration %d dirty memory time: %ld.%.9lds\n",
- vcpu_id, current_iteration, ts_diff.tv_sec,
+ vcpu_idx, current_iteration, ts_diff.tv_sec,
ts_diff.tv_nsec);
} else {
pr_debug("vCPU %d iteration %d populate memory time: %ld.%.9lds\n",
- vcpu_id, current_iteration, ts_diff.tv_sec,
+ vcpu_idx, current_iteration, ts_diff.tv_sec,
ts_diff.tv_nsec);
}
+ /*
+ * Keep running the guest while dirty logging is being disabled
+ * (iteration is negative) so that vCPUs are accessing memory
+ * for the entire duration of zapping collapsible SPTEs.
+ */
while (current_iteration == READ_ONCE(iteration) &&
- !READ_ONCE(host_quit)) {}
+ READ_ONCE(iteration) >= 0 && !READ_ONCE(host_quit)) {}
}
- avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]);
+ avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_idx]);
pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
- vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id],
+ vcpu_idx, pages_count, vcpu_last_completed_iteration[vcpu_idx],
total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
}
@@ -207,14 +213,13 @@ static void run_test(enum vm_guest_mode mode, void *arg)
uint64_t guest_num_pages;
uint64_t host_num_pages;
uint64_t pages_per_slot;
- int vcpu_id;
struct timespec start;
struct timespec ts_diff;
struct timespec get_dirty_log_total = (struct timespec){0};
struct timespec vcpu_dirty_total = (struct timespec){0};
struct timespec avg;
- struct kvm_enable_cap cap = {};
struct timespec clear_dirty_log_total = (struct timespec){0};
+ int i;
vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
p->slots, p->backing_src,
@@ -222,18 +227,16 @@ static void run_test(enum vm_guest_mode mode, void *arg)
perf_test_set_wr_fract(vm, p->wr_fract);
- guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
+ guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift;
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
host_num_pages = vm_num_host_pages(mode, guest_num_pages);
pages_per_slot = host_num_pages / p->slots;
bitmaps = alloc_bitmaps(p->slots, pages_per_slot);
- if (dirty_log_manual_caps) {
- cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
- cap.args[0] = dirty_log_manual_caps;
- vm_enable_cap(vm, &cap);
- }
+ if (dirty_log_manual_caps)
+ vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
+ dirty_log_manual_caps);
arch_setup_vm(vm, nr_vcpus);
@@ -242,15 +245,15 @@ static void run_test(enum vm_guest_mode mode, void *arg)
host_quit = false;
clock_gettime(CLOCK_MONOTONIC, &start);
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
- vcpu_last_completed_iteration[vcpu_id] = -1;
+ for (i = 0; i < nr_vcpus; i++)
+ vcpu_last_completed_iteration[i] = -1;
perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
/* Allow the vCPUs to populate memory */
pr_debug("Starting iteration %d - Populating\n", iteration);
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) !=
+ for (i = 0; i < nr_vcpus; i++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
iteration)
;
}
@@ -275,8 +278,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
iteration++;
pr_debug("Starting iteration %d\n", iteration);
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id])
+ for (i = 0; i < nr_vcpus; i++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[i])
!= iteration)
;
}
@@ -305,6 +308,14 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
}
+ /*
+ * Run vCPUs while dirty logging is being disabled to stress disabling
+ * in terms of both performance and correctness. Opt-in via command
+ * line as this significantly increases time to disable dirty logging.
+ */
+ if (run_vcpus_while_disabling_dirty_logging)
+ WRITE_ONCE(iteration, -1);
+
/* Disable dirty logging */
clock_gettime(CLOCK_MONOTONIC, &start);
disable_dirty_logging(vm, p->slots);
@@ -312,7 +323,11 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pr_info("Disabling dirty logging time: %ld.%.9lds\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
- /* Tell the vcpu thread to quit */
+ /*
+ * Tell the vCPU threads to quit. No need to manually check that vCPUs
+ * have stopped running after disabling dirty logging, the join will
+ * wait for them to exit.
+ */
host_quit = true;
perf_test_join_vcpu_threads(nr_vcpus);
@@ -352,6 +367,9 @@ static void help(char *name)
" Warning: a low offset can conflict with the loaded test code.\n");
guest_modes_help();
printf(" -n: Run the vCPUs in nested mode (L2)\n");
+ printf(" -e: Run vCPUs while dirty logging is being disabled. This\n"
+ " can significantly increase runtime, especially if there\n"
+ " isn't a dedicated pCPU for the main thread.\n");
printf(" -b: specify the size of the memory region which should be\n"
" dirtied by each vCPU. e.g. 10M or 3G.\n"
" (default: 1G)\n");
@@ -388,8 +406,11 @@ int main(int argc, char *argv[])
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:os:x:")) != -1) {
+ while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) {
switch (opt) {
+ case 'e':
+ /* 'e' is for evil. */
+ run_vcpus_while_disabling_dirty_logging = true;
case 'g':
dirty_log_manual_caps = 0;
break;
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 3fcd89e195c7..9c883c94d478 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -23,8 +23,6 @@
#include "guest_modes.h"
#include "processor.h"
-#define VCPU_ID 1
-
/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX 1
@@ -212,34 +210,31 @@ static void sem_wait_until(sem_t *sem)
static bool clear_log_supported(void)
{
- return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+ return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
}
static void clear_log_create_vm_done(struct kvm_vm *vm)
{
- struct kvm_enable_cap cap = {};
u64 manual_caps;
manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!");
manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
KVM_DIRTY_LOG_INITIALLY_SET);
- cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
- cap.args[0] = manual_caps;
- vm_enable_cap(vm, &cap);
+ vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, manual_caps);
}
-static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
+static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages)
{
- kvm_vm_get_dirty_log(vm, slot, bitmap);
+ kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
}
-static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
+static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages)
{
- kvm_vm_get_dirty_log(vm, slot, bitmap);
- kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
+ kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
+ kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages);
}
/* Should only be called after a GUEST_SYNC */
@@ -253,14 +248,14 @@ static void vcpu_handle_sync_stop(void)
}
}
-static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
"vcpu run failed: errno=%d", err);
- TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+ TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
"Invalid guest sync status: exit_reason=%s\n",
exit_reason_str(run->exit_reason));
@@ -269,7 +264,7 @@ static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
static bool dirty_ring_supported(void)
{
- return kvm_check_cap(KVM_CAP_DIRTY_LOG_RING);
+ return kvm_has_cap(KVM_CAP_DIRTY_LOG_RING);
}
static void dirty_ring_create_vm_done(struct kvm_vm *vm)
@@ -331,7 +326,7 @@ static void dirty_ring_continue_vcpu(void)
sem_post(&sem_vcpu_cont);
}
-static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
+static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages)
{
/* We only have one vcpu */
@@ -351,10 +346,10 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
}
/* Only have one vcpu */
- count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID),
+ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
slot, bitmap, num_pages, &fetch_index);
- cleared = kvm_vm_reset_dirty_ring(vm);
+ cleared = kvm_vm_reset_dirty_ring(vcpu->vm);
/* Cleared pages should be the same as collected */
TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
@@ -369,12 +364,12 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
pr_info("Iteration %ld collected %u pages\n", iteration, count);
}
-static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
/* A ucall-sync or ring-full event is allowed */
- if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
+ if (get_ucall(vcpu, NULL) == UCALL_SYNC) {
/* We should allow this to continue */
;
} else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
@@ -408,10 +403,10 @@ struct log_mode {
/* Hook when the vm creation is done (before vcpu creation) */
void (*create_vm_done)(struct kvm_vm *vm);
/* Hook to collect the dirty pages into the bitmap provided */
- void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
+ void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages);
/* Hook to call when after each vcpu run */
- void (*after_vcpu_run)(struct kvm_vm *vm, int ret, int err);
+ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
void (*before_vcpu_join) (void);
} log_modes[LOG_MODE_NUM] = {
{
@@ -473,22 +468,22 @@ static void log_mode_create_vm_done(struct kvm_vm *vm)
mode->create_vm_done(vm);
}
-static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
+static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages)
{
struct log_mode *mode = &log_modes[host_log_mode];
TEST_ASSERT(mode->collect_dirty_pages != NULL,
"collect_dirty_pages() is required for any log mode!");
- mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
+ mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages);
}
-static void log_mode_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
{
struct log_mode *mode = &log_modes[host_log_mode];
if (mode->after_vcpu_run)
- mode->after_vcpu_run(vm, ret, err);
+ mode->after_vcpu_run(vcpu, ret, err);
}
static void log_mode_before_vcpu_join(void)
@@ -509,16 +504,15 @@ static void generate_random_array(uint64_t *guest_array, uint64_t size)
static void *vcpu_worker(void *data)
{
- int ret, vcpu_fd;
- struct kvm_vm *vm = data;
+ int ret;
+ struct kvm_vcpu *vcpu = data;
+ struct kvm_vm *vm = vcpu->vm;
uint64_t *guest_array;
uint64_t pages_count = 0;
struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
+ sizeof(sigset_t));
sigset_t *sigset = (sigset_t *) &sigmask->sigset;
- vcpu_fd = vcpu_get_fd(vm, VCPU_ID);
-
/*
* SIG_IPI is unblocked atomically while in KVM_RUN. It causes the
* ioctl to return with -EINTR, but it is still pending and we need
@@ -527,7 +521,7 @@ static void *vcpu_worker(void *data)
sigmask->len = 8;
pthread_sigmask(0, NULL, sigset);
sigdelset(sigset, SIG_IPI);
- vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask);
+ vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask);
sigemptyset(sigset);
sigaddset(sigset, SIG_IPI);
@@ -539,13 +533,13 @@ static void *vcpu_worker(void *data)
generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
pages_count += TEST_PAGES_PER_LOOP;
/* Let the guest dirty the random pages */
- ret = ioctl(vcpu_fd, KVM_RUN, NULL);
+ ret = __vcpu_run(vcpu);
if (ret == -1 && errno == EINTR) {
int sig = -1;
sigwait(sigset, &sig);
assert(sig == SIG_IPI);
}
- log_mode_after_vcpu_run(vm, ret, errno);
+ log_mode_after_vcpu_run(vcpu, ret, errno);
}
pr_info("Dirtied %"PRIu64" pages\n", pages_count);
@@ -671,21 +665,17 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
}
}
-static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
uint64_t extra_mem_pages, void *guest_code)
{
struct kvm_vm *vm;
- uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
- vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name);
-#ifdef __x86_64__
- vm_create_irqchip(vm);
-#endif
+ vm = __vm_create(mode, 1, extra_mem_pages);
+
log_mode_create_vm_done(vm);
- vm_vcpu_add_default(vm, vcpuid, guest_code);
+ *vcpu = vm_vcpu_add(vm, 0, guest_code);
return vm;
}
@@ -701,6 +691,7 @@ struct test_params {
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *p = arg;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
unsigned long *bmap;
@@ -718,25 +709,23 @@ static void run_test(enum vm_guest_mode mode, void *arg)
* (e.g., 64K page size guest will need even less memory for
* page tables).
*/
- vm = create_vm(mode, VCPU_ID,
- 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K),
- guest_code);
+ vm = create_vm(mode, &vcpu,
+ 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), guest_code);
- guest_page_size = vm_get_page_size(vm);
+ guest_page_size = vm->page_size;
/*
* A little more than 1G of guest page sized pages. Cover the
* case where the size is not aligned to 64 pages.
*/
- guest_num_pages = (1ul << (DIRTY_MEM_BITS -
- vm_get_page_shift(vm))) + 3;
+ guest_num_pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3;
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
host_page_size = getpagesize();
host_num_pages = vm_num_host_pages(mode, guest_num_pages);
if (!p->phys_offset) {
- guest_test_phys_mem = (vm_get_max_gfn(vm) -
- guest_num_pages) * guest_page_size;
+ guest_test_phys_mem = (vm->max_gfn - guest_num_pages) *
+ guest_page_size;
guest_test_phys_mem = align_down(guest_test_phys_mem, host_page_size);
} else {
guest_test_phys_mem = p->phys_offset;
@@ -781,12 +770,12 @@ static void run_test(enum vm_guest_mode mode, void *arg)
host_clear_count = 0;
host_track_next_count = 0;
- pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
+ pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
while (iteration < p->iterations) {
/* Give the vcpu thread some time to dirty some pages */
usleep(p->interval * 1000);
- log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX,
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
bmap, host_num_pages);
/*
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
index b21c69a56daa..f5d59b9934f1 100644
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -27,12 +27,6 @@
sem_t *sem;
-/* Arguments for the pthreads */
-struct payload {
- struct kvm_vm *vm;
- uint32_t index;
-};
-
static void guest_code(void)
{
for (;;)
@@ -42,14 +36,14 @@ static void guest_code(void)
static void *run_vcpu(void *arg)
{
- struct payload *payload = (struct payload *)arg;
- struct kvm_run *state = vcpu_state(payload->vm, payload->index);
+ struct kvm_vcpu *vcpu = arg;
+ struct kvm_run *run = vcpu->run;
- vcpu_run(payload->vm, payload->index);
+ vcpu_run(vcpu);
TEST_ASSERT(false, "%s: exited with reason %d: %s\n",
- __func__, state->exit_reason,
- exit_reason_str(state->exit_reason));
+ __func__, run->exit_reason,
+ exit_reason_str(run->exit_reason));
pthread_exit(NULL);
}
@@ -92,11 +86,11 @@ static inline void check_join(pthread_t thread, void **retval)
static void run_test(uint32_t run)
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
cpu_set_t cpu_set;
pthread_t threads[VCPU_NUM];
pthread_t throw_away;
- struct payload payloads[VCPU_NUM];
void *b;
uint32_t i, j;
@@ -104,18 +98,13 @@ static void run_test(uint32_t run)
for (i = 0; i < VCPU_NUM; i++)
CPU_SET(i, &cpu_set);
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name);
- vm_create_irqchip(vm);
+ vm = vm_create(VCPU_NUM);
pr_debug("%s: [%d] start vcpus\n", __func__, run);
for (i = 0; i < VCPU_NUM; ++i) {
- vm_vcpu_add_default(vm, i, guest_code);
- payloads[i].vm = vm;
- payloads[i].index = i;
+ vcpu = vm_vcpu_add(vm, i, guest_code);
- check_create_thread(&threads[i], NULL, run_vcpu,
- (void *)&payloads[i]);
+ check_create_thread(&threads[i], NULL, run_vcpu, vcpu);
check_set_affinity(threads[i], &cpu_set);
for (j = 0; j < SLEEPING_THREAD_NUM; ++j) {
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 59ece9d4e0d1..a8124f9dd68a 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -19,7 +19,7 @@
/*
* KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert
* SYS_* register definitions in asm/sysreg.h to use in KVM
- * calls such as get_reg() and set_reg().
+ * calls such as vcpu_get_reg() and vcpu_set_reg().
*/
#define KVM_ARM64_SYS_REG(sys_reg_id) \
ARM64_SYS_REG(sys_reg_Op0(sys_reg_id), \
@@ -47,25 +47,9 @@
#define MPIDR_HWID_BITMASK (0xff00fffffful)
-static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr)
-{
- struct kvm_one_reg reg;
- reg.id = id;
- reg.addr = (uint64_t)addr;
- vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, &reg);
-}
-
-static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t val)
-{
- struct kvm_one_reg reg;
- reg.id = id;
- reg.addr = (uint64_t)&val;
- vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, &reg);
-}
-
-void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init);
-void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_init *init, void *guest_code);
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init, void *guest_code);
struct ex_regs {
u64 regs[31];
@@ -117,7 +101,7 @@ void aarch64_get_supported_page_sizes(uint32_t ipa,
bool *ps4k, bool *ps16k, bool *ps64k);
void vm_init_descriptor_tables(struct kvm_vm *vm);
-void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
typedef void(*handler_fn)(struct ex_regs *);
void vm_install_exception_handler(struct kvm_vm *vm,
@@ -207,4 +191,6 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
uint64_t arg6, struct arm_smccc_res *res);
+uint32_t guest_get_vcpuid(void);
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h
index 4442081221a0..0ac6f05c63f9 100644
--- a/tools/testing/selftests/kvm/include/aarch64/vgic.h
+++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h
@@ -8,6 +8,8 @@
#include <linux/kvm.h>
+#include "kvm_util.h"
+
#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \
(((uint64_t)(count) << 52) | \
((uint64_t)((base) >> 16) << 16) | \
@@ -26,8 +28,8 @@ void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
/* The vcpu arg only applies to private interrupts. */
-void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu);
-void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu);
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 92cef0ffb19e..24fde97f6121 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -9,9 +9,14 @@
#include "test_util.h"
-#include "asm/kvm.h"
+#include <linux/compiler.h>
+#include "linux/hashtable.h"
#include "linux/list.h"
-#include "linux/kvm.h"
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include "linux/rbtree.h"
+
+
#include <sys/ioctl.h>
#include "sparsebit.h"
@@ -21,20 +26,88 @@
#define NSEC_PER_SEC 1000000000L
-/*
- * Callers of kvm_util only have an incomplete/opaque description of the
- * structure kvm_util is using to maintain the state of a VM.
- */
-struct kvm_vm;
-
typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+struct userspace_mem_region {
+ struct kvm_userspace_memory_region region;
+ struct sparsebit *unused_phy_pages;
+ int fd;
+ off_t offset;
+ void *host_mem;
+ void *host_alias;
+ void *mmap_start;
+ void *mmap_alias;
+ size_t mmap_size;
+ struct rb_node gpa_node;
+ struct rb_node hva_node;
+ struct hlist_node slot_node;
+};
+
+struct kvm_vcpu {
+ struct list_head list;
+ uint32_t id;
+ int fd;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+#ifdef __x86_64__
+ struct kvm_cpuid2 *cpuid;
+#endif
+ struct kvm_dirty_gfn *dirty_gfns;
+ uint32_t fetch_index;
+ uint32_t dirty_gfns_count;
+};
+
+struct userspace_mem_regions {
+ struct rb_root gpa_tree;
+ struct rb_root hva_tree;
+ DECLARE_HASHTABLE(slot_hash, 9);
+};
+
+struct kvm_vm {
+ int mode;
+ unsigned long type;
+ int kvm_fd;
+ int fd;
+ unsigned int pgtable_levels;
+ unsigned int page_size;
+ unsigned int page_shift;
+ unsigned int pa_bits;
+ unsigned int va_bits;
+ uint64_t max_gfn;
+ struct list_head vcpus;
+ struct userspace_mem_regions regions;
+ struct sparsebit *vpages_valid;
+ struct sparsebit *vpages_mapped;
+ bool has_irqchip;
+ bool pgd_created;
+ vm_paddr_t pgd;
+ vm_vaddr_t gdt;
+ vm_vaddr_t tss;
+ vm_vaddr_t idt;
+ vm_vaddr_t handlers;
+ uint32_t dirty_ring_size;
+
+ /* Cache of information for binary stats interface */
+ int stats_fd;
+ struct kvm_stats_header stats_header;
+ struct kvm_stats_desc *stats_desc;
+};
+
+
+#define kvm_for_each_vcpu(vm, i, vcpu) \
+ for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \
+ if (!((vcpu) = vm->vcpus[i])) \
+ continue; \
+ else
+
+struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot);
+
/* Minimum allocated guest virtual and physical addresses */
#define KVM_UTIL_MIN_VADDR 0x2000
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-#define DEFAULT_GUEST_PHY_PAGES 512
#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
#define DEFAULT_STACK_PGS 5
@@ -102,49 +175,194 @@ extern const struct vm_guest_mode_params vm_guest_mode_params[];
int open_path_or_exit(const char *path, int flags);
int open_kvm_dev_path_or_exit(void);
-int kvm_check_cap(long cap);
-int vm_check_cap(struct kvm_vm *vm, long cap);
-int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
-int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
- struct kvm_enable_cap *cap);
+unsigned int kvm_check_cap(long cap);
+
+static inline bool kvm_has_cap(long cap)
+{
+ return kvm_check_cap(cap);
+}
+
+#define __KVM_SYSCALL_ERROR(_name, _ret) \
+ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
+
+#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret)
+#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret)
+
+#define kvm_do_ioctl(fd, cmd, arg) \
+({ \
+ static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd), ""); \
+ ioctl(fd, cmd, arg); \
+})
+
+#define __kvm_ioctl(kvm_fd, cmd, arg) \
+ kvm_do_ioctl(kvm_fd, cmd, arg)
+
+
+#define _kvm_ioctl(kvm_fd, cmd, name, arg) \
+({ \
+ int ret = __kvm_ioctl(kvm_fd, cmd, arg); \
+ \
+ TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \
+})
+
+#define kvm_ioctl(kvm_fd, cmd, arg) \
+ _kvm_ioctl(kvm_fd, cmd, #cmd, arg)
+
+static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { }
+
+#define __vm_ioctl(vm, cmd, arg) \
+({ \
+ static_assert_is_vm(vm); \
+ kvm_do_ioctl((vm)->fd, cmd, arg); \
+})
+
+#define _vm_ioctl(vm, cmd, name, arg) \
+({ \
+ int ret = __vm_ioctl(vm, cmd, arg); \
+ \
+ TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \
+})
+
+#define vm_ioctl(vm, cmd, arg) \
+ _vm_ioctl(vm, cmd, #cmd, arg)
+
+
+static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { }
+
+#define __vcpu_ioctl(vcpu, cmd, arg) \
+({ \
+ static_assert_is_vcpu(vcpu); \
+ kvm_do_ioctl((vcpu)->fd, cmd, arg); \
+})
+
+#define _vcpu_ioctl(vcpu, cmd, name, arg) \
+({ \
+ int ret = __vcpu_ioctl(vcpu, cmd, arg); \
+ \
+ TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \
+})
+
+#define vcpu_ioctl(vcpu, cmd, arg) \
+ _vcpu_ioctl(vcpu, cmd, #cmd, arg)
+
+/*
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+static inline int vm_check_cap(struct kvm_vm *vm, long cap)
+{
+ int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap);
+
+ TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));
+ return ret;
+}
+
+static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
+}
+static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
+}
+
void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
const char *vm_guest_mode_string(uint32_t i);
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
void kvm_vm_free(struct kvm_vm *vmp);
-void kvm_vm_restart(struct kvm_vm *vmp, int perm);
+void kvm_vm_restart(struct kvm_vm *vmp);
void kvm_vm_release(struct kvm_vm *vmp);
-void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
-void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages);
-uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
-
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
size_t len);
-
void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
int kvm_memfd_alloc(size_t size, bool hugepages);
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-/*
- * VM VCPU Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the current state of the VCPU specified by @vcpuid, within the VM
- * given by @vm, to the FILE stream given by @stream.
- */
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
- uint8_t indent);
+static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+{
+ struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
+
+ vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args);
+}
+
+static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+ uint64_t first_page, uint32_t num_pages)
+{
+ struct kvm_clear_dirty_log args = {
+ .dirty_bitmap = log,
+ .slot = slot,
+ .first_page = first_page,
+ .num_pages = num_pages
+ };
+
+ vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args);
+}
+
+static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
+{
+ return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL);
+}
+
+static inline int vm_get_stats_fd(struct kvm_vm *vm)
+{
+ int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL);
+
+ TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd));
+ return fd;
+}
+
+static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header)
+{
+ ssize_t ret;
+
+ ret = read(stats_fd, header, sizeof(*header));
+ TEST_ASSERT(ret == sizeof(*header), "Read stats header");
+}
+
+struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
+ struct kvm_stats_header *header);
+
+static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header)
+{
+ /*
+ * The base size of the descriptor is defined by KVM's ABI, but the
+ * size of the name field is variable, as far as KVM's ABI is
+ * concerned. For a given instance of KVM, the name field is the same
+ * size for all stats and is provided in the overall stats header.
+ */
+ return sizeof(struct kvm_stats_desc) + header->name_size;
+}
+
+static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats,
+ int index,
+ struct kvm_stats_header *header)
+{
+ /*
+ * Note, size_desc includes the size of the name field, which is
+ * variable. i.e. this is NOT equivalent to &stats_desc[i].
+ */
+ return (void *)stats + index * get_stats_descriptor_size(header);
+}
+
+void read_stat_data(int stats_fd, struct kvm_stats_header *header,
+ struct kvm_stats_desc *desc, uint64_t *data,
+ size_t max_elements);
+
+void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
+ size_t max_elements);
+
+static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
+{
+ uint64_t data;
+
+ __vm_get_stat(vm, stat_name, &data, 1);
+ return data;
+}
void vm_create_irqchip(struct kvm_vm *vm);
@@ -157,18 +375,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
uint64_t guest_paddr, uint32_t slot, uint64_t npages,
uint32_t flags);
-void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
- void *arg);
-int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
- void *arg);
-void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
-void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
+struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
@@ -180,42 +390,219 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
-/*
- * Address Guest Virtual to Guest Physical
- *
- * Input Args:
- * vm - Virtual Machine
- * gva - VM virtual address
- *
- * Output Args: None
- *
- * Return:
- * Equivalent VM physical address
- *
- * Returns the VM physical address of the translated VM virtual
- * address given by @gva.
- */
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
-
-struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
-int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
-int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_guest_debug *debug);
-void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_mp_state *mp_state);
-struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
-void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_run(struct kvm_vcpu *vcpu);
+int _vcpu_run(struct kvm_vcpu *vcpu);
+
+static inline int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+ return __vcpu_ioctl(vcpu, KVM_RUN, NULL);
+}
+
+void vcpu_run_complete_io(struct kvm_vcpu *vcpu);
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu);
+
+static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap,
+ uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap);
+}
+
+static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *debug)
+{
+ vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug);
+}
+
+static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state);
+}
+static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state);
+}
+
+static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_REGS, regs);
+}
+
+static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_REGS, regs);
+}
+static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs);
+
+}
+static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
+}
+static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
+}
+static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_ioctl(vcpu, KVM_GET_FPU, fpu);
+}
+static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_ioctl(vcpu, KVM_SET_FPU, fpu);
+}
+
+static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+
+ return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
+}
+static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+ return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+}
+static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+
+ vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
+}
+static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+ vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+}
+
+#ifdef __KVM_HAVE_VCPU_EVENTS
+static inline void vcpu_events_get(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events);
+}
+static inline void vcpu_events_set(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events);
+}
+#endif
+#ifdef __x86_64__
+static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state);
+}
+static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
+}
+
+static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
+}
+#endif
+static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu)
+{
+ int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL);
+
+ TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd));
+ return fd;
+}
+
+int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr);
+
+static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
+{
+ int ret = __kvm_has_device_attr(dev_fd, group, attr);
+
+ TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
+}
+
+int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val);
+
+static inline void kvm_device_attr_get(int dev_fd, uint32_t group,
+ uint64_t attr, void *val)
+{
+ int ret = __kvm_device_attr_get(dev_fd, group, attr, val);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret));
+}
+
+int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val);
+
+static inline void kvm_device_attr_set(int dev_fd, uint32_t group,
+ uint64_t attr, void *val)
+{
+ int ret = __kvm_device_attr_set(dev_fd, group, attr, val);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
+}
+
+static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr)
+{
+ return __kvm_has_device_attr(vcpu->fd, group, attr);
+}
+
+static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr)
+{
+ kvm_has_device_attr(vcpu->fd, group, attr);
+}
+
+static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ return __kvm_device_attr_get(vcpu->fd, group, attr, val);
+}
+
+static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ kvm_device_attr_get(vcpu->fd, group, attr, val);
+}
+
+static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ return __kvm_device_attr_set(vcpu->fd, group, attr, val);
+}
+
+static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ kvm_device_attr_set(vcpu->fd, group, attr, val);
+}
+
+int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type);
+int __kvm_create_device(struct kvm_vm *vm, uint64_t type);
+
+static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type)
+{
+ int fd = __kvm_create_device(vm, type);
+
+ TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd));
+ return fd;
+}
+
+void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu);
/*
* VM VCPU Args Set
*
* Input Args:
* vm - Virtual Machine
- * vcpuid - VCPU ID
* num - number of arguments
* ... - arguments, each of type uint64_t
*
@@ -223,59 +610,16 @@ void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
*
* Return: None
*
- * Sets the first @num function input registers of the VCPU with @vcpuid,
- * per the C calling convention of the architecture, to the values given
- * as variable args. Each of the variable args is expected to be of type
- * uint64_t. The maximum @num can be is specific to the architecture.
+ * Sets the first @num input parameters for the function at @vcpu's entry point,
+ * per the C calling convention of the architecture, to the values given as
+ * variable args. Each of the variable args is expected to be of type uint64_t.
+ * The maximum @num can be is specific to the architecture.
*/
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
-
-void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_fpu *fpu);
-void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_fpu *fpu);
-void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
-void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
-#ifdef __KVM_HAVE_VCPU_EVENTS
-void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
-void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
-#endif
-#ifdef __x86_64__
-void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state);
-int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state, bool ignore_error);
-#endif
-void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
-
-int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
-int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
-int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd);
-int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test);
-int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
- void *val, bool write);
-int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
- void *val, bool write);
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...);
+
void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
-int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr);
-int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr);
-int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr, void *val, bool write);
-int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr, void *val, bool write);
-
#define KVM_MAX_IRQ_ROUTES 4096
struct kvm_irq_routing *kvm_gsi_routing_create(void);
@@ -286,26 +630,6 @@ void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
const char *exit_reason_str(unsigned int exit_reason);
-void virt_pgd_alloc(struct kvm_vm *vm);
-
-/*
- * VM Virtual Page Map
- *
- * Input Args:
- * vm - Virtual Machine
- * vaddr - VM Virtual Address
- * paddr - VM Physical Address
- * memslot - Memory region slot for new virtual translation tables
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within @vm, creates a virtual translation for the page starting
- * at @vaddr to the page starting at @paddr.
- */
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
-
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
uint32_t memslot);
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
@@ -313,55 +637,54 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
/*
- * Create a VM with reasonable defaults
- *
- * Input Args:
- * vcpuid - The id of the single VCPU to add to the VM.
- * extra_mem_pages - The number of extra pages to add (this will
- * decide how much extra space we will need to
- * setup the page tables using memslot 0)
- * guest_code - The vCPU's entry point
- *
- * Output Args: None
- *
- * Return:
- * Pointer to opaque structure that describes the created VM.
+ * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also
+ * loads the test binary into guest memory and creates an IRQ chip (x86 only).
+ * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to
+ * calculate the amount of memory needed for per-vCPU data, e.g. stacks.
*/
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code);
+struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages);
+struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
+ uint64_t nr_extra_pages);
+
+static inline struct kvm_vm *vm_create_barebones(void)
+{
+ return ____vm_create(VM_MODE_DEFAULT, 0);
+}
-/* Same as vm_create_default, but can be used for more than one vcpu */
-struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
- uint32_t num_percpu_pages, void *guest_code,
- uint32_t vcpuids[]);
+static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
+{
+ return __vm_create(VM_MODE_DEFAULT, nr_runnable_vcpus, 0);
+}
-/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */
-struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
- uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
- uint32_t num_percpu_pages, void *guest_code,
- uint32_t vcpuids[]);
+struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
+ uint64_t extra_mem_pages,
+ void *guest_code, struct kvm_vcpu *vcpus[]);
-/* Create a default VM without any vcpus. */
-struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages);
+static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus,
+ void *guest_code,
+ struct kvm_vcpu *vcpus[])
+{
+ return __vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, 0,
+ guest_code, vcpus);
+}
/*
- * Adds a vCPU with reasonable defaults (e.g. a stack)
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - The id of the VCPU to add to the VM.
- * guest_code - The vCPU's entry point
+ * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages
+ * additional pages of guest memory. Returns the VM and vCPU (via out param).
*/
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
+struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code);
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ void *guest_code)
+{
+ return __vm_create_with_one_vcpu(vcpu, 0, guest_code);
+}
-unsigned int vm_get_page_size(struct kvm_vm *vm);
-unsigned int vm_get_page_shift(struct kvm_vm *vm);
-unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
-uint64_t vm_get_max_gfn(struct kvm_vm *vm);
-int vm_get_fd(struct kvm_vm *vm);
+struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
@@ -381,11 +704,6 @@ struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
uint64_t end);
-struct kvm_dirty_log *
-allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
-
-int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
-
#define sync_global_to_guest(vm, g) ({ \
typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
memcpy(_p, &(g), sizeof(g)); \
@@ -396,11 +714,124 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
memcpy(&(g), _p, sizeof(g)); \
})
-void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu);
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu,
+ uint8_t indent);
+
+static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu,
+ uint8_t indent)
+{
+ vcpu_arch_dump(stream, vcpu, indent);
+}
+
+/*
+ * Adds a vCPU with reasonable defaults (e.g. a stack)
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpu_id - The id of the VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ */
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ void *guest_code);
+
+static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ void *guest_code)
+{
+ return vm_arch_vcpu_add(vm, vcpu_id, guest_code);
+}
+
+/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id);
+
+static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm,
+ uint32_t vcpu_id)
+{
+ return vm_arch_vcpu_recreate(vm, vcpu_id);
+}
+
+void vcpu_arch_free(struct kvm_vcpu *vcpu);
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm);
+
+static inline void virt_pgd_alloc(struct kvm_vm *vm)
+{
+ virt_arch_pgd_alloc(vm);
+}
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within @vm, creates a virtual translation for the page starting
+ * at @vaddr to the page starting at @paddr.
+ */
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
+
+static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ virt_arch_pg_map(vm, vaddr, paddr);
+}
+
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Returns the VM physical address of the translated VM virtual
+ * address given by @gva.
+ */
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return addr_arch_gva2gpa(vm, gva);
+}
+
+/*
+ * Virtual Translation Tables Dump
+ *
+ * Input Args:
+ * stream - Output FILE stream
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by @stream, the contents of all the
+ * virtual translation tables for the VM given by @vm.
+ */
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ virt_arch_dump(stream, vm, indent);
+}
-int vm_get_stats_fd(struct kvm_vm *vm);
-int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
-uint32_t guest_get_vcpuid(void);
+static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
+{
+ return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0);
+}
#endif /* SELFTEST_KVM_UTIL_BASE_H */
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index d822cb670f1c..eaa88df0555a 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -25,7 +25,8 @@ struct perf_test_vcpu_args {
uint64_t pages;
/* Only used by the host userspace part of the vCPU thread */
- int vcpu_id;
+ struct kvm_vcpu *vcpu;
+ int vcpu_idx;
};
struct perf_test_args {
@@ -44,7 +45,7 @@ struct perf_test_args {
extern struct perf_test_args perf_test_args;
-struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
+struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
uint64_t vcpu_memory_bytes, int slots,
enum vm_mem_backing_src_type backing_src,
bool partition_vcpu_memory_access);
@@ -57,6 +58,6 @@ void perf_test_join_vcpu_threads(int vcpus);
void perf_test_guest_code(uint32_t vcpu_id);
uint64_t perf_test_nested_pages(int nr_vcpus);
-void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus);
+void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]);
#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index 4fcfd1c0389d..d00d213c3805 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -38,26 +38,6 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx,
KVM_REG_RISCV_TIMER_REG(name), \
KVM_REG_SIZE_U64)
-static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id,
- unsigned long *addr)
-{
- struct kvm_one_reg reg;
-
- reg.id = id;
- reg.addr = (unsigned long)addr;
- vcpu_get_reg(vm, vcpuid, &reg);
-}
-
-static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id,
- unsigned long val)
-{
- struct kvm_one_reg reg;
-
- reg.id = id;
- reg.addr = (unsigned long)&val;
- vcpu_set_reg(vm, vcpuid, &reg);
-}
-
/* L3 index Bit[47:39] */
#define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL
#define PGTBL_L3_INDEX_SHIFT 39
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 99e0dcdc923f..5c5a88180b6c 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -34,6 +34,13 @@ static inline int _no_printf(const char *format, ...) { return 0; }
#endif
void print_skip(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+#define __TEST_REQUIRE(f, fmt, ...) \
+do { \
+ if (!(f)) \
+ ksft_exit_skip("- " fmt "\n", ##__VA_ARGS__); \
+} while (0)
+
+#define TEST_REQUIRE(f) __TEST_REQUIRE(f, "Requirement not met: %s", #f)
ssize_t test_write(int fd, const void *buf, size_t count);
ssize_t test_read(int fd, void *buf, size_t count);
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
index 9eecc9d40b79..ee79d180e07e 100644
--- a/tools/testing/selftests/kvm/include/ucall_common.h
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -6,6 +6,7 @@
*/
#ifndef SELFTEST_KVM_UCALL_COMMON_H
#define SELFTEST_KVM_UCALL_COMMON_H
+#include "test_util.h"
/* Common ucalls */
enum {
@@ -16,7 +17,7 @@ enum {
UCALL_UNHANDLED,
};
-#define UCALL_MAX_ARGS 6
+#define UCALL_MAX_ARGS 7
struct ucall {
uint64_t cmd;
@@ -26,17 +27,26 @@ struct ucall {
void ucall_init(struct kvm_vm *vm, void *arg);
void ucall_uninit(struct kvm_vm *vm);
void ucall(uint64_t cmd, int nargs, ...);
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
+uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
#define GUEST_DONE() ucall(UCALL_DONE, 0)
-#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, 2 + _nargs, \
- "Failed guest assert: " \
- _condstr, __LINE__, _args); \
+
+enum guest_assert_builtin_args {
+ GUEST_ERROR_STRING,
+ GUEST_FILE,
+ GUEST_LINE,
+ GUEST_ASSERT_BUILTIN_NARGS
+};
+
+#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) \
+do { \
+ if (!(_condition)) \
+ ucall(UCALL_ABORT, GUEST_ASSERT_BUILTIN_NARGS + _nargs, \
+ "Failed guest assert: " _condstr, \
+ __FILE__, __LINE__, ##_args); \
} while (0)
#define GUEST_ASSERT(_condition) \
@@ -56,4 +66,45 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
+#define __REPORT_GUEST_ASSERT(_ucall, fmt, _args...) \
+ TEST_FAIL("%s at %s:%ld\n" fmt, \
+ (const char *)(_ucall).args[GUEST_ERROR_STRING], \
+ (const char *)(_ucall).args[GUEST_FILE], \
+ (_ucall).args[GUEST_LINE], \
+ ##_args)
+
+#define GUEST_ASSERT_ARG(ucall, i) ((ucall).args[GUEST_ASSERT_BUILTIN_NARGS + i])
+
+#define REPORT_GUEST_ASSERT(ucall) \
+ __REPORT_GUEST_ASSERT((ucall), "")
+
+#define REPORT_GUEST_ASSERT_1(ucall, fmt) \
+ __REPORT_GUEST_ASSERT((ucall), \
+ fmt, \
+ GUEST_ASSERT_ARG((ucall), 0))
+
+#define REPORT_GUEST_ASSERT_2(ucall, fmt) \
+ __REPORT_GUEST_ASSERT((ucall), \
+ fmt, \
+ GUEST_ASSERT_ARG((ucall), 0), \
+ GUEST_ASSERT_ARG((ucall), 1))
+
+#define REPORT_GUEST_ASSERT_3(ucall, fmt) \
+ __REPORT_GUEST_ASSERT((ucall), \
+ fmt, \
+ GUEST_ASSERT_ARG((ucall), 0), \
+ GUEST_ASSERT_ARG((ucall), 1), \
+ GUEST_ASSERT_ARG((ucall), 2))
+
+#define REPORT_GUEST_ASSERT_4(ucall, fmt) \
+ __REPORT_GUEST_ASSERT((ucall), \
+ fmt, \
+ GUEST_ASSERT_ARG((ucall), 0), \
+ GUEST_ASSERT_ARG((ucall), 1), \
+ GUEST_ASSERT_ARG((ucall), 2), \
+ GUEST_ASSERT_ARG((ucall), 3))
+
+#define REPORT_GUEST_ASSERT_N(ucall, fmt, args...) \
+ __REPORT_GUEST_ASSERT((ucall), fmt, ##args)
+
#endif /* SELFTEST_KVM_UCALL_COMMON_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
index ac88557dcc9a..bed316fdecd5 100644
--- a/tools/testing/selftests/kvm/include/x86_64/apic.h
+++ b/tools/testing/selftests/kvm/include/x86_64/apic.h
@@ -35,6 +35,7 @@
#define APIC_SPIV_APIC_ENABLED (1 << 8)
#define APIC_IRR 0x200
#define APIC_ICR 0x300
+#define APIC_LVTCMCI 0x2f0
#define APIC_DEST_SELF 0x40000
#define APIC_DEST_ALLINC 0x80000
#define APIC_DEST_ALLBUT 0xC0000
diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index cc5d14a45702..3c9260f8e116 100644
--- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -241,7 +241,7 @@ struct hv_enlightened_vmcs {
extern struct hv_enlightened_vmcs *current_evmcs;
extern struct hv_vp_assist_page *current_vp_assist;
-int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id);
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu);
static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
{
diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86_64/mce.h
new file mode 100644
index 000000000000..6119321f3f5d
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/mce.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/mce.h
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_MCE_H
+#define SELFTEST_KVM_MCE_H
+
+#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */
+#define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */
+#define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */
+#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */
+#define KVM_MAX_MCE_BANKS 32
+#define MCG_CAP_BANKS_MASK 0xff /* Bit 0-7 of the MCG_CAP register are #banks */
+#define MCI_STATUS_VAL (1ULL << 63) /* valid error */
+#define MCI_STATUS_UC (1ULL << 61) /* uncorrected error */
+#define MCI_STATUS_EN (1ULL << 60) /* error enabled */
+#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */
+#define MCM_ADDR_PHYS 2 /* physical address */
+#define MCI_CTL2_CMCI_EN BIT_ULL(30)
+
+#endif /* SELFTEST_KVM_MCE_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 6ce185449259..45edf45821d0 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -15,8 +15,12 @@
#include <asm/msr-index.h>
#include <asm/prctl.h>
+#include <linux/stringify.h>
+
#include "../kvm_util.h"
+#define NMI_VECTOR 0x02
+
#define X86_EFLAGS_FIXED (1u << 1)
#define X86_CR4_VME (1ul << 0)
@@ -41,24 +45,122 @@
#define X86_CR4_SMAP (1ul << 21)
#define X86_CR4_PKE (1ul << 22)
-/* CPUID.1.ECX */
-#define CPUID_VMX (1ul << 5)
-#define CPUID_SMX (1ul << 6)
-#define CPUID_PCID (1ul << 17)
-#define CPUID_XSAVE (1ul << 26)
+/* Note, these are ordered alphabetically to match kvm_cpuid_entry2. Eww. */
+enum cpuid_output_regs {
+ KVM_CPUID_EAX,
+ KVM_CPUID_EBX,
+ KVM_CPUID_ECX,
+ KVM_CPUID_EDX
+};
-/* CPUID.7.EBX */
-#define CPUID_FSGSBASE (1ul << 0)
-#define CPUID_SMEP (1ul << 7)
-#define CPUID_SMAP (1ul << 20)
+/*
+ * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be
+ * passed by value with no overhead.
+ */
+struct kvm_x86_cpu_feature {
+ u32 function;
+ u16 index;
+ u8 reg;
+ u8 bit;
+};
+#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \
+({ \
+ struct kvm_x86_cpu_feature feature = { \
+ .function = fn, \
+ .index = idx, \
+ .reg = KVM_CPUID_##gpr, \
+ .bit = __bit, \
+ }; \
+ \
+ feature; \
+})
-/* CPUID.7.ECX */
-#define CPUID_UMIP (1ul << 2)
-#define CPUID_PKU (1ul << 3)
-#define CPUID_LA57 (1ul << 16)
+/*
+ * Basic Leafs, a.k.a. Intel defined
+ */
+#define X86_FEATURE_MWAIT KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3)
+#define X86_FEATURE_VMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5)
+#define X86_FEATURE_SMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6)
+#define X86_FEATURE_PDCM KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15)
+#define X86_FEATURE_PCID KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17)
+#define X86_FEATURE_X2APIC KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21)
+#define X86_FEATURE_MOVBE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22)
+#define X86_FEATURE_TSC_DEADLINE_TIMER KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24)
+#define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
+#define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
+#define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
+#define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
+#define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
+#define X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
+#define X86_FEATURE_XMM KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25)
+#define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
+#define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
+#define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
+#define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
+#define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
+#define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
+#define X86_FEATURE_RTM KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11)
+#define X86_FEATURE_MPX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14)
+#define X86_FEATURE_SMAP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20)
+#define X86_FEATURE_PCOMMIT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22)
+#define X86_FEATURE_CLFLUSHOPT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23)
+#define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
+#define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
+#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
+#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
+#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
+#define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
+#define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
+#define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
+#define X86_FEATURE_SPEC_CTRL KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26)
+#define X86_FEATURE_ARCH_CAPABILITIES KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29)
+#define X86_FEATURE_PKS KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31)
+#define X86_FEATURE_XTILECFG KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17)
+#define X86_FEATURE_XTILEDATA KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
+#define X86_FEATURE_XSAVES KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
+#define X86_FEATURE_XFD KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
-/* CPUID.0x8000_0001.EDX */
-#define CPUID_GBPAGES (1ul << 26)
+/*
+ * Extended Leafs, a.k.a. AMD defined
+ */
+#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
+#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
+#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
+#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
+#define X86_FEATURE_LM KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
+#define X86_FEATURE_RDPRU KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
+#define X86_FEATURE_AMD_IBPB KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
+#define X86_FEATURE_NPT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
+#define X86_FEATURE_LBRV KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1)
+#define X86_FEATURE_NRIPS KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3)
+#define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
+#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
+#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
+#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
+#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+
+/*
+ * KVM defined paravirt features.
+ */
+#define X86_FEATURE_KVM_CLOCKSOURCE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0)
+#define X86_FEATURE_KVM_NOP_IO_DELAY KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1)
+#define X86_FEATURE_KVM_MMU_OP KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2)
+#define X86_FEATURE_KVM_CLOCKSOURCE2 KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3)
+#define X86_FEATURE_KVM_ASYNC_PF KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4)
+#define X86_FEATURE_KVM_STEAL_TIME KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5)
+#define X86_FEATURE_KVM_PV_EOI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6)
+#define X86_FEATURE_KVM_PV_UNHALT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7)
+/* Bit 8 apparently isn't used?!?! */
+#define X86_FEATURE_KVM_PV_TLB_FLUSH KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9)
+#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10)
+#define X86_FEATURE_KVM_PV_SEND_IPI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11)
+#define X86_FEATURE_KVM_POLL_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12)
+#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13)
+#define X86_FEATURE_KVM_ASYNC_PF_INT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14)
+#define X86_FEATURE_KVM_MSI_EXT_DEST_ID KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15)
+#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
+#define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)
/* Page table bitfield declarations */
#define PTE_PRESENT_MASK BIT_ULL(0)
@@ -300,10 +402,13 @@ static inline void outl(uint16_t port, uint32_t value)
__asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
}
-static inline void cpuid(uint32_t *eax, uint32_t *ebx,
- uint32_t *ecx, uint32_t *edx)
+static inline void __cpuid(uint32_t function, uint32_t index,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
{
- /* ecx is often an input as well as an output. */
+ *eax = function;
+ *ecx = index;
+
asm volatile("cpuid"
: "=a" (*eax),
"=b" (*ebx),
@@ -313,6 +418,24 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx,
: "memory");
}
+static inline void cpuid(uint32_t function,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ return __cpuid(function, 0, eax, ebx, ecx, edx);
+}
+
+static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+ uint32_t gprs[4];
+
+ __cpuid(feature.function, feature.index,
+ &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
+ &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);
+
+ return gprs[feature.reg] & BIT(feature.bit);
+}
+
#define SET_XMM(__var, __xmm) \
asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
@@ -385,6 +508,21 @@ static inline void cpu_relax(void)
asm volatile("rep; nop" ::: "memory");
}
+#define vmmcall() \
+ __asm__ __volatile__( \
+ "vmmcall\n" \
+ )
+
+#define ud2() \
+ __asm__ __volatile__( \
+ "ud2\n" \
+ )
+
+#define hlt() \
+ __asm__ __volatile__( \
+ "hlt\n" \
+ )
+
bool is_intel_cpu(void);
bool is_amd_cpu(void);
@@ -405,39 +543,198 @@ static inline unsigned int x86_model(unsigned int eax)
return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
}
-struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_x86_state *state);
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
void kvm_x86_state_cleanup(struct kvm_x86_state *state);
-struct kvm_msr_list *kvm_get_msr_index_list(void);
+const struct kvm_msr_list *kvm_get_msr_index_list(void);
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
uint64_t kvm_get_feature_msr(uint64_t msr_index);
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
-struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
-int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_cpuid2 *cpuid);
-void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_cpuid2 *cpuid);
+static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
+ struct kvm_msrs *msrs)
+{
+ int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs);
+
+ TEST_ASSERT(r == msrs->nmsrs,
+ "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
+ r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
+}
+static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs)
+{
+ int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs);
+
+ TEST_ASSERT(r == msrs->nmsrs,
+ "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
+ r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
+}
+static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu,
+ struct kvm_debugregs *debugregs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs);
+}
+static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu,
+ struct kvm_debugregs *debugregs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs);
+}
+static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *xsave)
+{
+ vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave);
+}
+static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *xsave)
+{
+ vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave);
+}
+static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *xsave)
+{
+ vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave);
+}
+static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu,
+ struct kvm_xcrs *xcrs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs);
+}
+static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
+}
+
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_feature feature);
-static inline struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_entry(uint32_t function)
+static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
{
- return kvm_get_supported_cpuid_index(function, 0);
+ return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
+}
+
+static inline size_t kvm_cpuid2_size(int nr_entries)
+{
+ return sizeof(struct kvm_cpuid2) +
+ sizeof(struct kvm_cpuid_entry2) * nr_entries;
+}
+
+/*
+ * Allocate a "struct kvm_cpuid2* instance, with the 0-length arrary of
+ * entries sized to hold @nr_entries. The caller is responsible for freeing
+ * the struct.
+ */
+static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
+{
+ struct kvm_cpuid2 *cpuid;
+
+ cpuid = malloc(kvm_cpuid2_size(nr_entries));
+ TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2");
+
+ cpuid->nent = nr_entries;
+
+ return cpuid;
}
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
-int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value);
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value);
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index);
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);
+
+static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
+ uint32_t function,
+ uint32_t index)
+{
+ return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
+ function, index);
+}
+
+static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
+ uint32_t function)
+{
+ return __vcpu_get_cpuid_entry(vcpu, function, 0);
+}
+
+static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
+ r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
+ if (r)
+ return r;
+
+ /* On success, refresh the cache to pick up adjustments made by KVM. */
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+ return 0;
+}
+
+static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
+{
+ TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
+ vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
+
+ /* Refresh the cache to pick up adjustments made by KVM. */
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
+
+void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
+
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature,
+ bool set);
+
+static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ vcpu_set_or_clear_cpuid_feature(vcpu, feature, true);
+
+}
+
+static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
+}
+
+static inline const struct kvm_cpuid_entry2 *__kvm_get_supported_cpuid_entry(uint32_t function,
+ uint32_t index)
+{
+ return get_cpuid_entry(kvm_get_supported_cpuid(), function, index);
+}
+
+static inline const struct kvm_cpuid_entry2 *kvm_get_supported_cpuid_entry(uint32_t function)
+{
+ return __kvm_get_supported_cpuid_entry(function, 0);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
+
+static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index,
+ uint64_t msr_value)
+{
+ int r = _vcpu_set_msr(vcpu, msr_index, msr_value);
+
+ TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
+}
+
+static inline uint32_t kvm_get_cpuid_max_basic(void)
+{
+ return kvm_get_supported_cpuid_entry(0)->eax;
+}
+
+static inline uint32_t kvm_get_cpuid_max_extended(void)
+{
+ return kvm_get_supported_cpuid_entry(0x80000000)->eax;
+}
-uint32_t kvm_get_cpuid_max_basic(void);
-uint32_t kvm_get_cpuid_max_extended(void);
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+bool vm_is_unrestricted_guest(struct kvm_vm *vm);
struct ex_regs {
uint64_t rax, rcx, rdx, rbx;
@@ -452,35 +749,94 @@ struct ex_regs {
};
void vm_init_descriptor_tables(struct kvm_vm *vm);
-void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
void vm_install_exception_handler(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *));
-uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr);
-void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
- uint64_t pte);
+/* If a toddler were to say "abracadabra". */
+#define KVM_EXCEPTION_MAGIC 0xabacadabaull
/*
- * get_cpuid() - find matching CPUID entry and return pointer to it.
- */
-struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function,
- uint32_t index);
-/*
- * set_cpuid() - overwrites a matching cpuid entry with the provided value.
- * matches based on ent->function && ent->index. returns true
- * if a match was found and successfully overwritten.
- * @cpuid: the kvm cpuid list to modify.
- * @ent: cpuid entry to insert
+ * KVM selftest exception fixup uses registers to coordinate with the exception
+ * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory
+ * per-CPU data. Using only registers avoids having to map memory into the
+ * guest, doesn't require a valid, stable GS.base, and reduces the risk of
+ * for recursive faults when accessing memory in the handler. The downside to
+ * using registers is that it restricts what registers can be used by the actual
+ * instruction. But, selftests are 64-bit only, making register* pressure a
+ * minor concern. Use r9-r11 as they are volatile, i.e. don't need* to be saved
+ * by the callee, and except for r11 are not implicit parameters to any
+ * instructions. Ideally, fixup would use r8-r10 and thus avoid implicit
+ * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V
+ * is higher priority than testing non-faulting SYSCALL/SYSRET.
+ *
+ * Note, the fixup handler deliberately does not handle #DE, i.e. the vector
+ * is guaranteed to be non-zero on fault.
+ *
+ * REGISTER INPUTS:
+ * r9 = MAGIC
+ * r10 = RIP
+ * r11 = new RIP on fault
+ *
+ * REGISTER OUTPUTS:
+ * r9 = exception vector (non-zero)
*/
-bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent);
+#define KVM_ASM_SAFE(insn) \
+ "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \
+ "lea 1f(%%rip), %%r10\n\t" \
+ "lea 2f(%%rip), %%r11\n\t" \
+ "1: " insn "\n\t" \
+ "mov $0, %[vector]\n\t" \
+ "jmp 3f\n\t" \
+ "2:\n\t" \
+ "mov %%r9b, %[vector]\n\t" \
+ "3:\n\t"
+
+#define KVM_ASM_SAFE_OUTPUTS(v) [vector] "=qm"(v)
+#define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11"
+
+#define kvm_asm_safe(insn, inputs...) \
+({ \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+static inline uint8_t rdmsr_safe(uint32_t msr, uint64_t *val)
+{
+ uint8_t vector;
+ uint32_t a, d;
+
+ asm volatile(KVM_ASM_SAFE("rdmsr")
+ : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector)
+ : "c"(msr)
+ : KVM_ASM_SAFE_CLOBBERS);
+
+ *val = (uint64_t)a | ((uint64_t)d << 32);
+ return vector;
+}
+
+static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
+{
+ return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
+}
+
+uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ uint64_t vaddr);
+void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ uint64_t vaddr, uint64_t pte);
uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
uint64_t a3);
-struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
-void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
-struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
-void vm_xsave_req_perm(int bit);
+void __vm_xsave_require_permission(int bit, const char *name);
+
+#define vm_xsave_require_permission(perm) \
+ __vm_xsave_require_permission(perm, #perm)
enum pg_level {
PG_LEVEL_NONE,
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h
index 2225e5077350..c8343ff84f7f 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm.h
@@ -218,8 +218,6 @@ struct __attribute__ ((__packed__)) vmcb {
struct vmcb_save_area save;
};
-#define SVM_CPUID_FUNC 0x8000000a
-
#define SVM_VM_CR_SVM_DISABLE 4
#define SVM_SELECTOR_S_SHIFT 4
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
index a25aabd8f5e7..a339b537a575 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -13,9 +13,8 @@
#include "svm.h"
#include "processor.h"
-#define CPUID_SVM_BIT 2
-#define CPUID_SVM BIT_ULL(CPUID_SVM_BIT)
-
+#define SVM_EXIT_EXCP_BASE 0x040
+#define SVM_EXIT_HLT 0x078
#define SVM_EXIT_MSR 0x07c
#define SVM_EXIT_VMMCALL 0x081
@@ -36,21 +35,19 @@ struct svm_test_data {
uint64_t msr_gpa;
};
+#define stgi() \
+ __asm__ __volatile__( \
+ "stgi\n" \
+ )
+
+#define clgi() \
+ __asm__ __volatile__( \
+ "clgi\n" \
+ )
+
struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
-bool nested_svm_supported(void);
-void nested_svm_check_supported(void);
-
-static inline bool cpu_has_svm(void)
-{
- u32 eax = 0x80000001, ecx;
-
- asm("cpuid" :
- "=a" (eax), "=c" (ecx) : "0" (eax) : "ebx", "edx");
-
- return ecx & CPUID_SVM;
-}
int open_sev_dev_path_or_exit(void);
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index cc3604f8f1d3..99fa1410964c 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -607,8 +607,6 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx);
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
bool load_vmcs(struct vmx_pages *vmx);
-bool nested_vmx_supported(void);
-void nested_vmx_check_supported(void);
bool ept_1g_pages_supported(void);
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
index 17f65d514915..0b45ac593387 100644
--- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -26,163 +26,167 @@ static void stats_test(int stats_fd)
int i;
size_t size_desc;
size_t size_data = 0;
- struct kvm_stats_header *header;
+ struct kvm_stats_header header;
char *id;
struct kvm_stats_desc *stats_desc;
u64 *stats_data;
struct kvm_stats_desc *pdesc;
+ u32 type, unit, base;
/* Read kvm stats header */
- header = malloc(sizeof(*header));
- TEST_ASSERT(header, "Allocate memory for stats header");
+ read_stats_header(stats_fd, &header);
- ret = read(stats_fd, header, sizeof(*header));
- TEST_ASSERT(ret == sizeof(*header), "Read stats header");
- size_desc = sizeof(*stats_desc) + header->name_size;
+ size_desc = get_stats_descriptor_size(&header);
/* Read kvm stats id string */
- id = malloc(header->name_size);
+ id = malloc(header.name_size);
TEST_ASSERT(id, "Allocate memory for id string");
- ret = read(stats_fd, id, header->name_size);
- TEST_ASSERT(ret == header->name_size, "Read id string");
+
+ ret = read(stats_fd, id, header.name_size);
+ TEST_ASSERT(ret == header.name_size, "Read id string");
/* Check id string, that should start with "kvm" */
- TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header->name_size,
- "Invalid KVM stats type, id: %s", id);
+ TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header.name_size,
+ "Invalid KVM stats type, id: %s", id);
/* Sanity check for other fields in header */
- if (header->num_desc == 0) {
+ if (header.num_desc == 0) {
printf("No KVM stats defined!");
return;
}
- /* Check overlap */
- TEST_ASSERT(header->desc_offset > 0 && header->data_offset > 0
- && header->desc_offset >= sizeof(*header)
- && header->data_offset >= sizeof(*header),
- "Invalid offset fields in header");
- TEST_ASSERT(header->desc_offset > header->data_offset ||
- (header->desc_offset + size_desc * header->num_desc <=
- header->data_offset),
- "Descriptor block is overlapped with data block");
-
- /* Allocate memory for stats descriptors */
- stats_desc = calloc(header->num_desc, size_desc);
- TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");
+ /*
+ * The descriptor and data offsets must be valid, they must not overlap
+ * the header, and the descriptor and data blocks must not overlap each
+ * other. Note, the data block is rechecked after its size is known.
+ */
+ TEST_ASSERT(header.desc_offset && header.desc_offset >= sizeof(header) &&
+ header.data_offset && header.data_offset >= sizeof(header),
+ "Invalid offset fields in header");
+
+ TEST_ASSERT(header.desc_offset > header.data_offset ||
+ (header.desc_offset + size_desc * header.num_desc <= header.data_offset),
+ "Descriptor block is overlapped with data block");
+
/* Read kvm stats descriptors */
- ret = pread(stats_fd, stats_desc,
- size_desc * header->num_desc, header->desc_offset);
- TEST_ASSERT(ret == size_desc * header->num_desc,
- "Read KVM stats descriptors");
+ stats_desc = read_stats_descriptors(stats_fd, &header);
/* Sanity check for fields in descriptors */
- for (i = 0; i < header->num_desc; ++i) {
- pdesc = (void *)stats_desc + i * size_desc;
+ for (i = 0; i < header.num_desc; ++i) {
+ pdesc = get_stats_descriptor(stats_desc, i, &header);
+ type = pdesc->flags & KVM_STATS_TYPE_MASK;
+ unit = pdesc->flags & KVM_STATS_UNIT_MASK;
+ base = pdesc->flags & KVM_STATS_BASE_MASK;
+
+ /* Check name string */
+ TEST_ASSERT(strlen(pdesc->name) < header.name_size,
+ "KVM stats name (index: %d) too long", i);
+
/* Check type,unit,base boundaries */
- TEST_ASSERT((pdesc->flags & KVM_STATS_TYPE_MASK)
- <= KVM_STATS_TYPE_MAX, "Unknown KVM stats type");
- TEST_ASSERT((pdesc->flags & KVM_STATS_UNIT_MASK)
- <= KVM_STATS_UNIT_MAX, "Unknown KVM stats unit");
- TEST_ASSERT((pdesc->flags & KVM_STATS_BASE_MASK)
- <= KVM_STATS_BASE_MAX, "Unknown KVM stats base");
- /* Check exponent for stats unit
+ TEST_ASSERT(type <= KVM_STATS_TYPE_MAX,
+ "Unknown KVM stats (%s) type: %u", pdesc->name, type);
+ TEST_ASSERT(unit <= KVM_STATS_UNIT_MAX,
+ "Unknown KVM stats (%s) unit: %u", pdesc->name, unit);
+ TEST_ASSERT(base <= KVM_STATS_BASE_MAX,
+ "Unknown KVM stats (%s) base: %u", pdesc->name, base);
+
+ /*
+ * Check exponent for stats unit
* Exponent for counter should be greater than or equal to 0
* Exponent for unit bytes should be greater than or equal to 0
* Exponent for unit seconds should be less than or equal to 0
* Exponent for unit clock cycles should be greater than or
* equal to 0
+ * Exponent for unit boolean should be 0
*/
switch (pdesc->flags & KVM_STATS_UNIT_MASK) {
case KVM_STATS_UNIT_NONE:
case KVM_STATS_UNIT_BYTES:
case KVM_STATS_UNIT_CYCLES:
TEST_ASSERT(pdesc->exponent >= 0,
- "Unsupported KVM stats unit");
+ "Unsupported KVM stats (%s) exponent: %i",
+ pdesc->name, pdesc->exponent);
break;
case KVM_STATS_UNIT_SECONDS:
TEST_ASSERT(pdesc->exponent <= 0,
- "Unsupported KVM stats unit");
+ "Unsupported KVM stats (%s) exponent: %i",
+ pdesc->name, pdesc->exponent);
+ break;
+ case KVM_STATS_UNIT_BOOLEAN:
+ TEST_ASSERT(pdesc->exponent == 0,
+ "Unsupported KVM stats (%s) exponent: %d",
+ pdesc->name, pdesc->exponent);
break;
}
- /* Check name string */
- TEST_ASSERT(strlen(pdesc->name) < header->name_size,
- "KVM stats name(%s) too long", pdesc->name);
+
/* Check size field, which should not be zero */
- TEST_ASSERT(pdesc->size, "KVM descriptor(%s) with size of 0",
- pdesc->name);
+ TEST_ASSERT(pdesc->size,
+ "KVM descriptor(%s) with size of 0", pdesc->name);
/* Check bucket_size field */
switch (pdesc->flags & KVM_STATS_TYPE_MASK) {
case KVM_STATS_TYPE_LINEAR_HIST:
TEST_ASSERT(pdesc->bucket_size,
- "Bucket size of Linear Histogram stats (%s) is zero",
- pdesc->name);
+ "Bucket size of Linear Histogram stats (%s) is zero",
+ pdesc->name);
break;
default:
TEST_ASSERT(!pdesc->bucket_size,
- "Bucket size of stats (%s) is not zero",
- pdesc->name);
+ "Bucket size of stats (%s) is not zero",
+ pdesc->name);
}
size_data += pdesc->size * sizeof(*stats_data);
}
- /* Check overlap */
- TEST_ASSERT(header->data_offset >= header->desc_offset
- || header->data_offset + size_data <= header->desc_offset,
- "Data block is overlapped with Descriptor block");
+
+ /*
+ * Now that the size of the data block is known, verify the data block
+ * doesn't overlap the descriptor block.
+ */
+ TEST_ASSERT(header.data_offset >= header.desc_offset ||
+ header.data_offset + size_data <= header.desc_offset,
+ "Data block is overlapped with Descriptor block");
+
/* Check validity of all stats data size */
- TEST_ASSERT(size_data >= header->num_desc * sizeof(*stats_data),
- "Data size is not correct");
+ TEST_ASSERT(size_data >= header.num_desc * sizeof(*stats_data),
+ "Data size is not correct");
+
/* Check stats offset */
- for (i = 0; i < header->num_desc; ++i) {
- pdesc = (void *)stats_desc + i * size_desc;
+ for (i = 0; i < header.num_desc; ++i) {
+ pdesc = get_stats_descriptor(stats_desc, i, &header);
TEST_ASSERT(pdesc->offset < size_data,
- "Invalid offset (%u) for stats: %s",
- pdesc->offset, pdesc->name);
+ "Invalid offset (%u) for stats: %s",
+ pdesc->offset, pdesc->name);
}
/* Allocate memory for stats data */
stats_data = malloc(size_data);
TEST_ASSERT(stats_data, "Allocate memory for stats data");
/* Read kvm stats data as a bulk */
- ret = pread(stats_fd, stats_data, size_data, header->data_offset);
+ ret = pread(stats_fd, stats_data, size_data, header.data_offset);
TEST_ASSERT(ret == size_data, "Read KVM stats data");
/* Read kvm stats data one by one */
- size_data = 0;
- for (i = 0; i < header->num_desc; ++i) {
- pdesc = (void *)stats_desc + i * size_desc;
- ret = pread(stats_fd, stats_data,
- pdesc->size * sizeof(*stats_data),
- header->data_offset + size_data);
- TEST_ASSERT(ret == pdesc->size * sizeof(*stats_data),
- "Read data of KVM stats: %s", pdesc->name);
- size_data += pdesc->size * sizeof(*stats_data);
+ for (i = 0; i < header.num_desc; ++i) {
+ pdesc = get_stats_descriptor(stats_desc, i, &header);
+ read_stat_data(stats_fd, &header, pdesc, stats_data,
+ pdesc->size);
}
free(stats_data);
free(stats_desc);
free(id);
- free(header);
}
static void vm_stats_test(struct kvm_vm *vm)
{
- int stats_fd;
-
- /* Get fd for VM stats */
- stats_fd = vm_get_stats_fd(vm);
- TEST_ASSERT(stats_fd >= 0, "Get VM stats fd");
+ int stats_fd = vm_get_stats_fd(vm);
stats_test(stats_fd);
close(stats_fd);
TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
}
-static void vcpu_stats_test(struct kvm_vm *vm, int vcpu_id)
+static void vcpu_stats_test(struct kvm_vcpu *vcpu)
{
- int stats_fd;
-
- /* Get fd for VCPU stats */
- stats_fd = vcpu_get_stats_fd(vm, vcpu_id);
- TEST_ASSERT(stats_fd >= 0, "Get VCPU stats fd");
+ int stats_fd = vcpu_get_stats_fd(vcpu);
stats_test(stats_fd);
close(stats_fd);
@@ -203,6 +207,7 @@ static void vcpu_stats_test(struct kvm_vm *vm, int vcpu_id)
int main(int argc, char *argv[])
{
int i, j;
+ struct kvm_vcpu **vcpus;
struct kvm_vm **vms;
int max_vm = DEFAULT_NUM_VM;
int max_vcpu = DEFAULT_NUM_VCPU;
@@ -220,26 +225,26 @@ int main(int argc, char *argv[])
}
/* Check the extension for binary stats */
- if (kvm_check_cap(KVM_CAP_BINARY_STATS_FD) <= 0) {
- print_skip("Binary form statistics interface is not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_BINARY_STATS_FD));
/* Create VMs and VCPUs */
vms = malloc(sizeof(vms[0]) * max_vm);
TEST_ASSERT(vms, "Allocate memory for storing VM pointers");
+
+ vcpus = malloc(sizeof(struct kvm_vcpu *) * max_vm * max_vcpu);
+ TEST_ASSERT(vcpus, "Allocate memory for storing vCPU pointers");
+
for (i = 0; i < max_vm; ++i) {
- vms[i] = vm_create(VM_MODE_DEFAULT,
- DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ vms[i] = vm_create_barebones();
for (j = 0; j < max_vcpu; ++j)
- vm_vcpu_add(vms[i], j);
+ vcpus[i * max_vcpu + j] = __vm_vcpu_add(vms[i], j);
}
/* Check stats read for every VM and VCPU */
for (i = 0; i < max_vm; ++i) {
vm_stats_test(vms[i]);
for (j = 0; j < max_vcpu; ++j)
- vcpu_stats_test(vms[i], j);
+ vcpu_stats_test(vcpus[i * max_vcpu + j]);
}
for (i = 0; i < max_vm; ++i)
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index aed9dc3ca1e9..31b3cb24b9a7 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -28,11 +28,11 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
pr_info("Testing creating %d vCPUs, with IDs %d...%d.\n",
num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1);
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ vm = vm_create_barebones();
for (i = first_vcpu_id; i < first_vcpu_id + num_vcpus; i++)
/* This asserts that the vCPU was created. */
- vm_vcpu_add(vm, i);
+ __vm_vcpu_add(vm, i);
kvm_vm_free(vm);
}
@@ -64,11 +64,9 @@ int main(int argc, char *argv[])
rl.rlim_max = nr_fds_wanted;
int r = setrlimit(RLIMIT_NOFILE, &rl);
- if (r < 0) {
- printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n",
+ __TEST_REQUIRE(r >= 0,
+ "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n",
old_rlim_max, nr_fds_wanted);
- exit(KSFT_SKIP);
- }
} else {
TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
}
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 2c4a7563a4f8..f42c6ac6d71d 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -46,11 +46,6 @@ static const char * const test_stage_string[] = {
"KVM_ADJUST_MAPPINGS",
};
-struct vcpu_args {
- int vcpu_id;
- bool vcpu_write;
-};
-
struct test_args {
struct kvm_vm *vm;
uint64_t guest_test_virt_mem;
@@ -60,7 +55,7 @@ struct test_args {
uint64_t large_num_pages;
uint64_t host_pages_per_lpage;
enum vm_mem_backing_src_type src_type;
- struct vcpu_args vcpu_args[KVM_MAX_VCPUS];
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
};
/*
@@ -92,17 +87,13 @@ static uint64_t guest_test_phys_mem;
*/
static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
-static void guest_code(int vcpu_id)
+static void guest_code(bool do_write)
{
struct test_args *p = &test_args;
- struct vcpu_args *vcpu_args = &p->vcpu_args[vcpu_id];
enum test_stage *current_stage = &guest_test_stage;
uint64_t addr;
int i, j;
- /* Make sure vCPU args data structure is not corrupt */
- GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);
-
while (true) {
addr = p->guest_test_virt_mem;
@@ -123,7 +114,7 @@ static void guest_code(int vcpu_id)
*/
case KVM_CREATE_MAPPINGS:
for (i = 0; i < p->large_num_pages; i++) {
- if (vcpu_args->vcpu_write)
+ if (do_write)
*(uint64_t *)addr = 0x0123456789ABCDEF;
else
READ_ONCE(*(uint64_t *)addr);
@@ -193,17 +184,14 @@ static void guest_code(int vcpu_id)
static void *vcpu_worker(void *data)
{
- int ret;
- struct vcpu_args *vcpu_args = data;
- struct kvm_vm *vm = test_args.vm;
- int vcpu_id = vcpu_args->vcpu_id;
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu = data;
+ bool do_write = !(vcpu->id % 2);
struct timespec start;
struct timespec ts_diff;
enum test_stage stage;
+ int ret;
- vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
- run = vcpu_state(vm, vcpu_id);
+ vcpu_args_set(vcpu, 1, do_write);
while (!READ_ONCE(host_quit)) {
ret = sem_wait(&test_stage_updated);
@@ -213,15 +201,15 @@ static void *vcpu_worker(void *data)
return NULL;
clock_gettime(CLOCK_MONOTONIC_RAW, &start);
- ret = _vcpu_run(vm, vcpu_id);
+ ret = _vcpu_run(vcpu);
ts_diff = timespec_elapsed(start);
TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
- TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC,
+ TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
"Invalid guest sync status: exit_reason=%s\n",
- exit_reason_str(run->exit_reason));
+ exit_reason_str(vcpu->run->exit_reason));
- pr_debug("Got sync event from vCPU %d\n", vcpu_id);
+ pr_debug("Got sync event from vCPU %d\n", vcpu->id);
stage = READ_ONCE(*current_stage);
/*
@@ -230,7 +218,7 @@ static void *vcpu_worker(void *data)
*/
pr_debug("vCPU %d has completed stage %s\n"
"execution time is: %ld.%.9lds\n\n",
- vcpu_id, test_stage_string[stage],
+ vcpu->id, test_stage_string[stage],
ts_diff.tv_sec, ts_diff.tv_nsec);
ret = sem_post(&test_stage_completed);
@@ -250,7 +238,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
{
int ret;
struct test_params *p = arg;
- struct vcpu_args *vcpu_args;
enum vm_mem_backing_src_type src_type = p->src_type;
uint64_t large_page_size = get_backing_src_pagesz(src_type);
uint64_t guest_page_size = vm_guest_mode_params[mode].page_size;
@@ -260,7 +247,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
uint64_t alignment;
void *host_test_mem;
struct kvm_vm *vm;
- int vcpu_id;
/* Align up the test memory size */
alignment = max(large_page_size, guest_page_size);
@@ -268,12 +254,12 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
/* Create a VM with enough guest pages */
guest_num_pages = test_mem_size / guest_page_size;
- vm = vm_create_with_vcpus(mode, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
- guest_num_pages, 0, guest_code, NULL);
+ vm = __vm_create_with_vcpus(mode, nr_vcpus, guest_num_pages,
+ guest_code, test_args.vcpus);
/* Align down GPA of the testing memslot */
if (!p->phys_offset)
- guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
+ guest_test_phys_mem = (vm->max_gfn - guest_num_pages) *
guest_page_size;
else
guest_test_phys_mem = p->phys_offset;
@@ -292,12 +278,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
test_args.host_pages_per_lpage = large_page_size / host_page_size;
test_args.src_type = src_type;
- for (vcpu_id = 0; vcpu_id < KVM_MAX_VCPUS; vcpu_id++) {
- vcpu_args = &test_args.vcpu_args[vcpu_id];
- vcpu_args->vcpu_id = vcpu_id;
- vcpu_args->vcpu_write = !(vcpu_id % 2);
- }
-
/* Add an extra memory slot with specified backing src type */
vm_userspace_mem_region_add(vm, src_type, guest_test_phys_mem,
TEST_MEM_SLOT_INDEX, guest_num_pages, 0);
@@ -363,12 +343,11 @@ static void vcpus_complete_new_stage(enum test_stage stage)
static void run_test(enum vm_guest_mode mode, void *arg)
{
- int ret;
pthread_t *vcpu_threads;
struct kvm_vm *vm;
- int vcpu_id;
struct timespec start;
struct timespec ts_diff;
+ int ret, i;
/* Create VM with vCPUs and make some pre-initialization */
vm = pre_init_before_test(mode, arg);
@@ -379,10 +358,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
host_quit = false;
*current_stage = KVM_BEFORE_MAPPINGS;
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
- &test_args.vcpu_args[vcpu_id]);
- }
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_create(&vcpu_threads[i], NULL, vcpu_worker,
+ test_args.vcpus[i]);
vcpus_complete_new_stage(*current_stage);
pr_info("Started all vCPUs successfully\n");
@@ -424,13 +402,13 @@ static void run_test(enum vm_guest_mode mode, void *arg)
/* Tell the vcpu thread to quit */
host_quit = true;
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ for (i = 0; i < nr_vcpus; i++) {
ret = sem_post(&test_stage_updated);
TEST_ASSERT(ret == 0, "Error in sem_post");
}
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
- pthread_join(vcpu_threads[vcpu_id], NULL);
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(vcpu_threads[i], NULL);
ret = sem_destroy(&test_stage_updated);
TEST_ASSERT(ret == 0, "Error in sem_destroy");
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 6a041289fa80..6f5551368944 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -10,7 +10,6 @@
#include "guest_modes.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
@@ -75,7 +74,7 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
return 1 << (vm->page_shift - 3);
}
-void virt_pgd_alloc(struct kvm_vm *vm)
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
if (!vm->pgd_created) {
vm_paddr_t paddr = vm_phy_pages_alloc(vm,
@@ -132,14 +131,14 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
*ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
_virt_pg_map(vm, vaddr, paddr, attr_idx);
}
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint64_t *ptep;
@@ -196,7 +195,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p
#endif
}
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
int level = 4 - (vm->pgtable_levels - 1);
uint64_t pgd, *ptep;
@@ -213,9 +212,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
}
}
-void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init)
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
{
struct kvm_vcpu_init default_init = { .target = -1, };
+ struct kvm_vm *vm = vcpu->vm;
uint64_t sctlr_el1, tcr_el1;
if (!init)
@@ -227,16 +227,16 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init
init->target = preferred.target;
}
- vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, init);
+ vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
/*
* Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
* registers, which the variable argument list macros do.
*/
- set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
- get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1);
- get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1);
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1);
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1);
/* Configure base granule size */
switch (vm->mode) {
@@ -297,46 +297,49 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init
tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
- set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
- set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
- set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
- set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), vm->pgd);
- set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpuid);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), vm->pgd);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
}
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
uint64_t pstate, pc;
- get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate);
- get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc);
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate), &pstate);
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc);
fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
indent, "", pstate, pc);
}
-void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_init *init, void *guest_code)
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init, void *guest_code)
{
size_t stack_size = vm->page_size == 4096 ?
DEFAULT_STACK_PGS * vm->page_size :
vm->page_size;
uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
DEFAULT_ARM64_GUEST_STACK_VADDR_MIN);
+ struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
- vm_vcpu_add(vm, vcpuid);
- aarch64_vcpu_setup(vm, vcpuid, init);
+ aarch64_vcpu_setup(vcpu, init);
- set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
- set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+
+ return vcpu;
}
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ void *guest_code)
{
- aarch64_vcpu_add_default(vm, vcpuid, NULL, guest_code);
+ return aarch64_vcpu_add(vm, vcpu_id, NULL, guest_code);
}
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
va_list ap;
int i;
@@ -347,8 +350,8 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
va_start(ap, num);
for (i = 0; i < num; i++) {
- set_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[i]),
- va_arg(ap, uint64_t));
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]),
+ va_arg(ap, uint64_t));
}
va_end(ap);
@@ -361,11 +364,11 @@ void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
;
}
-void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
struct ucall uc;
- if (get_ucall(vm, vcpuid, &uc) != UCALL_UNHANDLED)
+ if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
return;
if (uc.args[2]) /* valid_ec */ {
@@ -383,11 +386,11 @@ struct handlers {
handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM];
};
-void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
{
extern char vectors;
- set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
}
void route_exception(struct ex_regs *regs, int vector)
@@ -469,19 +472,19 @@ void aarch64_get_supported_page_sizes(uint32_t ipa,
};
kvm_fd = open_kvm_dev_path_or_exit();
- vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, ipa);
- TEST_ASSERT(vm_fd >= 0, "Can't create VM");
+ vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa);
+ TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
- TEST_ASSERT(vcpu_fd >= 0, "Can't create vcpu");
+ TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
- TEST_ASSERT(err == 0, "Can't get target");
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err));
err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
- TEST_ASSERT(err == 0, "Can't get init vcpu");
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err));
err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
- TEST_ASSERT(err == 0, "Can't get MMFR0");
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
*ps4k = ((val >> 28) & 0xf) != 0xf;
*ps64k = ((val >> 24) & 0xf) == 0;
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index be1d9728c4ce..ed237b744690 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -5,7 +5,6 @@
* Copyright (C) 2018, Red Hat, Inc.
*/
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
static vm_vaddr_t *ucall_exit_mmio_addr;
@@ -52,7 +51,7 @@ void ucall_init(struct kvm_vm *vm, void *arg)
* lower and won't match physical addresses.
*/
bits = vm->va_bits - 1;
- bits = vm->pa_bits < bits ? vm->pa_bits : bits;
+ bits = min(vm->pa_bits, bits);
end = 1ul << bits;
start = end * 5 / 8;
step = end / 16;
@@ -78,7 +77,7 @@ void ucall(uint64_t cmd, int nargs, ...)
int i;
WRITE_ONCE(uc.cmd, cmd);
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+ nargs = min(nargs, UCALL_MAX_ARGS);
va_start(va, nargs);
for (i = 0; i < nargs; ++i)
@@ -88,9 +87,9 @@ void ucall(uint64_t cmd, int nargs, ...)
WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
}
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ struct kvm_run *run = vcpu->run;
struct ucall ucall = {};
if (uc)
@@ -103,9 +102,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
"Unexpected ucall exit mmio address access");
memcpy(&gva, run->mmio.data, sizeof(gva));
- memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall));
+ memcpy(&ucall, addr_gva2hva(vcpu->vm, gva), sizeof(ucall));
- vcpu_run_complete_io(vm, vcpu_id);
+ vcpu_run_complete_io(vcpu);
if (uc)
memcpy(uc, &ucall, sizeof(ucall));
}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
index 5d45046c1b80..b5f28d21a947 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -9,7 +9,6 @@
#include <asm/kvm.h>
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "vgic.h"
#include "gic.h"
#include "gic_v3.h"
@@ -52,31 +51,30 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
nr_vcpus, nr_vcpus_created);
/* Distributor setup */
- if (_kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3,
- false, &gic_fd) != 0)
- return -1;
+ gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (gic_fd < 0)
+ return gic_fd;
- kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
- 0, &nr_irqs, true);
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
- kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
- kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa, true);
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa);
nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
virt_map(vm, gicd_base_gpa, gicd_base_gpa, nr_gic_pages);
/* Redistributor setup */
redist_attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, gicr_base_gpa, 0, 0);
- kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr, true);
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr);
nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
virt_map(vm, gicr_base_gpa, gicr_base_gpa, nr_gic_pages);
- kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
return gic_fd;
}
@@ -89,14 +87,14 @@ int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
uint64_t val;
int ret;
- ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
- attr, &val, false);
+ ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val);
if (ret != 0)
return ret;
val |= 1U << index;
- ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
- attr, &val, true);
+ ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val);
return ret;
}
@@ -104,8 +102,7 @@ void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
{
int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
- TEST_ASSERT(ret == 0, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO failed, "
- "rc: %i errno: %i", ret, errno);
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret));
}
int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
@@ -127,12 +124,11 @@ void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
{
int ret = _kvm_arm_irq_line(vm, intid, level);
- TEST_ASSERT(ret == 0, "KVM_IRQ_LINE failed, rc: %i errno: %i",
- ret, errno);
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}
-static void vgic_poke_irq(int gic_fd, uint32_t intid,
- uint32_t vcpu, uint64_t reg_off)
+static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu,
+ uint64_t reg_off)
{
uint64_t reg = intid / 32;
uint64_t index = intid % 32;
@@ -145,7 +141,7 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid,
if (intid_is_private) {
/* TODO: only vcpu 0 implemented for now. */
- assert(vcpu == 0);
+ assert(vcpu->id == 0);
attr += SZ_64K;
}
@@ -158,17 +154,17 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid,
* intid will just make the read/writes point to above the intended
* register space (i.e., ICPENDR after ISPENDR).
*/
- kvm_device_access(gic_fd, group, attr, &val, false);
+ kvm_device_attr_get(gic_fd, group, attr, &val);
val |= 1ULL << index;
- kvm_device_access(gic_fd, group, attr, &val, true);
+ kvm_device_attr_set(gic_fd, group, attr, &val);
}
-void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu)
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
{
vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
}
-void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu)
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
{
vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
}
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index 13e8e3dcf984..9f54c098d9d0 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -11,7 +11,6 @@
#include <linux/elf.h>
#include "kvm_util.h"
-#include "kvm_util_internal.h"
static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
{
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
index 8784013b747c..99a575bbbc52 100644
--- a/tools/testing/selftests/kvm/lib/guest_modes.c
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -65,9 +65,9 @@ void guest_modes_append_default(void)
struct kvm_s390_vm_cpu_processor info;
kvm_fd = open_kvm_dev_path_or_exit();
- vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
- kvm_device_access(vm_fd, KVM_S390_VM_CPU_MODEL,
- KVM_S390_VM_CPU_PROCESSOR, &info, false);
+ vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, NULL);
+ kvm_device_attr_get(vm_fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_PROCESSOR, &info);
close(vm_fd);
close(kvm_fd);
/* Starting with z13 we have 47bits of physical address */
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 1665a220abcb..9889fe0d8919 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -8,7 +8,6 @@
#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
-#include "kvm_util_internal.h"
#include "processor.h"
#include <assert.h>
@@ -27,10 +26,7 @@ int open_path_or_exit(const char *path, int flags)
int fd;
fd = open(path, flags);
- if (fd < 0) {
- print_skip("%s not available (errno: %d)", path, errno);
- exit(KSFT_SKIP);
- }
+ __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno);
return fd;
}
@@ -70,121 +66,34 @@ int open_kvm_dev_path_or_exit(void)
* Looks up and returns the value corresponding to the capability
* (KVM_CAP_*) given by cap.
*/
-int kvm_check_cap(long cap)
+unsigned int kvm_check_cap(long cap)
{
int ret;
int kvm_fd;
kvm_fd = open_kvm_dev_path_or_exit();
- ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
- TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
- " rc: %i errno: %i", ret, errno);
+ ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
+ TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));
close(kvm_fd);
- return ret;
-}
-
-/* VM Check Capability
- *
- * Input Args:
- * vm - Virtual Machine
- * cap - Capability
- *
- * Output Args: None
- *
- * Return:
- * On success, the Value corresponding to the capability (KVM_CAP_*)
- * specified by the value of cap. On failure a TEST_ASSERT failure
- * is produced.
- *
- * Looks up and returns the value corresponding to the capability
- * (KVM_CAP_*) given by cap.
- */
-int vm_check_cap(struct kvm_vm *vm, long cap)
-{
- int ret;
-
- ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap);
- TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n"
- " rc: %i errno: %i", ret, errno);
-
- return ret;
-}
-
-/* VM Enable Capability
- *
- * Input Args:
- * vm - Virtual Machine
- * cap - Capability
- *
- * Output Args: None
- *
- * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
- *
- * Enables a capability (KVM_CAP_*) on the VM.
- */
-int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
-{
- int ret;
-
- ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
- TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
- " rc: %i errno: %i", ret, errno);
-
- return ret;
-}
-
-/* VCPU Enable Capability
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpu_id - VCPU
- * cap - Capability
- *
- * Output Args: None
- *
- * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
- *
- * Enables a capability (KVM_CAP_*) on the VCPU.
- */
-int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
- struct kvm_enable_cap *cap)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
- int r;
-
- TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
-
- r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
- TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
- " rc: %i, errno: %i", r, errno);
-
- return r;
+ return (unsigned int)ret;
}
void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
- struct kvm_enable_cap cap = { 0 };
-
- cap.cap = KVM_CAP_DIRTY_LOG_RING;
- cap.args[0] = ring_size;
- vm_enable_cap(vm, &cap);
+ vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
vm->dirty_ring_size = ring_size;
}
-static void vm_open(struct kvm_vm *vm, int perm)
+static void vm_open(struct kvm_vm *vm)
{
- vm->kvm_fd = _open_kvm_dev_path_or_exit(perm);
+ vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);
- if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
- print_skip("immediate_exit not available");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));
- vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
- TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
- "rc: %i errno: %i", vm->fd, errno);
+ vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
+ TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
}
const char *vm_guest_mode_string(uint32_t i)
@@ -234,31 +143,12 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
-/*
- * VM Create
- *
- * Input Args:
- * mode - VM Mode (e.g. VM_MODE_P52V48_4K)
- * phy_pages - Physical memory pages
- * perm - permission
- *
- * Output Args: None
- *
- * Return:
- * Pointer to opaque structure that describes the created VM.
- *
- * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
- * When phy_pages is non-zero, a memory region of phy_pages physical pages
- * is created and mapped starting at guest physical address 0. The file
- * descriptor to control the created VM is created with the permissions
- * given by perm (e.g. O_RDWR).
- */
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
{
struct kvm_vm *vm;
- pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
- vm_guest_mode_string(mode), phy_pages, perm);
+ pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
+ vm_guest_mode_string(mode), nr_pages);
vm = calloc(1, sizeof(*vm));
TEST_ASSERT(vm != NULL, "Insufficient Memory");
@@ -340,7 +230,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif
- vm_open(vm, perm);
+ vm_open(vm);
/* Limit to VA-bit canonical virtual addresses. */
vm->vpages_valid = sparsebit_alloc();
@@ -355,18 +245,56 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
/* Allocate and setup memory for guest. */
vm->vpages_mapped = sparsebit_alloc();
- if (phy_pages != 0)
+ if (nr_pages != 0)
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- 0, 0, phy_pages, 0);
+ 0, 0, nr_pages, 0);
return vm;
}
-struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages)
+static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
+ uint32_t nr_runnable_vcpus,
+ uint64_t extra_mem_pages)
+{
+ uint64_t nr_pages;
+
+ TEST_ASSERT(nr_runnable_vcpus,
+ "Use vm_create_barebones() for VMs that _never_ have vCPUs\n");
+
+ TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
+ "nr_vcpus = %d too large for host, max-vcpus = %d",
+ nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
+
+ /*
+ * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the
+ * test code and other per-VM assets that will be loaded into memslot0.
+ */
+ nr_pages = 512;
+
+ /* Account for the per-vCPU stacks on behalf of the test. */
+ nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;
+
+ /*
+ * Account for the number of pages needed for the page tables. The
+ * maximum page table size for a memory region will be when the
+ * smallest page size is used. Considering each page contains x page
+ * table descriptors, the total extra size for page tables (for extra
+ * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
+ * than N/x*2.
+ */
+ nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;
+
+ return vm_adjust_num_guest_pages(mode, nr_pages);
+}
+
+struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
+ uint64_t nr_extra_pages)
{
+ uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
+ nr_extra_pages);
struct kvm_vm *vm;
- vm = vm_create(mode, pages, O_RDWR);
+ vm = ____vm_create(mode, nr_pages);
kvm_vm_elf_load(vm, program_invocation_name);
@@ -382,9 +310,7 @@ struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages)
* Input Args:
* mode - VM Mode (e.g. VM_MODE_P52V48_4K)
* nr_vcpus - VCPU count
- * slot0_mem_pages - Slot0 physical memory size
* extra_mem_pages - Non-slot0 physical memory total size
- * num_percpu_pages - Per-cpu physical memory pages
* guest_code - Guest entry point
* vcpuids - VCPU IDs
*
@@ -393,64 +319,39 @@ struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages)
* Return:
* Pointer to opaque structure that describes the created VM.
*
- * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K),
- * with customized slot0 memory size, at least 512 pages currently.
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
* extra_mem_pages is only used to calculate the maximum page table size,
* no real memory allocation for non-slot0 memory in this function.
*/
-struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
- uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
- uint32_t num_percpu_pages, void *guest_code,
- uint32_t vcpuids[])
+struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
+ uint64_t extra_mem_pages,
+ void *guest_code, struct kvm_vcpu *vcpus[])
{
- uint64_t vcpu_pages, extra_pg_pages, pages;
struct kvm_vm *vm;
int i;
- /* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */
- if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
- slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
-
- /* The maximum page table size for a memory region will be when the
- * smallest pages are used. Considering each page contains x page
- * table descriptors, the total extra size for page tables (for extra
- * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
- * than N/x*2.
- */
- vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
- extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
- pages = slot0_mem_pages + vcpu_pages + extra_pg_pages;
-
- TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
- "nr_vcpus = %d too large for host, max-vcpus = %d",
- nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
-
- pages = vm_adjust_num_guest_pages(mode, pages);
-
- vm = vm_create_without_vcpus(mode, pages);
+ TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");
- for (i = 0; i < nr_vcpus; ++i) {
- uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
+ vm = __vm_create(mode, nr_vcpus, extra_mem_pages);
- vm_vcpu_add_default(vm, vcpuid, guest_code);
- }
+ for (i = 0; i < nr_vcpus; ++i)
+ vcpus[i] = vm_vcpu_add(vm, i, guest_code);
return vm;
}
-struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
- uint32_t num_percpu_pages, void *guest_code,
- uint32_t vcpuids[])
+struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code)
{
- return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
- extra_mem_pages, num_percpu_pages, guest_code, vcpuids);
-}
+ struct kvm_vcpu *vcpus[1];
+ struct kvm_vm *vm;
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code)
-{
- return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code,
- (uint32_t []){ vcpuid });
+ vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages,
+ guest_code, vcpus);
+
+ *vcpu = vcpus[0];
+ return vm;
}
/*
@@ -458,7 +359,6 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
*
* Input Args:
* vm - VM that has been released before
- * perm - permission
*
* Output Args: None
*
@@ -466,12 +366,12 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
* global state, such as the irqchip and the memory regions that are mapped
* into the guest.
*/
-void kvm_vm_restart(struct kvm_vm *vmp, int perm)
+void kvm_vm_restart(struct kvm_vm *vmp)
{
int ctr;
struct userspace_mem_region *region;
- vm_open(vmp, perm);
+ vm_open(vmp);
if (vmp->has_irqchip)
vm_create_irqchip(vmp);
@@ -488,34 +388,17 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
}
}
-void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
+ uint32_t vcpu_id)
{
- struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
- int ret;
-
- ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
- TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
- __func__, strerror(-ret));
+ return __vm_vcpu_add(vm, vcpu_id);
}
-void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages)
+struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
{
- struct kvm_clear_dirty_log args = {
- .dirty_bitmap = log, .slot = slot,
- .first_page = first_page,
- .num_pages = num_pages
- };
- int ret;
-
- ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
- TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
- __func__, strerror(-ret));
-}
+ kvm_vm_restart(vm);
-uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
-{
- return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
+ return vm_vcpu_recreate(vm, 0);
}
/*
@@ -589,32 +472,9 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
return &region->region;
}
-/*
- * VCPU Find
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return:
- * Pointer to VCPU structure
- *
- * Locates a vcpu structure that describes the VCPU specified by vcpuid and
- * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU
- * for the specified vcpuid.
- */
-struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
+__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu;
-
- list_for_each_entry(vcpu, &vm->vcpus, list) {
- if (vcpu->id == vcpuid)
- return vcpu;
- }
- return NULL;
}
/*
@@ -629,43 +489,41 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
*
* Removes a vCPU from a VM and frees its resources.
*/
-static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
+static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
int ret;
if (vcpu->dirty_gfns) {
ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
- TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
- "rc: %i errno: %i", ret, errno);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
vcpu->dirty_gfns = NULL;
}
- ret = munmap(vcpu->state, vcpu_mmap_sz());
- TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
- "errno: %i", ret, errno);
+ ret = munmap(vcpu->run, vcpu_mmap_sz());
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+
ret = close(vcpu->fd);
- TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
- "errno: %i", ret, errno);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
list_del(&vcpu->list);
+
+ vcpu_arch_free(vcpu);
free(vcpu);
}
void kvm_vm_release(struct kvm_vm *vmp)
{
- struct vcpu *vcpu, *tmp;
+ struct kvm_vcpu *vcpu, *tmp;
int ret;
list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
vm_vcpu_rm(vmp, vcpu);
ret = close(vmp->fd);
- TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
- " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
ret = close(vmp->kvm_fd);
- TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
- " vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
@@ -681,13 +539,11 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
}
region->region.memory_size = 0;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
sparsebit_free(&region->unused_phy_pages);
ret = munmap(region->mmap_start, region->mmap_size);
- TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
free(region);
}
@@ -704,6 +560,12 @@ void kvm_vm_free(struct kvm_vm *vmp)
if (vmp == NULL)
return;
+ /* Free cached stats metadata and close FD */
+ if (vmp->stats_fd) {
+ free(vmp->stats_desc);
+ close(vmp->stats_fd);
+ }
+
/* Free userspace_mem_regions. */
hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
__vm_mem_region_delete(vmp, region, false);
@@ -727,14 +589,13 @@ int kvm_memfd_alloc(size_t size, bool hugepages)
memfd_flags |= MFD_HUGETLB;
fd = memfd_create("kvm_selftest", memfd_flags);
- TEST_ASSERT(fd != -1, "memfd_create() failed, errno: %i (%s)",
- errno, strerror(errno));
+ TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));
r = ftruncate(fd, size);
- TEST_ASSERT(!r, "ftruncate() failed, errno: %i (%s)", errno, strerror(errno));
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));
r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
- TEST_ASSERT(!r, "fallocate() failed, errno: %i (%s)", errno, strerror(errno));
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
return fd;
}
@@ -1000,8 +861,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
vm_mem_backing_src_alias(src_type)->flag,
region->fd, 0);
TEST_ASSERT(region->mmap_start != MAP_FAILED,
- "test_malloc failed, mmap_start: %p errno: %i",
- region->mmap_start, errno);
+ __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
@@ -1029,7 +889,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
region->region.guest_phys_addr = guest_paddr;
region->region.memory_size = npages * vm->page_size;
region->region.userspace_addr = (uintptr_t) region->host_mem;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
@@ -1049,7 +909,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
vm_mem_backing_src_alias(src_type)->flag,
region->fd, 0);
TEST_ASSERT(region->mmap_alias != MAP_FAILED,
- "mmap of alias failed, errno: %i", errno);
+ __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
/* Align host alias address */
region->host_alias = align_ptr_up(region->mmap_alias, alignment);
@@ -1112,7 +972,7 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
region->region.flags = flags;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
" rc: %i errno: %i slot: %u flags: 0x%x",
@@ -1142,7 +1002,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
region->region.guest_phys_addr = new_gpa;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
"ret: %i errno: %i slot: %u new_gpa: 0x%lx",
@@ -1167,19 +1027,7 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}
-/*
- * VCPU mmap Size
- *
- * Input Args: None
- *
- * Output Args: None
- *
- * Return:
- * Size of VCPU state
- *
- * Returns the size of the structure pointed to by the return value
- * of vcpu_state().
- */
+/* Returns the size of a vCPU's kvm_run structure. */
static int vcpu_mmap_sz(void)
{
int dev_fd, ret;
@@ -1188,59 +1036,57 @@ static int vcpu_mmap_sz(void)
ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
TEST_ASSERT(ret >= sizeof(struct kvm_run),
- "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
- __func__, ret, errno);
+ KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));
close(dev_fd);
return ret;
}
+static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ struct kvm_vcpu *vcpu;
+
+ list_for_each_entry(vcpu, &vm->vcpus, list) {
+ if (vcpu->id == vcpu_id)
+ return true;
+ }
+
+ return false;
+}
+
/*
- * VM VCPU Add
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return: None
- *
- * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
- * No additional VCPU setup is done.
+ * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
+ * No additional vCPU setup is done. Returns the vCPU.
*/
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
+struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
- struct vcpu *vcpu;
+ struct kvm_vcpu *vcpu;
/* Confirm a vcpu with the specified id doesn't already exist. */
- vcpu = vcpu_find(vm, vcpuid);
- if (vcpu != NULL)
- TEST_FAIL("vcpu with the specified id "
- "already exists,\n"
- " requested vcpuid: %u\n"
- " existing vcpuid: %u state: %p",
- vcpuid, vcpu->id, vcpu->state);
+ TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id);
/* Allocate and initialize new vcpu structure. */
vcpu = calloc(1, sizeof(*vcpu));
TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
- vcpu->id = vcpuid;
- vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
- TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
- vcpu->fd, errno);
- TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
+ vcpu->vm = vm;
+ vcpu->id = vcpu_id;
+ vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
+ TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd));
+
+ TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
- vcpu_mmap_sz(), sizeof(*vcpu->state));
- vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
+ vcpu_mmap_sz(), sizeof(*vcpu->run));
+ vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
- TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
- "vcpu id: %u errno: %i", vcpuid, errno);
+ TEST_ASSERT(vcpu->run != MAP_FAILED,
+ __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
/* Add to linked-list of VCPUs. */
list_add(&vcpu->list, &vm->vcpus);
+
+ return vcpu;
}
/*
@@ -1336,8 +1182,6 @@ va_found:
* vm - Virtual Machine
* sz - Size in bytes
* vaddr_min - Minimum starting virtual address
- * data_memslot - Memory region slot for data pages
- * pgd_memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
@@ -1423,7 +1267,6 @@ vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
* vaddr - Virtuall address to map
* paddr - VM Physical Address
* npages - The number of pages to map
- * pgd_memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
@@ -1534,11 +1377,10 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
* (without failing the test) if the guest memory is not shared (so
* no alias exists).
*
- * When vm_create() and related functions are called with a shared memory
- * src_type, we also create a writable, shared alias mapping of the
- * underlying guest memory. This allows the host to manipulate guest memory
- * without mapping that memory in the guest's address space. And, for
- * userfaultfd-based demand paging, we can do so without triggering userfaults.
+ * Create a writable, shared virtual=>physical alias for the specific GPA.
+ * The primary use case is to allow the host selftest to manipulate guest
+ * memory without mapping said memory in the guest's address space. And, for
+ * userfaultfd-based demand paging, to do so without triggering userfaults.
*/
void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
@@ -1556,452 +1398,90 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
return (void *) ((uintptr_t) region->host_alias + offset);
}
-/*
- * VM Create IRQ Chip
- *
- * Input Args:
- * vm - Virtual Machine
- *
- * Output Args: None
- *
- * Return: None
- *
- * Creates an interrupt controller chip for the VM specified by vm.
- */
+/* Create an interrupt controller chip for the specified VM. */
void vm_create_irqchip(struct kvm_vm *vm)
{
- int ret;
-
- ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
- TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
vm->has_irqchip = true;
}
-/*
- * VM VCPU State
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return:
- * Pointer to structure that describes the state of the VCPU.
- *
- * Locates and returns a pointer to a structure that describes the
- * state of the VCPU with the given vcpuid.
- */
-struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- return vcpu->state;
-}
-
-/*
- * VM VCPU Run
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return: None
- *
- * Switch to executing the code for the VCPU given by vcpuid, within the VM
- * given by vm.
- */
-void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
-{
- int ret = _vcpu_run(vm, vcpuid);
- TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
-}
-
-int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+int _vcpu_run(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int rc;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
do {
- rc = ioctl(vcpu->fd, KVM_RUN, NULL);
+ rc = __vcpu_run(vcpu);
} while (rc == -1 && errno == EINTR);
- assert_on_unhandled_exception(vm, vcpuid);
+ assert_on_unhandled_exception(vcpu);
return rc;
}
-int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid)
+/*
+ * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR.
+ * Assert if the KVM returns an error (other than -EINTR).
+ */
+void vcpu_run(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret = _vcpu_run(vcpu);
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- return vcpu->fd;
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret));
}
-void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
+void vcpu_run_complete_io(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- vcpu->state->immediate_exit = 1;
- ret = ioctl(vcpu->fd, KVM_RUN, NULL);
- vcpu->state->immediate_exit = 0;
+ vcpu->run->immediate_exit = 1;
+ ret = __vcpu_run(vcpu);
+ vcpu->run->immediate_exit = 0;
TEST_ASSERT(ret == -1 && errno == EINTR,
"KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
ret, errno);
}
-void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_guest_debug *debug)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
-
- TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
-}
-
-/*
- * VM VCPU Set MP State
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * mp_state - mp_state to be set
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the MP state of the VCPU given by vcpuid, to the state given
- * by mp_state.
- */
-void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_mp_state *mp_state)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
- TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
-}
-
/*
- * VM VCPU Get Reg List
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args:
- * None
- *
- * Return:
- * A pointer to an allocated struct kvm_reg_list
- *
* Get the list of guest registers which are supported for
- * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
+ * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Returns a kvm_reg_list pointer,
+ * it is the caller's responsibility to free the list.
*/
-struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
int ret;
- ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
+ ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
+
reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
reg_list->n = reg_list_n.n;
- vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
+ vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
return reg_list;
}
-/*
- * VM VCPU Regs Get
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args:
- * regs - current state of VCPU regs
- *
- * Return: None
- *
- * Obtains the current register state for the VCPU specified by vcpuid
- * and stores it at the location given by regs.
- */
-void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
- TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
- ret, errno);
-}
-
-/*
- * VM VCPU Regs Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * regs - Values to set VCPU regs to
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the regs of the VCPU specified by vcpuid to the values
- * given by regs.
- */
-void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
+void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
- TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
- ret, errno);
-}
-
-#ifdef __KVM_HAVE_VCPU_EVENTS
-void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
- TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
- ret, errno);
-}
-
-void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
- TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
- ret, errno);
-}
-#endif
-
-#ifdef __x86_64__
-void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
- TEST_ASSERT(ret == 0,
- "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
- ret, errno);
-}
-
-int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state, bool ignore_error)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
- if (!ignore_error) {
- TEST_ASSERT(ret == 0,
- "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
- ret, errno);
- }
-
- return ret;
-}
-#endif
-
-/*
- * VM VCPU System Regs Get
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args:
- * sregs - current state of VCPU system regs
- *
- * Return: None
- *
- * Obtains the current system register state for the VCPU specified by
- * vcpuid and stores it at the location given by sregs.
- */
-void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
- TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
- ret, errno);
-}
-
-/*
- * VM VCPU System Regs Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * sregs - Values to set VCPU system regs to
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the system regs of the VCPU specified by vcpuid to the values
- * given by sregs.
- */
-void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
-{
- int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
- TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
-}
-
-int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
-}
-
-void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
-{
- int ret;
-
- ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
- TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
-}
-
-void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
-{
- int ret;
-
- ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
- TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
-}
-
-void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
-{
- int ret;
-
- ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
- TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
-}
-
-void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
-{
- int ret;
-
- ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
- TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
-}
-
-/*
- * VCPU Ioctl
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * cmd - Ioctl number
- * arg - Argument to pass to the ioctl
- *
- * Return: None
- *
- * Issues an arbitrary ioctl on a VCPU fd.
- */
-void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
- unsigned long cmd, void *arg)
-{
- int ret;
-
- ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);
- TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
- cmd, ret, errno, strerror(errno));
-}
-
-int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
- unsigned long cmd, void *arg)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, cmd, arg);
-
- return ret;
-}
-
-void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
-{
- struct vcpu *vcpu;
- uint32_t size = vm->dirty_ring_size;
+ uint32_t page_size = vcpu->vm->page_size;
+ uint32_t size = vcpu->vm->dirty_ring_size;
TEST_ASSERT(size > 0, "Should enable dirty ring first");
- vcpu = vcpu_find(vm, vcpuid);
-
- TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);
-
if (!vcpu->dirty_gfns) {
void *addr;
- addr = mmap(NULL, size, PROT_READ,
- MAP_PRIVATE, vcpu->fd,
- vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
- addr = mmap(NULL, size, PROT_READ | PROT_EXEC,
- MAP_PRIVATE, vcpu->fd,
- vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
- addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
- MAP_SHARED, vcpu->fd,
- vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
vcpu->dirty_gfns = addr;
@@ -2012,62 +1492,10 @@ void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
}
/*
- * VM Ioctl
- *
- * Input Args:
- * vm - Virtual Machine
- * cmd - Ioctl number
- * arg - Argument to pass to the ioctl
- *
- * Return: None
- *
- * Issues an arbitrary ioctl on a VM fd.
- */
-void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
-{
- int ret;
-
- ret = _vm_ioctl(vm, cmd, arg);
- TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
- cmd, ret, errno, strerror(errno));
-}
-
-int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
-{
- return ioctl(vm->fd, cmd, arg);
-}
-
-/*
- * KVM system ioctl
- *
- * Input Args:
- * vm - Virtual Machine
- * cmd - Ioctl number
- * arg - Argument to pass to the ioctl
- *
- * Return: None
- *
- * Issues an arbitrary ioctl on a KVM fd.
- */
-void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
-{
- int ret;
-
- ret = ioctl(vm->kvm_fd, cmd, arg);
- TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)",
- cmd, ret, errno, strerror(errno));
-}
-
-int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
-{
- return ioctl(vm->kvm_fd, cmd, arg);
-}
-
-/*
* Device Ioctl
*/
-int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
+int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
{
struct kvm_device_attr attribute = {
.group = group,
@@ -2078,43 +1506,31 @@ int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}
-int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
-{
- int ret = _kvm_device_check_attr(dev_fd, group, attr);
-
- TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
- return ret;
-}
-
-int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd)
+int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
- struct kvm_create_device create_dev;
- int ret;
+ struct kvm_create_device create_dev = {
+ .type = type,
+ .flags = KVM_CREATE_DEVICE_TEST,
+ };
- create_dev.type = type;
- create_dev.fd = -1;
- create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;
- ret = ioctl(vm_get_fd(vm), KVM_CREATE_DEVICE, &create_dev);
- *fd = create_dev.fd;
- return ret;
+ return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}
-int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test)
+int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
- int fd, ret;
-
- ret = _kvm_create_device(vm, type, test, &fd);
+ struct kvm_create_device create_dev = {
+ .type = type,
+ .fd = -1,
+ .flags = 0,
+ };
+ int err;
- if (!test) {
- TEST_ASSERT(!ret,
- "KVM_CREATE_DEVICE IOCTL failed, rc: %i errno: %i", ret, errno);
- return fd;
- }
- return ret;
+ err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
+ TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
+ return err ? : create_dev.fd;
}
-int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
- void *val, bool write)
+int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
struct kvm_device_attr kvmattr = {
.group = group,
@@ -2122,58 +1538,20 @@ int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
.flags = 0,
.addr = (uintptr_t)val,
};
- int ret;
-
- ret = ioctl(dev_fd, write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
- &kvmattr);
- return ret;
-}
-
-int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
- void *val, bool write)
-{
- int ret = _kvm_device_access(dev_fd, group, attr, val, write);
-
- TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
- return ret;
-}
-
-int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
-
- TEST_ASSERT(vcpu, "nonexistent vcpu id: %d", vcpuid);
-
- return _kvm_device_check_attr(vcpu->fd, group, attr);
-}
-
-int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr)
-{
- int ret = _vcpu_has_device_attr(vm, vcpuid, group, attr);
-
- TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
- return ret;
-}
-int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr, void *val, bool write)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
-
- TEST_ASSERT(vcpu, "nonexistent vcpu id: %d", vcpuid);
-
- return _kvm_device_access(vcpu->fd, group, attr, val, write);
+ return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}
-int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr, void *val, bool write)
+int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
- int ret = _vcpu_access_device_attr(vm, vcpuid, group, attr, val, write);
+ struct kvm_device_attr kvmattr = {
+ .group = group,
+ .attr = attr,
+ .flags = 0,
+ .addr = (uintptr_t)val,
+ };
- TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
- return ret;
+ return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}
/*
@@ -2187,14 +1565,14 @@ int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
.level = level,
};
- return _vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
+ return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}
void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
int ret = _kvm_irq_line(vm, irq, level);
- TEST_ASSERT(ret >= 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", ret, errno);
+ TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}
struct kvm_irq_routing *kvm_gsi_routing_create(void)
@@ -2233,7 +1611,7 @@ int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
int ret;
assert(routing);
- ret = ioctl(vm_get_fd(vm), KVM_SET_GSI_ROUTING, routing);
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
free(routing);
return ret;
@@ -2244,8 +1622,7 @@ void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
int ret;
ret = _kvm_gsi_routing_write(vm, routing);
- TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING failed, rc: %i errno: %i",
- ret, errno);
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
}
/*
@@ -2267,7 +1644,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
int ctr;
struct userspace_mem_region *region;
- struct vcpu *vcpu;
+ struct kvm_vcpu *vcpu;
fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
@@ -2292,8 +1669,9 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
virt_dump(stream, vm, indent + 4);
}
fprintf(stream, "%*sVCPUs:\n", indent, "");
+
list_for_each_entry(vcpu, &vm->vcpus, list)
- vcpu_dump(stream, vm, vcpu->id, indent + 2);
+ vcpu_dump(stream, vcpu, indent + 2);
}
/* Known KVM exit reasons */
@@ -2447,64 +1825,11 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}
-/*
- * Is Unrestricted Guest
- *
- * Input Args:
- * vm - Virtual Machine
- *
- * Output Args: None
- *
- * Return: True if the unrestricted guest is set to 'Y', otherwise return false.
- *
- * Check if the unrestricted guest flag is enabled.
- */
-bool vm_is_unrestricted_guest(struct kvm_vm *vm)
-{
- char val = 'N';
- size_t count;
- FILE *f;
-
- if (vm == NULL) {
- /* Ensure that the KVM vendor-specific module is loaded. */
- close(open_kvm_dev_path_or_exit());
- }
-
- f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
- if (f) {
- count = fread(&val, sizeof(char), 1, f);
- TEST_ASSERT(count == 1, "Unable to read from param file.");
- fclose(f);
- }
-
- return val == 'Y';
-}
-
-unsigned int vm_get_page_size(struct kvm_vm *vm)
-{
- return vm->page_size;
-}
-
-unsigned int vm_get_page_shift(struct kvm_vm *vm)
-{
- return vm->page_shift;
-}
-
-unsigned long __attribute__((weak)) vm_compute_max_gfn(struct kvm_vm *vm)
+unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}
-uint64_t vm_get_max_gfn(struct kvm_vm *vm)
-{
- return vm->max_gfn;
-}
-
-int vm_get_fd(struct kvm_vm *vm)
-{
- return vm->fd;
-}
-
static unsigned int vm_calc_num_pages(unsigned int num_pages,
unsigned int page_shift,
unsigned int new_page_shift,
@@ -2545,14 +1870,112 @@ unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
return vm_adjust_num_guest_pages(mode, n);
}
-int vm_get_stats_fd(struct kvm_vm *vm)
+/*
+ * Read binary stats descriptors
+ *
+ * Input Args:
+ * stats_fd - the file descriptor for the binary stats file from which to read
+ * header - the binary stats metadata header corresponding to the given FD
+ *
+ * Output Args: None
+ *
+ * Return:
+ * A pointer to a newly allocated series of stat descriptors.
+ * Caller is responsible for freeing the returned kvm_stats_desc.
+ *
+ * Read the stats descriptors from the binary stats interface.
+ */
+struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
+ struct kvm_stats_header *header)
{
- return ioctl(vm->fd, KVM_GET_STATS_FD, NULL);
+ struct kvm_stats_desc *stats_desc;
+ ssize_t desc_size, total_size, ret;
+
+ desc_size = get_stats_descriptor_size(header);
+ total_size = header->num_desc * desc_size;
+
+ stats_desc = calloc(header->num_desc, desc_size);
+ TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");
+
+ ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
+ TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");
+
+ return stats_desc;
}
-int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid)
+/*
+ * Read stat data for a particular stat
+ *
+ * Input Args:
+ * stats_fd - the file descriptor for the binary stats file from which to read
+ * header - the binary stats metadata header corresponding to the given FD
+ * desc - the binary stat metadata for the particular stat to be read
+ * max_elements - the maximum number of 8-byte values to read into data
+ *
+ * Output Args:
+ * data - the buffer into which stat data should be read
+ *
+ * Read the data values of a specified stat from the binary stats interface.
+ */
+void read_stat_data(int stats_fd, struct kvm_stats_header *header,
+ struct kvm_stats_desc *desc, uint64_t *data,
+ size_t max_elements)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
+ size_t size = nr_elements * sizeof(*data);
+ ssize_t ret;
- return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL);
+ TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
+ TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);
+
+ ret = pread(stats_fd, data, size,
+ header->data_offset + desc->offset);
+
+ TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
+ desc->name, errno, strerror(errno));
+ TEST_ASSERT(ret == size,
+ "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
+ desc->name, size, ret);
+}
+
+/*
+ * Read the data of the named stat
+ *
+ * Input Args:
+ * vm - the VM for which the stat should be read
+ * stat_name - the name of the stat to read
+ * max_elements - the maximum number of 8-byte values to read into data
+ *
+ * Output Args:
+ * data - the buffer into which stat data should be read
+ *
+ * Read the data values of a specified stat from the binary stats interface.
+ */
+void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
+ size_t max_elements)
+{
+ struct kvm_stats_desc *desc;
+ size_t size_desc;
+ int i;
+
+ if (!vm->stats_fd) {
+ vm->stats_fd = vm_get_stats_fd(vm);
+ read_stats_header(vm->stats_fd, &vm->stats_header);
+ vm->stats_desc = read_stats_descriptors(vm->stats_fd,
+ &vm->stats_header);
+ }
+
+ size_desc = get_stats_descriptor_size(&vm->stats_header);
+
+ for (i = 0; i < vm->stats_header.num_desc; ++i) {
+ desc = (void *)vm->stats_desc + (i * size_desc);
+
+ if (strcmp(desc->name, stat_name))
+ continue;
+
+ read_stat_data(vm->stats_fd, &vm->stats_header, desc,
+ data, max_elements);
+
+ break;
+ }
}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
deleted file mode 100644
index a03febc24ba6..000000000000
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/lib/kvm_util_internal.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_UTIL_INTERNAL_H
-#define SELFTEST_KVM_UTIL_INTERNAL_H
-
-#include "linux/hashtable.h"
-#include "linux/rbtree.h"
-
-#include "sparsebit.h"
-
-struct userspace_mem_region {
- struct kvm_userspace_memory_region region;
- struct sparsebit *unused_phy_pages;
- int fd;
- off_t offset;
- void *host_mem;
- void *host_alias;
- void *mmap_start;
- void *mmap_alias;
- size_t mmap_size;
- struct rb_node gpa_node;
- struct rb_node hva_node;
- struct hlist_node slot_node;
-};
-
-struct vcpu {
- struct list_head list;
- uint32_t id;
- int fd;
- struct kvm_run *state;
- struct kvm_dirty_gfn *dirty_gfns;
- uint32_t fetch_index;
- uint32_t dirty_gfns_count;
-};
-
-struct userspace_mem_regions {
- struct rb_root gpa_tree;
- struct rb_root hva_tree;
- DECLARE_HASHTABLE(slot_hash, 9);
-};
-
-struct kvm_vm {
- int mode;
- unsigned long type;
- int kvm_fd;
- int fd;
- unsigned int pgtable_levels;
- unsigned int page_size;
- unsigned int page_shift;
- unsigned int pa_bits;
- unsigned int va_bits;
- uint64_t max_gfn;
- struct list_head vcpus;
- struct userspace_mem_regions regions;
- struct sparsebit *vpages_valid;
- struct sparsebit *vpages_mapped;
- bool has_irqchip;
- bool pgd_created;
- vm_paddr_t pgd;
- vm_vaddr_t gdt;
- vm_vaddr_t tss;
- vm_vaddr_t idt;
- vm_vaddr_t handlers;
- uint32_t dirty_ring_size;
-};
-
-struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
-
-/*
- * Virtual Translation Tables Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * vm - Virtual Machine
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps to the FILE stream given by @stream, the contents of all the
- * virtual translation tables for the VM given by @vm.
- */
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-
-/*
- * Register Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * regs - Registers
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the registers given by @regs, to the FILE stream
- * given by @stream.
- */
-void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
-
-/*
- * System Register Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * sregs - System registers
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the system registers given by @sregs, to the FILE stream
- * given by @stream.
- */
-void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
-
-struct userspace_mem_region *
-memslot2region(struct kvm_vm *vm, uint32_t memslot);
-
-#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index f989ff91f022..9618b37c66f7 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -17,8 +17,8 @@ struct perf_test_args perf_test_args;
static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
struct vcpu_thread {
- /* The id of the vCPU. */
- int vcpu_id;
+ /* The index of the vCPU. */
+ int vcpu_idx;
/* The pthread backing the vCPU. */
pthread_t thread;
@@ -36,24 +36,26 @@ static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *);
/* Set to true once all vCPU threads are up and running. */
static bool all_vcpu_threads_running;
+static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+
/*
* Continuously write to the first 8 bytes of each page in the
* specified region.
*/
-void perf_test_guest_code(uint32_t vcpu_id)
+void perf_test_guest_code(uint32_t vcpu_idx)
{
struct perf_test_args *pta = &perf_test_args;
- struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id];
+ struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx];
uint64_t gva;
uint64_t pages;
int i;
- /* Make sure vCPU args data structure is not corrupt. */
- GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);
-
gva = vcpu_args->gva;
pages = vcpu_args->pages;
+ /* Make sure vCPU args data structure is not corrupt. */
+ GUEST_ASSERT(vcpu_args->vcpu_idx == vcpu_idx);
+
while (true) {
for (i = 0; i < pages; i++) {
uint64_t addr = gva + (i * pta->guest_page_size);
@@ -68,47 +70,50 @@ void perf_test_guest_code(uint32_t vcpu_id)
}
}
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
+void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
+ struct kvm_vcpu *vcpus[],
uint64_t vcpu_memory_bytes,
bool partition_vcpu_memory_access)
{
struct perf_test_args *pta = &perf_test_args;
struct perf_test_vcpu_args *vcpu_args;
- int vcpu_id;
+ int i;
+
+ for (i = 0; i < nr_vcpus; i++) {
+ vcpu_args = &pta->vcpu_args[i];
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
- vcpu_args = &pta->vcpu_args[vcpu_id];
+ vcpu_args->vcpu = vcpus[i];
+ vcpu_args->vcpu_idx = i;
- vcpu_args->vcpu_id = vcpu_id;
if (partition_vcpu_memory_access) {
vcpu_args->gva = guest_test_virt_mem +
- (vcpu_id * vcpu_memory_bytes);
+ (i * vcpu_memory_bytes);
vcpu_args->pages = vcpu_memory_bytes /
pta->guest_page_size;
- vcpu_args->gpa = pta->gpa + (vcpu_id * vcpu_memory_bytes);
+ vcpu_args->gpa = pta->gpa + (i * vcpu_memory_bytes);
} else {
vcpu_args->gva = guest_test_virt_mem;
- vcpu_args->pages = (vcpus * vcpu_memory_bytes) /
+ vcpu_args->pages = (nr_vcpus * vcpu_memory_bytes) /
pta->guest_page_size;
vcpu_args->gpa = pta->gpa;
}
- vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+ vcpu_args_set(vcpus[i], 1, i);
pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
- vcpu_id, vcpu_args->gpa, vcpu_args->gpa +
+ i, vcpu_args->gpa, vcpu_args->gpa +
(vcpu_args->pages * pta->guest_page_size));
}
}
-struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
+struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
uint64_t vcpu_memory_bytes, int slots,
enum vm_mem_backing_src_type backing_src,
bool partition_vcpu_memory_access)
{
struct perf_test_args *pta = &perf_test_args;
struct kvm_vm *vm;
- uint64_t guest_num_pages, slot0_pages = DEFAULT_GUEST_PHY_PAGES;
+ uint64_t guest_num_pages, slot0_pages = 0;
uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
uint64_t region_end_gfn;
int i;
@@ -125,7 +130,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
pta->guest_page_size = vm_guest_mode_params[mode].page_size;
guest_num_pages = vm_adjust_num_guest_pages(mode,
- (vcpus * vcpu_memory_bytes) / pta->guest_page_size);
+ (nr_vcpus * vcpu_memory_bytes) / pta->guest_page_size);
TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0,
"Guest memory size is not host page size aligned.");
@@ -140,20 +145,20 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
* in-memory data structures.
*/
if (pta->nested)
- slot0_pages += perf_test_nested_pages(vcpus);
+ slot0_pages += perf_test_nested_pages(nr_vcpus);
/*
* Pass guest_num_pages to populate the page tables for test memory.
* The memory is also added to memslot 0, but that's a benign side
* effect as KVM allows aliasing HVAs in meslots.
*/
- vm = vm_create_with_vcpus(mode, vcpus, slot0_pages, guest_num_pages, 0,
- perf_test_guest_code, NULL);
+ vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages,
+ perf_test_guest_code, vcpus);
pta->vm = vm;
/* Put the test region at the top guest physical memory. */
- region_end_gfn = vm_get_max_gfn(vm) + 1;
+ region_end_gfn = vm->max_gfn + 1;
#ifdef __x86_64__
/*
@@ -170,11 +175,10 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
TEST_ASSERT(guest_num_pages < region_end_gfn,
"Requested more guest memory than address space allows.\n"
" guest pages: %" PRIx64 " max gfn: %" PRIx64
- " vcpus: %d wss: %" PRIx64 "]\n",
- guest_num_pages, region_end_gfn - 1, vcpus,
- vcpu_memory_bytes);
+ " nr_vcpus: %d wss: %" PRIx64 "]\n",
+ guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes);
- pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size;
+ pta->gpa = (region_end_gfn - guest_num_pages - 1) * pta->guest_page_size;
pta->gpa = align_down(pta->gpa, backing_src_pagesz);
#ifdef __s390x__
/* Align to 1M (segment size) */
@@ -197,11 +201,12 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
/* Do mapping for the demand paging memory slot */
virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages);
- perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access);
+ perf_test_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes,
+ partition_vcpu_memory_access);
if (pta->nested) {
pr_info("Configuring vCPUs to run in L2 (nested).\n");
- perf_test_setup_nested(vm, vcpus);
+ perf_test_setup_nested(vm, nr_vcpus, vcpus);
}
ucall_init(vm, NULL);
@@ -229,7 +234,7 @@ uint64_t __weak perf_test_nested_pages(int nr_vcpus)
return 0;
}
-void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
+void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus)
{
pr_info("%s() not support on this architecture, skipping.\n", __func__);
exit(KSFT_SKIP);
@@ -250,39 +255,40 @@ static void *vcpu_thread_main(void *data)
while (!READ_ONCE(all_vcpu_threads_running))
;
- vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_id]);
+ vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_idx]);
return NULL;
}
-void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *))
+void perf_test_start_vcpu_threads(int nr_vcpus,
+ void (*vcpu_fn)(struct perf_test_vcpu_args *))
{
- int vcpu_id;
+ int i;
vcpu_thread_fn = vcpu_fn;
WRITE_ONCE(all_vcpu_threads_running, false);
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
- struct vcpu_thread *vcpu = &vcpu_threads[vcpu_id];
+ for (i = 0; i < nr_vcpus; i++) {
+ struct vcpu_thread *vcpu = &vcpu_threads[i];
- vcpu->vcpu_id = vcpu_id;
+ vcpu->vcpu_idx = i;
WRITE_ONCE(vcpu->running, false);
pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu);
}
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
- while (!READ_ONCE(vcpu_threads[vcpu_id].running))
+ for (i = 0; i < nr_vcpus; i++) {
+ while (!READ_ONCE(vcpu_threads[i].running))
;
}
WRITE_ONCE(all_vcpu_threads_running, true);
}
-void perf_test_join_vcpu_threads(int vcpus)
+void perf_test_join_vcpu_threads(int nr_vcpus)
{
- int vcpu_id;
+ int i;
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++)
- pthread_join(vcpu_threads[vcpu_id].thread, NULL);
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(vcpu_threads[i].thread, NULL);
}
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index abc0ae5a4fe1..604478151212 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -9,7 +9,6 @@
#include <assert.h>
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000
@@ -54,7 +53,7 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
return (gva & pte_index_mask[level]) >> pte_index_shift[level];
}
-void virt_pgd_alloc(struct kvm_vm *vm)
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
if (!vm->pgd_created) {
vm_paddr_t paddr = vm_phy_pages_alloc(vm,
@@ -65,7 +64,7 @@ void virt_pgd_alloc(struct kvm_vm *vm)
}
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
uint64_t *ptep, next_ppn;
int level = vm->pgtable_levels - 1;
@@ -109,7 +108,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK;
}
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint64_t *ptep;
int level = vm->pgtable_levels - 1;
@@ -160,7 +159,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
#endif
}
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
int level = vm->pgtable_levels - 1;
uint64_t pgd, *ptep;
@@ -179,8 +178,9 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
}
}
-void riscv_vcpu_mmu_setup(struct kvm_vm *vm, int vcpuid)
+void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu)
{
+ struct kvm_vm *vm = vcpu->vm;
unsigned long satp;
/*
@@ -199,46 +199,46 @@ void riscv_vcpu_mmu_setup(struct kvm_vm *vm, int vcpuid)
satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
satp |= SATP_MODE_48;
- set_reg(vm, vcpuid, RISCV_CSR_REG(satp), satp);
+ vcpu_set_reg(vcpu, RISCV_CSR_REG(satp), satp);
}
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
struct kvm_riscv_core core;
- get_reg(vm, vcpuid, RISCV_CORE_REG(mode), &core.mode);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc), &core.regs.pc);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.ra), &core.regs.ra);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp), &core.regs.sp);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), &core.regs.gp);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.tp), &core.regs.tp);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t0), &core.regs.t0);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t1), &core.regs.t1);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t2), &core.regs.t2);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s0), &core.regs.s0);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s1), &core.regs.s1);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a0), &core.regs.a0);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a1), &core.regs.a1);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a2), &core.regs.a2);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a3), &core.regs.a3);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a4), &core.regs.a4);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a5), &core.regs.a5);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a6), &core.regs.a6);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a7), &core.regs.a7);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s2), &core.regs.s2);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s3), &core.regs.s3);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s4), &core.regs.s4);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s5), &core.regs.s5);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s6), &core.regs.s6);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s7), &core.regs.s7);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s8), &core.regs.s8);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s9), &core.regs.s9);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s10), &core.regs.s10);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s11), &core.regs.s11);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t3), &core.regs.t3);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t4), &core.regs.t4);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t5), &core.regs.t5);
- get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t6), &core.regs.t6);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6);
fprintf(stream,
" MODE: 0x%lx\n", core.mode);
@@ -275,7 +275,8 @@ static void __aligned(16) guest_unexp_trap(void)
0, 0, 0, 0, 0, 0);
}
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ void *guest_code)
{
int r;
size_t stack_size = vm->page_size == 4096 ?
@@ -285,9 +286,10 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
DEFAULT_RISCV_GUEST_STACK_VADDR_MIN);
unsigned long current_gp = 0;
struct kvm_mp_state mps;
+ struct kvm_vcpu *vcpu;
- vm_vcpu_add(vm, vcpuid);
- riscv_vcpu_mmu_setup(vm, vcpuid);
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ riscv_vcpu_mmu_setup(vcpu);
/*
* With SBI HSM support in KVM RISC-V, all secondary VCPUs are
@@ -295,26 +297,25 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
* are powered-on using KVM_SET_MP_STATE ioctl().
*/
mps.mp_state = KVM_MP_STATE_RUNNABLE;
- r = _vcpu_ioctl(vm, vcpuid, KVM_SET_MP_STATE, &mps);
+ r = __vcpu_ioctl(vcpu, KVM_SET_MP_STATE, &mps);
TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r);
/* Setup global pointer of guest to be same as the host */
asm volatile (
"add %0, gp, zero" : "=r" (current_gp) : : "memory");
- set_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), current_gp);
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp);
/* Setup stack pointer and program counter of guest */
- set_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp),
- stack_vaddr + stack_size);
- set_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc),
- (unsigned long)guest_code);
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size);
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code);
/* Setup default exception vector of guest */
- set_reg(vm, vcpuid, RISCV_CSR_REG(stvec),
- (unsigned long)guest_unexp_trap);
+ vcpu_set_reg(vcpu, RISCV_CSR_REG(stvec), (unsigned long)guest_unexp_trap);
+
+ return vcpu;
}
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
va_list ap;
uint64_t id = RISCV_CORE_REG(regs.a0);
@@ -352,12 +353,12 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
id = RISCV_CORE_REG(regs.a7);
break;
}
- set_reg(vm, vcpuid, id, va_arg(ap, uint64_t));
+ vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t));
}
va_end(ap);
}
-void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
}
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
index 8550f424d093..087b9740bc8f 100644
--- a/tools/testing/selftests/kvm/lib/riscv/ucall.c
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -8,7 +8,6 @@
#include <linux/kvm.h>
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
void ucall_init(struct kvm_vm *vm, void *arg)
@@ -53,7 +52,7 @@ void ucall(uint64_t cmd, int nargs, ...)
va_list va;
int i;
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+ nargs = min(nargs, UCALL_MAX_ARGS);
va_start(va, nargs);
for (i = 0; i < nargs; ++i)
@@ -65,9 +64,9 @@ void ucall(uint64_t cmd, int nargs, ...)
(vm_vaddr_t)&uc, 0, 0, 0, 0, 0);
}
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ struct kvm_run *run = vcpu->run;
struct ucall ucall = {};
if (uc)
@@ -77,16 +76,17 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) {
switch (run->riscv_sbi.function_id) {
case KVM_RISCV_SELFTESTS_SBI_UCALL:
- memcpy(&ucall, addr_gva2hva(vm,
- run->riscv_sbi.args[0]), sizeof(ucall));
+ memcpy(&ucall,
+ addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]),
+ sizeof(ucall));
- vcpu_run_complete_io(vm, vcpu_id);
+ vcpu_run_complete_io(vcpu);
if (uc)
memcpy(uc, &ucall, sizeof(ucall));
break;
case KVM_RISCV_SELFTESTS_SBI_UNEXP:
- vcpu_dump(stderr, vm, vcpu_id, 2);
+ vcpu_dump(stderr, vcpu, 2);
TEST_ASSERT(0, "Unexpected trap taken by guest");
break;
default:
diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
index 86b9e611ad87..cdb7daeed5fd 100644
--- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
+++ b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
@@ -8,8 +8,6 @@
#include "test_util.h"
#include "kvm_util.h"
-#define VCPU_ID 6
-
#define ICPT_INSTRUCTION 0x04
#define IPA0_DIAG 0x8300
@@ -27,14 +25,15 @@ static void guest_code(void)
*/
static uint64_t diag318_handler(void)
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
uint64_t reg;
uint64_t diag318_info;
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- vcpu_run(vm, VCPU_ID);
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_run(vcpu);
+ run = vcpu->run;
TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
"DIAGNOSE 0x0318 instruction was not intercepted");
@@ -62,7 +61,7 @@ uint64_t get_diag318_info(void)
* If KVM does not support diag318, then return 0 to
* ensure tests do not break.
*/
- if (!kvm_check_cap(KVM_CAP_S390_DIAG318)) {
+ if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) {
if (!printed_skip) {
fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
"Skipping diag318 test.\n");
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index f87c7137598e..89d7340d9cbd 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -7,11 +7,10 @@
#include "processor.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#define PAGES_PER_REGION 4
-void virt_pgd_alloc(struct kvm_vm *vm)
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
vm_paddr_t paddr;
@@ -47,7 +46,7 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
{
int ri, idx;
uint64_t *entry;
@@ -86,7 +85,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
entry[idx] = gpa;
}
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
int ri, idx;
uint64_t *entry;
@@ -147,7 +146,7 @@ static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
}
}
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
if (!vm->pgd_created)
return;
@@ -155,12 +154,14 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
virt_dump_region(stream, vm, indent, vm->pgd);
}
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ void *guest_code)
{
size_t stack_size = DEFAULT_STACK_PGS * getpagesize();
uint64_t stack_vaddr;
struct kvm_regs regs;
struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
struct kvm_run *run;
TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
@@ -169,24 +170,26 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
stack_vaddr = vm_vaddr_alloc(vm, stack_size,
DEFAULT_GUEST_STACK_VADDR_MIN);
- vm_vcpu_add(vm, vcpuid);
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
/* Setup guest registers */
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
- vcpu_regs_set(vm, vcpuid, &regs);
+ vcpu_regs_set(vcpu, &regs);
- vcpu_sregs_get(vm, vcpuid, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.crs[0] |= 0x00040000; /* Enable floating point regs */
sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */
- vcpu_sregs_set(vm, vcpuid, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
- run = vcpu_state(vm, vcpuid);
+ run = vcpu->run;
run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
run->psw_addr = (uintptr_t)guest_code;
+
+ return vcpu;
}
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
va_list ap;
struct kvm_regs regs;
@@ -197,26 +200,21 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
num);
va_start(ap, num);
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
for (i = 0; i < num; i++)
regs.gprs[i + 2] = va_arg(ap, uint64_t);
- vcpu_regs_set(vm, vcpuid, &regs);
+ vcpu_regs_set(vcpu, &regs);
va_end(ap);
}
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
-
- if (!vcpu)
- return;
-
fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
- indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
+ indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr);
}
-void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
}
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
index 9d3b0f15249a..73dc4e21190f 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -22,7 +22,7 @@ void ucall(uint64_t cmd, int nargs, ...)
va_list va;
int i;
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+ nargs = min(nargs, UCALL_MAX_ARGS);
va_start(va, nargs);
for (i = 0; i < nargs; ++i)
@@ -33,9 +33,9 @@ void ucall(uint64_t cmd, int nargs, ...)
asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory");
}
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ struct kvm_run *run = vcpu->run;
struct ucall ucall = {};
if (uc)
@@ -47,10 +47,10 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
(run->s390_sieic.ipb >> 16) == 0x501) {
int reg = run->s390_sieic.ipa & 0xf;
- memcpy(&ucall, addr_gva2hva(vm, run->s.regs.gprs[reg]),
+ memcpy(&ucall, addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]),
sizeof(ucall));
- vcpu_run_complete_io(vm, vcpu_id);
+ vcpu_run_complete_io(vcpu);
if (uc)
memcpy(uc, &ucall, sizeof(ucall));
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c b/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c
index e258524435a0..0f344a7c89c4 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c
@@ -12,7 +12,6 @@
#include "test_util.h"
#include "kvm_util.h"
#include "perf_test_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
#include "vmx.h"
@@ -78,14 +77,14 @@ void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
nested_identity_map_1g(vmx, vm, start, end - start);
}
-void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
+void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
{
struct vmx_pages *vmx, *vmx0 = NULL;
struct kvm_regs regs;
vm_vaddr_t vmx_gva;
int vcpu_id;
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
vmx = vcpu_alloc_vmx(vm, &vmx_gva);
@@ -104,9 +103,9 @@ void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
* Override the vCPU to run perf_test_l1_guest_code() which will
* bounce it into L2 before calling perf_test_guest_code().
*/
- vcpu_regs_get(vm, vcpu_id, &regs);
+ vcpu_regs_get(vcpus[vcpu_id], &regs);
regs.rip = (unsigned long) perf_test_l1_guest_code;
- vcpu_regs_set(vm, vcpu_id, &regs);
- vcpu_args_set(vm, vcpu_id, 2, vmx_gva, vcpu_id);
+ vcpu_regs_set(vcpus[vcpu_id], &regs);
+ vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
}
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index ead7011ee8f6..2e6e61bbe81b 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -7,7 +7,6 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
#ifndef NUM_INTERRUPTS
@@ -17,10 +16,11 @@
#define DEFAULT_CODE_SELECTOR 0x8
#define DEFAULT_DATA_SELECTOR 0x10
+#define MAX_NR_CPUID_ENTRIES 100
+
vm_vaddr_t exception_handlers;
-void regs_dump(FILE *stream, struct kvm_regs *regs,
- uint8_t indent)
+static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
{
fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
"rcx: 0x%.16llx rdx: 0x%.16llx\n",
@@ -43,21 +43,6 @@ void regs_dump(FILE *stream, struct kvm_regs *regs,
regs->rip, regs->rflags);
}
-/*
- * Segment Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * segment - KVM segment
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the KVM segment given by @segment, to the FILE stream
- * given by @stream.
- */
static void segment_dump(FILE *stream, struct kvm_segment *segment,
uint8_t indent)
{
@@ -75,21 +60,6 @@ static void segment_dump(FILE *stream, struct kvm_segment *segment,
segment->unusable, segment->padding);
}
-/*
- * dtable Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * dtable - KVM dtable
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the KVM dtable given by @dtable, to the FILE stream
- * given by @stream.
- */
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
uint8_t indent)
{
@@ -99,8 +69,7 @@ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}
-void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
- uint8_t indent)
+static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
{
unsigned int i;
@@ -142,7 +111,7 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
}
}
-void virt_pgd_alloc(struct kvm_vm *vm)
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -240,27 +209,24 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
__virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
}
-static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
- uint64_t vaddr)
+static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm,
+ struct kvm_vcpu *vcpu,
+ uint64_t vaddr)
{
uint16_t index[4];
uint64_t *pml4e, *pdpe, *pde;
uint64_t *pte;
- struct kvm_cpuid_entry2 *entry;
struct kvm_sregs sregs;
- int max_phy_addr;
uint64_t rsvd_mask = 0;
- entry = kvm_get_supported_cpuid_index(0x80000008, 0);
- max_phy_addr = entry->eax & 0x000000ff;
/* Set the high bits in the reserved mask. */
- if (max_phy_addr < 52)
- rsvd_mask = GENMASK_ULL(51, max_phy_addr);
+ if (vm->pa_bits < 52)
+ rsvd_mask = GENMASK_ULL(51, vm->pa_bits);
/*
* SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
@@ -268,7 +234,7 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
* If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1,
* the XD flag (bit 63) is reserved.
*/
- vcpu_sregs_get(vm, vcpuid, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
if ((sregs.efer & EFER_NX) == 0) {
rsvd_mask |= PTE_NX_MASK;
}
@@ -320,22 +286,23 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
return &pte[index[0]];
}
-uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
+uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ uint64_t vaddr)
{
- uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+ uint64_t *pte = _vm_get_page_table_entry(vm, vcpu, vaddr);
return *(uint64_t *)pte;
}
-void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
- uint64_t pte)
+void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ uint64_t vaddr, uint64_t pte)
{
- uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+ uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpu, vaddr);
*(uint64_t *)new_pte = pte;
}
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
uint64_t *pml4e, *pml4e_start;
uint64_t *pdpe, *pdpe_start;
@@ -516,7 +483,7 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
kvm_seg_fill_gdt_64bit(vm, segp);
}
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint16_t index[4];
uint64_t *pml4e, *pdpe, *pde;
@@ -579,12 +546,12 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
kvm_seg_fill_gdt_64bit(vm, segp);
}
-static void vcpu_setup(struct kvm_vm *vm, int vcpuid)
+static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
struct kvm_sregs sregs;
/* Set mode specific system register values. */
- vcpu_sregs_get(vm, vcpuid, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.idt.limit = 0;
@@ -608,25 +575,10 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid)
}
sregs.cr3 = vm->pgd;
- vcpu_sregs_set(vm, vcpuid, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
}
-#define CPUID_XFD_BIT (1 << 4)
-static bool is_xfd_supported(void)
-{
- int eax, ebx, ecx, edx;
- const int leaf = 0xd, subleaf = 0x1;
-
- __asm__ __volatile__(
- "cpuid"
- : /* output */ "=a"(eax), "=b"(ebx),
- "=c"(ecx), "=d"(edx)
- : /* input */ "0"(leaf), "2"(subleaf));
-
- return !!(eax & CPUID_XFD_BIT);
-}
-
-void vm_xsave_req_perm(int bit)
+void __vm_xsave_require_permission(int bit, const char *name)
{
int kvm_fd;
u64 bitmask;
@@ -637,26 +589,21 @@ void vm_xsave_req_perm(int bit)
.addr = (unsigned long) &bitmask
};
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
+
kvm_fd = open_kvm_dev_path_or_exit();
- rc = ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
+ rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
close(kvm_fd);
+
if (rc == -1 && (errno == ENXIO || errno == EINVAL))
- exit(KSFT_SKIP);
- TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
- if (!(bitmask & (1ULL << bit)))
- exit(KSFT_SKIP);
+ __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
- if (!is_xfd_supported())
- exit(KSFT_SKIP);
+ TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
- rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
+ __TEST_REQUIRE(bitmask & (1ULL << bit),
+ "Required XSAVE feature '%s' not supported", name);
- /*
- * The older kernel version(<5.15) can't support
- * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
- */
- if (rc)
- return;
+ TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit));
rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
@@ -665,108 +612,89 @@ void vm_xsave_req_perm(int bit)
bitmask);
}
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ void *guest_code)
{
struct kvm_mp_state mp_state;
struct kvm_regs regs;
vm_vaddr_t stack_vaddr;
+ struct kvm_vcpu *vcpu;
+
stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
DEFAULT_GUEST_STACK_VADDR_MIN);
- /* Create VCPU */
- vm_vcpu_add(vm, vcpuid);
- vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
- vcpu_setup(vm, vcpuid);
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
+ vcpu_setup(vm, vcpu);
/* Setup guest general purpose registers */
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
regs.rflags = regs.rflags | 0x2;
regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
regs.rip = (unsigned long) guest_code;
- vcpu_regs_set(vm, vcpuid, &regs);
+ vcpu_regs_set(vcpu, &regs);
/* Setup the MP state */
mp_state.mp_state = 0;
- vcpu_set_mp_state(vm, vcpuid, &mp_state);
+ vcpu_mp_state_set(vcpu, &mp_state);
+
+ return vcpu;
}
-/*
- * Allocate an instance of struct kvm_cpuid2
- *
- * Input Args: None
- *
- * Output Args: None
- *
- * Return: A pointer to the allocated struct. The caller is responsible
- * for freeing this struct.
- *
- * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
- * array to be decided at allocation time, allocation is slightly
- * complicated. This function uses a reasonable default length for
- * the array and performs the appropriate allocation.
- */
-static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
{
- struct kvm_cpuid2 *cpuid;
- int nent = 100;
- size_t size;
-
- size = sizeof(*cpuid);
- size += nent * sizeof(struct kvm_cpuid_entry2);
- cpuid = malloc(size);
- if (!cpuid) {
- perror("malloc");
- abort();
- }
+ struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
- cpuid->nent = nent;
+ vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
- return cpuid;
+ return vcpu;
}
-/*
- * KVM Supported CPUID Get
- *
- * Input Args: None
- *
- * Output Args:
- *
- * Return: The supported KVM CPUID
- *
- * Get the guest CPUID supported by KVM.
- */
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+void vcpu_arch_free(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->cpuid)
+ free(vcpu->cpuid);
+}
+
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
{
static struct kvm_cpuid2 *cpuid;
- int ret;
int kvm_fd;
if (cpuid)
return cpuid;
- cpuid = allocate_kvm_cpuid2();
+ cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
kvm_fd = open_kvm_dev_path_or_exit();
- ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
- TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
- ret, errno);
+ kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
close(kvm_fd);
return cpuid;
}
-/*
- * KVM Get MSR
- *
- * Input Args:
- * msr_index - Index of MSR
- *
- * Output Args: None
- *
- * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
- *
- * Get value of MSR for VCPU.
- */
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_feature feature)
+{
+ const struct kvm_cpuid_entry2 *entry;
+ int i;
+
+ for (i = 0; i < cpuid->nent; i++) {
+ entry = &cpuid->entries[i];
+
+ /*
+ * The output registers in kvm_cpuid_entry2 are in alphabetical
+ * order, but kvm_x86_cpu_feature matches that mess, so yay
+ * pointer shenanigans!
+ */
+ if (entry->function == feature.function &&
+ entry->index == feature.index)
+ return (&entry->eax)[feature.reg] & BIT(feature.bit);
+ }
+
+ return false;
+}
+
uint64_t kvm_get_feature_msr(uint64_t msr_index)
{
struct {
@@ -779,218 +707,98 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index)
buffer.entry.index = msr_index;
kvm_fd = open_kvm_dev_path_or_exit();
- r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
- TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
- " rc: %i errno: %i", r, errno);
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r));
close(kvm_fd);
return buffer.entry.data;
}
-/*
- * VM VCPU CPUID Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU id
- *
- * Output Args: None
- *
- * Return: KVM CPUID (KVM_GET_CPUID2)
- *
- * Set the VCPU's CPUID.
- */
-struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- struct kvm_cpuid2 *cpuid;
- int max_ent;
- int rc = -1;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- cpuid = allocate_kvm_cpuid2();
- max_ent = cpuid->nent;
-
- for (cpuid->nent = 1; cpuid->nent <= max_ent; cpuid->nent++) {
- rc = ioctl(vcpu->fd, KVM_GET_CPUID2, cpuid);
- if (!rc)
- break;
+ TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");
- TEST_ASSERT(rc == -1 && errno == E2BIG,
- "KVM_GET_CPUID2 should either succeed or give E2BIG: %d %d",
- rc, errno);
+ /* Allow overriding the default CPUID. */
+ if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) {
+ free(vcpu->cpuid);
+ vcpu->cpuid = NULL;
}
- TEST_ASSERT(rc == 0, "KVM_GET_CPUID2 failed, rc: %i errno: %i",
- rc, errno);
+ if (!vcpu->cpuid)
+ vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent);
- return cpuid;
+ memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
+ vcpu_set_cpuid(vcpu);
}
-
-
-/*
- * Locate a cpuid entry.
- *
- * Input Args:
- * function: The function of the cpuid entry to find.
- * index: The index of the cpuid entry.
- *
- * Output Args: None
- *
- * Return: A pointer to the cpuid entry. Never returns NULL.
- */
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr)
{
- struct kvm_cpuid2 *cpuid;
- struct kvm_cpuid_entry2 *entry = NULL;
- int i;
-
- cpuid = kvm_get_supported_cpuid();
- for (i = 0; i < cpuid->nent; i++) {
- if (cpuid->entries[i].function == function &&
- cpuid->entries[i].index == index) {
- entry = &cpuid->entries[i];
- break;
- }
- }
+ struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, 0x80000008);
- TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
- function, index);
- return entry;
+ entry->eax = (entry->eax & ~0xff) | maxphyaddr;
+ vcpu_set_cpuid(vcpu);
}
-
-int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_cpuid2 *cpuid)
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);
- return ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+ entry->eax = 0;
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ vcpu_set_cpuid(vcpu);
}
-/*
- * VM VCPU CPUID Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU id
- * cpuid - The CPUID values to set.
- *
- * Output Args: None
- *
- * Return: void
- *
- * Set the VCPU's CPUID.
- */
-void vcpu_set_cpuid(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature,
+ bool set)
{
- int rc;
+ struct kvm_cpuid_entry2 *entry;
+ u32 *reg;
- rc = __vcpu_set_cpuid(vm, vcpuid, cpuid);
- TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
- rc, errno);
+ entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+ reg = (&entry->eax) + feature.reg;
+ if (set)
+ *reg |= BIT(feature.bit);
+ else
+ *reg &= ~BIT(feature.bit);
+
+ vcpu_set_cpuid(vcpu);
}
-/*
- * VCPU Get MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- *
- * Output Args: None
- *
- * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
- *
- * Get value of MSR for VCPU.
- */
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
struct {
struct kvm_msrs header;
struct kvm_msr_entry entry;
} buffer = {};
- int r;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
buffer.header.nmsrs = 1;
buffer.entry.index = msr_index;
- r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
- TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
- " rc: %i errno: %i", r, errno);
+
+ vcpu_msrs_get(vcpu, &buffer.header);
return buffer.entry.data;
}
-/*
- * _VCPU Set MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- * msr_value - New value of MSR
- *
- * Output Args: None
- *
- * Return: The result of KVM_SET_MSRS.
- *
- * Sets the value of an MSR for the given VCPU.
- */
-int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value)
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
struct {
struct kvm_msrs header;
struct kvm_msr_entry entry;
} buffer = {};
- int r;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
memset(&buffer, 0, sizeof(buffer));
buffer.header.nmsrs = 1;
buffer.entry.index = msr_index;
buffer.entry.data = msr_value;
- r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
- return r;
-}
-
-/*
- * VCPU Set MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- * msr_value - New value of MSR
- *
- * Output Args: None
- *
- * Return: On success, nothing. On failure a TEST_ASSERT is produced.
- *
- * Set value of MSR for VCPU.
- */
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value)
-{
- int r;
- r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value);
- TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
- " rc: %i errno: %i", r, errno);
+ return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header);
}
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
va_list ap;
struct kvm_regs regs;
@@ -1000,7 +808,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
num);
va_start(ap, num);
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
if (num >= 1)
regs.rdi = va_arg(ap, uint64_t);
@@ -1020,85 +828,112 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
if (num >= 6)
regs.r9 = va_arg(ap, uint64_t);
- vcpu_regs_set(vm, vcpuid, &regs);
+ vcpu_regs_set(vcpu, &regs);
va_end(ap);
}
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
struct kvm_regs regs;
struct kvm_sregs sregs;
- fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+ fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id);
fprintf(stream, "%*sregs:\n", indent + 2, "");
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
regs_dump(stream, &regs, indent + 4);
fprintf(stream, "%*ssregs:\n", indent + 2, "");
- vcpu_sregs_get(vm, vcpuid, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs_dump(stream, &sregs, indent + 4);
}
-static int kvm_get_num_msrs_fd(int kvm_fd)
+static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs)
{
+ struct kvm_msr_list *list;
struct kvm_msr_list nmsrs;
- int r;
+ int kvm_fd, r;
+
+ kvm_fd = open_kvm_dev_path_or_exit();
nmsrs.nmsrs = 0;
- r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
- TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
- r);
+ if (!feature_msrs)
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+ else
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs);
- return nmsrs.nmsrs;
-}
+ TEST_ASSERT(r == -1 && errno == E2BIG,
+ "Expected -E2BIG, got rc: %i errno: %i (%s)",
+ r, errno, strerror(errno));
-static int kvm_get_num_msrs(struct kvm_vm *vm)
-{
- return kvm_get_num_msrs_fd(vm->kvm_fd);
+ list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0]));
+ TEST_ASSERT(list, "-ENOMEM when allocating MSR index list");
+ list->nmsrs = nmsrs.nmsrs;
+
+ if (!feature_msrs)
+ kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+ else
+ kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+ close(kvm_fd);
+
+ TEST_ASSERT(list->nmsrs == nmsrs.nmsrs,
+ "Number of MSRs in list changed, was %d, now %d",
+ nmsrs.nmsrs, list->nmsrs);
+ return list;
}
-struct kvm_msr_list *kvm_get_msr_index_list(void)
+const struct kvm_msr_list *kvm_get_msr_index_list(void)
{
- struct kvm_msr_list *list;
- int nmsrs, r, kvm_fd;
+ static const struct kvm_msr_list *list;
- kvm_fd = open_kvm_dev_path_or_exit();
+ if (!list)
+ list = __kvm_get_msr_index_list(false);
+ return list;
+}
- nmsrs = kvm_get_num_msrs_fd(kvm_fd);
- list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
- list->nmsrs = nmsrs;
- r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- close(kvm_fd);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
- r);
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
+{
+ static const struct kvm_msr_list *list;
+ if (!list)
+ list = __kvm_get_msr_index_list(true);
return list;
}
-static int vcpu_save_xsave_state(struct kvm_vm *vm, struct vcpu *vcpu,
- struct kvm_x86_state *state)
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
{
- int size;
+ const struct kvm_msr_list *list = kvm_get_msr_index_list();
+ int i;
- size = vm_check_cap(vm, KVM_CAP_XSAVE2);
- if (!size)
- size = sizeof(struct kvm_xsave);
+ for (i = 0; i < list->nmsrs; ++i) {
+ if (list->indices[i] == msr_index)
+ return true;
+ }
- state->xsave = malloc(size);
- if (size == sizeof(struct kvm_xsave))
- return ioctl(vcpu->fd, KVM_GET_XSAVE, state->xsave);
- else
- return ioctl(vcpu->fd, KVM_GET_XSAVE2, state->xsave);
+ return false;
}
-struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
+static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu,
+ struct kvm_x86_state *state)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- struct kvm_msr_list *list;
+ int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2);
+
+ if (size) {
+ state->xsave = malloc(size);
+ vcpu_xsave2_get(vcpu, state->xsave);
+ } else {
+ state->xsave = malloc(sizeof(struct kvm_xsave));
+ vcpu_xsave_get(vcpu, state->xsave);
+ }
+}
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
+{
+ const struct kvm_msr_list *msr_list = kvm_get_msr_index_list();
struct kvm_x86_state *state;
- int nmsrs, r, i;
+ int i;
+
static int nested_size = -1;
if (nested_size == -1) {
@@ -1114,113 +949,57 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
* kernel with KVM_RUN. Complete IO prior to migrating state
* to a new VM.
*/
- vcpu_run_complete_io(vm, vcpuid);
-
- nmsrs = kvm_get_num_msrs(vm);
- list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
- list->nmsrs = nmsrs;
- r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
- r);
-
- state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
- r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
- r);
-
- r = vcpu_save_xsave_state(vm, vcpu, state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
- r);
-
- if (kvm_check_cap(KVM_CAP_XCRS)) {
- r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
- r);
- }
+ vcpu_run_complete_io(vcpu);
+
+ state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));
+
+ vcpu_events_get(vcpu, &state->events);
+ vcpu_mp_state_get(vcpu, &state->mp_state);
+ vcpu_regs_get(vcpu, &state->regs);
+ vcpu_save_xsave_state(vcpu, state);
- r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
- r);
+ if (kvm_has_cap(KVM_CAP_XCRS))
+ vcpu_xcrs_get(vcpu, &state->xcrs);
+
+ vcpu_sregs_get(vcpu, &state->sregs);
if (nested_size) {
state->nested.size = sizeof(state->nested_);
- r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
- r);
+
+ vcpu_nested_state_get(vcpu, &state->nested);
TEST_ASSERT(state->nested.size <= nested_size,
"Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
state->nested.size, nested_size);
- } else
+ } else {
state->nested.size = 0;
+ }
- state->msrs.nmsrs = nmsrs;
- for (i = 0; i < nmsrs; i++)
- state->msrs.entries[i].index = list->indices[i];
- r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
- TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
- r, r == nmsrs ? -1 : list->indices[r]);
+ state->msrs.nmsrs = msr_list->nmsrs;
+ for (i = 0; i < msr_list->nmsrs; i++)
+ state->msrs.entries[i].index = msr_list->indices[i];
+ vcpu_msrs_get(vcpu, &state->msrs);
- r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
- r);
+ vcpu_debugregs_get(vcpu, &state->debugregs);
- free(list);
return state;
}
-void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int r;
-
- r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
- TEST_ASSERT(r == state->msrs.nmsrs,
- "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
- r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
-
- if (kvm_check_cap(KVM_CAP_XCRS)) {
- r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
- r);
- }
-
- r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
- r);
+ vcpu_sregs_set(vcpu, &state->sregs);
+ vcpu_msrs_set(vcpu, &state->msrs);
- r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
- r);
+ if (kvm_has_cap(KVM_CAP_XCRS))
+ vcpu_xcrs_set(vcpu, &state->xcrs);
- r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
- r);
+ vcpu_xsave_set(vcpu, state->xsave);
+ vcpu_events_set(vcpu, &state->events);
+ vcpu_mp_state_set(vcpu, &state->mp_state);
+ vcpu_debugregs_set(vcpu, &state->debugregs);
+ vcpu_regs_set(vcpu, &state->regs);
- r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
- r);
-
- if (state->nested.size) {
- r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
- r);
- }
+ if (state->nested.size)
+ vcpu_nested_state_set(vcpu, &state->nested);
}
void kvm_x86_state_cleanup(struct kvm_x86_state *state)
@@ -1232,15 +1011,9 @@ void kvm_x86_state_cleanup(struct kvm_x86_state *state)
static bool cpu_vendor_string_is(const char *vendor)
{
const uint32_t *chunk = (const uint32_t *)vendor;
- int eax, ebx, ecx, edx;
- const int leaf = 0;
-
- __asm__ __volatile__(
- "cpuid"
- : /* output */ "=a"(eax), "=b"(ebx),
- "=c"(ecx), "=d"(edx)
- : /* input */ "0"(leaf), "2"(0));
+ uint32_t eax, ebx, ecx, edx;
+ cpuid(0, &eax, &ebx, &ecx, &edx);
return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}
@@ -1257,19 +1030,9 @@ bool is_amd_cpu(void)
return cpu_vendor_string_is("AuthenticAMD");
}
-uint32_t kvm_get_cpuid_max_basic(void)
-{
- return kvm_get_supported_cpuid_entry(0)->eax;
-}
-
-uint32_t kvm_get_cpuid_max_extended(void)
-{
- return kvm_get_supported_cpuid_entry(0x80000000)->eax;
-}
-
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
{
- struct kvm_cpuid_entry2 *entry;
+ const struct kvm_cpuid_entry2 *entry;
bool pae;
/* SDM 4.1.4 */
@@ -1315,6 +1078,20 @@ static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
e->offset2 = addr >> 32;
}
+
+static bool kvm_fixup_exception(struct ex_regs *regs)
+{
+ if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
+ return false;
+
+ if (regs->vector == DE_VECTOR)
+ return false;
+
+ regs->rip = regs->r11;
+ regs->r9 = regs->vector;
+ return true;
+}
+
void kvm_exit_unexpected_vector(uint32_t value)
{
ucall(UCALL_UNHANDLED, 1, value);
@@ -1330,6 +1107,9 @@ void route_exception(struct ex_regs *regs)
return;
}
+ if (kvm_fixup_exception(regs))
+ return;
+
kvm_exit_unexpected_vector(regs->vector);
}
@@ -1346,17 +1126,18 @@ void vm_init_descriptor_tables(struct kvm_vm *vm)
DEFAULT_CODE_SELECTOR);
}
-void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
{
+ struct kvm_vm *vm = vcpu->vm;
struct kvm_sregs sregs;
- vcpu_sregs_get(vm, vcpuid, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.idt.base = vm->idt;
sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
sregs.gdt.base = vm->gdt;
sregs.gdt.limit = getpagesize() - 1;
kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
- vcpu_sregs_set(vm, vcpuid, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
}
@@ -1368,11 +1149,11 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
handlers[vector] = (vm_vaddr_t)handler;
}
-void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
struct ucall uc;
- if (get_ucall(vm, vcpuid, &uc) == UCALL_UNHANDLED) {
+ if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
uint64_t vector = uc.args[0];
TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
@@ -1380,16 +1161,15 @@ void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
}
}
-struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function,
- uint32_t index)
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index)
{
int i;
for (i = 0; i < cpuid->nent; i++) {
- struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];
-
- if (cur->function == function && cur->index == index)
- return cur;
+ if (cpuid->entries[i].function == function &&
+ cpuid->entries[i].index == index)
+ return &cpuid->entries[i];
}
TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index);
@@ -1397,24 +1177,6 @@ struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function,
return NULL;
}
-bool set_cpuid(struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 *ent)
-{
- int i;
-
- for (i = 0; i < cpuid->nent; i++) {
- struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];
-
- if (cur->function != ent->function || cur->index != ent->index)
- continue;
-
- memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2));
- return true;
- }
-
- return false;
-}
-
uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
uint64_t a3)
{
@@ -1422,43 +1184,38 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
asm volatile("vmcall"
: "=a"(r)
- : "b"(a0), "c"(a1), "d"(a2), "S"(a3));
+ : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
return r;
}
-struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
{
static struct kvm_cpuid2 *cpuid;
- int ret;
int kvm_fd;
if (cpuid)
return cpuid;
- cpuid = allocate_kvm_cpuid2();
+ cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
kvm_fd = open_kvm_dev_path_or_exit();
- ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
- TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_HV_CPUID failed %d %d\n",
- ret, errno);
+ kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
close(kvm_fd);
return cpuid;
}
-void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
{
static struct kvm_cpuid2 *cpuid_full;
- struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
+ const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
int i, nent = 0;
if (!cpuid_full) {
cpuid_sys = kvm_get_supported_cpuid();
cpuid_hv = kvm_get_supported_hv_cpuid();
- cpuid_full = malloc(sizeof(*cpuid_full) +
- (cpuid_sys->nent + cpuid_hv->nent) *
- sizeof(struct kvm_cpuid_entry2));
+ cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
if (!cpuid_full) {
perror("malloc");
abort();
@@ -1478,16 +1235,14 @@ void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
cpuid_full->nent = nent + cpuid_hv->nent;
}
- vcpu_set_cpuid(vm, vcpuid, cpuid_full);
+ vcpu_init_cpuid(vcpu, cpuid_full);
}
-struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
{
- static struct kvm_cpuid2 *cpuid;
+ struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
- cpuid = allocate_kvm_cpuid2();
-
- vcpu_ioctl(vm, vcpuid, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+ vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
return cpuid;
}
@@ -1510,9 +1265,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
/* Before family 17h, the HyperTransport area is just below 1T. */
ht_gfn = (1 << 28) - num_ht_pages;
- eax = 1;
- ecx = 0;
- cpuid(&eax, &ebx, &ecx, &edx);
+ cpuid(1, &eax, &ebx, &ecx, &edx);
if (x86_family(eax) < 0x17)
goto done;
@@ -1521,18 +1274,15 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
* reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use
* the old conservative value if MAXPHYADDR is not enumerated.
*/
- eax = 0x80000000;
- cpuid(&eax, &ebx, &ecx, &edx);
+ cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
max_ext_leaf = eax;
if (max_ext_leaf < 0x80000008)
goto done;
- eax = 0x80000008;
- cpuid(&eax, &ebx, &ecx, &edx);
+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1;
if (max_ext_leaf >= 0x8000001f) {
- eax = 0x8000001f;
- cpuid(&eax, &ebx, &ecx, &edx);
+ cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
max_pfn >>= (ebx >> 6) & 0x3f;
}
@@ -1540,3 +1290,24 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
done:
return min(max_gfn, ht_gfn - 1);
}
+
+/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
+bool vm_is_unrestricted_guest(struct kvm_vm *vm)
+{
+ char val = 'N';
+ size_t count;
+ FILE *f;
+
+ /* Ensure that a KVM vendor-specific module is loaded. */
+ if (vm == NULL)
+ close(open_kvm_dev_path_or_exit());
+
+ f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
+ if (f) {
+ count = fread(&val, sizeof(char), 1, f);
+ TEST_ASSERT(count == 1, "Unable to read from param file.");
+ fclose(f);
+ }
+
+ return val == 'Y';
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 736ee4a23df6..6d445886e16c 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -9,7 +9,6 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
#include "svm_util.h"
@@ -165,22 +164,6 @@ void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
: "r15", "memory");
}
-bool nested_svm_supported(void)
-{
- struct kvm_cpuid_entry2 *entry =
- kvm_get_supported_cpuid_entry(0x80000001);
-
- return entry->ecx & CPUID_SVM;
-}
-
-void nested_svm_check_supported(void)
-{
- if (!nested_svm_supported()) {
- print_skip("nested SVM not enabled");
- exit(KSFT_SKIP);
- }
-}
-
/*
* Open SEV_DEV_PATH if available, otherwise exit the entire program.
*
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
index a3489973e290..e5f0f9e0d3ee 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -24,7 +24,7 @@ void ucall(uint64_t cmd, int nargs, ...)
va_list va;
int i;
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+ nargs = min(nargs, UCALL_MAX_ARGS);
va_start(va, nargs);
for (i = 0; i < nargs; ++i)
@@ -35,9 +35,9 @@ void ucall(uint64_t cmd, int nargs, ...)
: : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory");
}
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ struct kvm_run *run = vcpu->run;
struct ucall ucall = {};
if (uc)
@@ -46,11 +46,11 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
struct kvm_regs regs;
- vcpu_regs_get(vm, vcpu_id, &regs);
- memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi),
+ vcpu_regs_get(vcpu, &regs);
+ memcpy(&ucall, addr_gva2hva(vcpu->vm, (vm_vaddr_t)regs.rdi),
sizeof(ucall));
- vcpu_run_complete_io(vm, vcpu_id);
+ vcpu_run_complete_io(vcpu);
if (uc)
memcpy(uc, &ucall, sizeof(ucall));
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index b77a01d0a271..80a568c439b8 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -7,7 +7,6 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
#include "vmx.h"
@@ -43,16 +42,12 @@ struct eptPageTablePointer {
uint64_t address:40;
uint64_t reserved_63_52:12;
};
-int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
{
uint16_t evmcs_ver;
- struct kvm_enable_cap enable_evmcs_cap = {
- .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
- .args[0] = (unsigned long)&evmcs_ver
- };
-
- vcpu_ioctl(vm, vcpu_id, KVM_ENABLE_CAP, &enable_evmcs_cap);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+ (unsigned long)&evmcs_ver);
/* KVM should return supported EVMCS version range */
TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
@@ -387,21 +382,6 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
init_vmcs_guest_state(guest_rip, guest_rsp);
}
-bool nested_vmx_supported(void)
-{
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
-
- return entry->ecx & CPUID_VMX;
-}
-
-void nested_vmx_check_supported(void)
-{
- if (!nested_vmx_supported()) {
- print_skip("nested VMX not enabled");
- exit(KSFT_SKIP);
- }
-}
-
static void nested_create_pte(struct kvm_vm *vm,
struct eptPageTableEntry *pte,
uint64_t nested_paddr,
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c
index 15f046e19cb2..9a6e4f3ad6b5 100644
--- a/tools/testing/selftests/kvm/max_guest_memory_test.c
+++ b/tools/testing/selftests/kvm/max_guest_memory_test.c
@@ -28,8 +28,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
}
struct vcpu_info {
- struct kvm_vm *vm;
- uint32_t id;
+ struct kvm_vcpu *vcpu;
uint64_t start_gpa;
uint64_t end_gpa;
};
@@ -52,45 +51,45 @@ static void rendezvous_with_boss(void)
}
}
-static void run_vcpu(struct kvm_vm *vm, uint32_t vcpu_id)
+static void run_vcpu(struct kvm_vcpu *vcpu)
{
- vcpu_run(vm, vcpu_id);
- ASSERT_EQ(get_ucall(vm, vcpu_id, NULL), UCALL_DONE);
+ vcpu_run(vcpu);
+ ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
}
static void *vcpu_worker(void *data)
{
- struct vcpu_info *vcpu = data;
+ struct vcpu_info *info = data;
+ struct kvm_vcpu *vcpu = info->vcpu;
struct kvm_vm *vm = vcpu->vm;
struct kvm_sregs sregs;
struct kvm_regs regs;
- vcpu_args_set(vm, vcpu->id, 3, vcpu->start_gpa, vcpu->end_gpa,
- vm_get_page_size(vm));
+ vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size);
/* Snapshot regs before the first run. */
- vcpu_regs_get(vm, vcpu->id, &regs);
+ vcpu_regs_get(vcpu, &regs);
rendezvous_with_boss();
- run_vcpu(vm, vcpu->id);
+ run_vcpu(vcpu);
rendezvous_with_boss();
- vcpu_regs_set(vm, vcpu->id, &regs);
- vcpu_sregs_get(vm, vcpu->id, &sregs);
+ vcpu_regs_set(vcpu, &regs);
+ vcpu_sregs_get(vcpu, &sregs);
#ifdef __x86_64__
/* Toggle CR0.WP to trigger a MMU context reset. */
sregs.cr0 ^= X86_CR0_WP;
#endif
- vcpu_sregs_set(vm, vcpu->id, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
rendezvous_with_boss();
- run_vcpu(vm, vcpu->id);
+ run_vcpu(vcpu);
rendezvous_with_boss();
return NULL;
}
-static pthread_t *spawn_workers(struct kvm_vm *vm, uint64_t start_gpa,
- uint64_t end_gpa)
+static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus,
+ uint64_t start_gpa, uint64_t end_gpa)
{
struct vcpu_info *info;
uint64_t gpa, nr_bytes;
@@ -104,12 +103,11 @@ static pthread_t *spawn_workers(struct kvm_vm *vm, uint64_t start_gpa,
TEST_ASSERT(info, "Failed to allocate vCPU gpa ranges");
nr_bytes = ((end_gpa - start_gpa) / nr_vcpus) &
- ~((uint64_t)vm_get_page_size(vm) - 1);
+ ~((uint64_t)vm->page_size - 1);
TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus);
for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) {
- info[i].vm = vm;
- info[i].id = i;
+ info[i].vcpu = vcpus[i];
info[i].start_gpa = gpa;
info[i].end_gpa = gpa + nr_bytes;
pthread_create(&threads[i], NULL, vcpu_worker, &info[i]);
@@ -172,6 +170,7 @@ int main(int argc, char *argv[])
uint64_t max_gpa, gpa, slot_size, max_mem, i;
int max_slots, slot, opt, fd;
bool hugepages = false;
+ struct kvm_vcpu **vcpus;
pthread_t *threads;
struct kvm_vm *vm;
void *mem;
@@ -215,9 +214,12 @@ int main(int argc, char *argv[])
}
}
- vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
+ vcpus = malloc(nr_vcpus * sizeof(*vcpus));
+ TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
- max_gpa = vm_get_max_gfn(vm) << vm_get_page_shift(vm);
+ max_gpa = vm->max_gfn << vm->page_shift;
TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb ");
fd = kvm_memfd_alloc(slot_size, hugepages);
@@ -227,7 +229,7 @@ int main(int argc, char *argv[])
TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed");
/* Pre-fault the memory to avoid taking mmap_sem on guest page faults. */
- for (i = 0; i < slot_size; i += vm_get_page_size(vm))
+ for (i = 0; i < slot_size; i += vm->page_size)
((uint8_t *)mem)[i] = 0xaa;
gpa = 0;
@@ -246,13 +248,16 @@ int main(int argc, char *argv[])
for (i = 0; i < slot_size; i += size_1gb)
__virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
#else
- for (i = 0; i < slot_size; i += vm_get_page_size(vm))
+ for (i = 0; i < slot_size; i += vm->page_size)
virt_pg_map(vm, gpa + i, gpa + i);
#endif
}
atomic_set(&rendezvous, nr_vcpus + 1);
- threads = spawn_workers(vm, start_gpa, gpa);
+ threads = spawn_workers(vm, vcpus, start_gpa, gpa);
+
+ free(vcpus);
+ vcpus = NULL;
pr_info("Running with %lugb of guest memory and %u vCPUs\n",
(gpa - start_gpa) / size_1gb, nr_vcpus);
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 1410d0a9141a..6ee7e1dde404 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -38,19 +38,18 @@ static bool run_vcpus = true;
static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
{
- int ret;
- int vcpu_id = vcpu_args->vcpu_id;
- struct kvm_vm *vm = perf_test_args.vm;
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
struct kvm_run *run;
+ int ret;
- run = vcpu_state(vm, vcpu_id);
+ run = vcpu->run;
/* Let the guest access its memory until a stop signal is received */
while (READ_ONCE(run_vcpus)) {
- ret = _vcpu_run(vm, vcpu_id);
+ ret = _vcpu_run(vcpu);
TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
- if (get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC)
+ if (get_ucall(vcpu, NULL) == UCALL_SYNC)
continue;
TEST_ASSERT(false,
@@ -76,7 +75,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
* Add the dummy memslot just below the perf_test_util memslot, which is
* at the top of the guest physical address space.
*/
- gpa = perf_test_args.gpa - pages * vm_get_page_size(vm);
+ gpa = perf_test_args.gpa - pages * vm->page_size;
for (i = 0; i < nr_modifications; i++) {
usleep(delay);
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 1727f75e0c2c..44995446d942 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -25,8 +25,6 @@
#include <kvm_util.h>
#include <processor.h>
-#define VCPU_ID 0
-
#define MEM_SIZE ((512U << 20) + 4096)
#define MEM_SIZE_PAGES (MEM_SIZE / 4096)
#define MEM_GPA 0x10000000UL
@@ -90,6 +88,7 @@ static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
struct vm_data {
struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
pthread_t vcpu_thread;
uint32_t nslots;
uint64_t npages;
@@ -127,29 +126,29 @@ static bool verbose;
pr_info(__VA_ARGS__); \
} while (0)
-static void check_mmio_access(struct vm_data *vm, struct kvm_run *run)
+static void check_mmio_access(struct vm_data *data, struct kvm_run *run)
{
- TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
+ TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit");
TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
TEST_ASSERT(run->mmio.len == 8,
"Unexpected exit mmio size = %u", run->mmio.len);
- TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
- run->mmio.phys_addr <= vm->mmio_gpa_max,
+ TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min &&
+ run->mmio.phys_addr <= data->mmio_gpa_max,
"Unexpected exit mmio address = 0x%llx",
run->mmio.phys_addr);
}
-static void *vcpu_worker(void *data)
+static void *vcpu_worker(void *__data)
{
- struct vm_data *vm = data;
- struct kvm_run *run;
+ struct vm_data *data = __data;
+ struct kvm_vcpu *vcpu = data->vcpu;
+ struct kvm_run *run = vcpu->run;
struct ucall uc;
- run = vcpu_state(vm->vm, VCPU_ID);
while (1) {
- vcpu_run(vm->vm, VCPU_ID);
+ vcpu_run(vcpu);
- switch (get_ucall(vm->vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
TEST_ASSERT(uc.args[1] == 0,
"Unexpected sync ucall, got %lx",
@@ -158,14 +157,12 @@ static void *vcpu_worker(void *data)
continue;
case UCALL_NONE:
if (run->exit_reason == KVM_EXIT_MMIO)
- check_mmio_access(vm, run);
+ check_mmio_access(data, run);
else
goto done;
break;
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld, val = %lu",
- (const char *)uc.args[0],
- __FILE__, uc.args[1], uc.args[2]);
+ REPORT_GUEST_ASSERT_1(uc, "val = %lu");
break;
case UCALL_DONE:
goto done;
@@ -238,6 +235,7 @@ static struct vm_data *alloc_vm(void)
TEST_ASSERT(data, "malloc(vmdata) failed");
data->vm = NULL;
+ data->vcpu = NULL;
data->hva_slots = NULL;
return data;
@@ -278,7 +276,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
TEST_ASSERT(data->hva_slots, "malloc() fail");
- data->vm = vm_create_default(VCPU_ID, mempages, guest_code);
+ data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
ucall_init(data->vm, NULL);
pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 4158da0da2bb..a54d4d05a058 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -20,8 +20,6 @@
#include "processor.h"
#include "test_util.h"
-#define VCPU_ID 0
-
static __thread volatile struct rseq __rseq = {
.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
};
@@ -82,8 +80,9 @@ static int next_cpu(int cpu)
return cpu;
}
-static void *migration_worker(void *ign)
+static void *migration_worker(void *__rseq_tid)
{
+ pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid;
cpu_set_t allowed_mask;
int r, i, cpu;
@@ -106,7 +105,7 @@ static void *migration_worker(void *ign)
* stable, i.e. while changing affinity is in-progress.
*/
smp_wmb();
- r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask);
TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
errno, strerror(errno));
smp_wmb();
@@ -173,12 +172,11 @@ static void *migration_worker(void *ign)
return NULL;
}
-static int calc_min_max_cpu(void)
+static void calc_min_max_cpu(void)
{
int i, cnt, nproc;
- if (CPU_COUNT(&possible_mask) < 2)
- return -EINVAL;
+ TEST_REQUIRE(CPU_COUNT(&possible_mask) >= 2);
/*
* CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that
@@ -200,13 +198,15 @@ static int calc_min_max_cpu(void)
cnt++;
}
- return (cnt < 2) ? -EINVAL : 0;
+ __TEST_REQUIRE(cnt >= 2,
+ "Only one usable CPU, task migration not possible");
}
int main(int argc, char *argv[])
{
int r, i, snapshot;
struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
u32 cpu, rseq_cpu;
/* Tell stdout not to buffer its content */
@@ -216,10 +216,7 @@ int main(int argc, char *argv[])
TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
strerror(errno));
- if (calc_min_max_cpu()) {
- print_skip("Only one usable CPU, task migration not possible");
- exit(KSFT_SKIP);
- }
+ calc_min_max_cpu();
sys_rseq(0);
@@ -228,14 +225,15 @@ int main(int argc, char *argv[])
* GUEST_SYNC, while concurrently migrating the process by setting its
* CPU affinity.
*/
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
ucall_init(vm, NULL);
- pthread_create(&migration_thread, NULL, migration_worker, 0);
+ pthread_create(&migration_thread, NULL, migration_worker,
+ (void *)(unsigned long)gettid());
for (i = 0; !done; i++) {
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+ vcpu_run(vcpu);
+ TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
"Guest failed?");
/*
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index 49f26f544127..9113696d5178 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -14,6 +14,7 @@
#include "test_util.h"
#include "kvm_util.h"
+#include "kselftest.h"
enum mop_target {
LOGICAL,
@@ -98,21 +99,18 @@ static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc desc)
return ksmo;
}
-/* vcpu dummy id signifying that vm instead of vcpu ioctl is to occur */
-const uint32_t VM_VCPU_ID = (uint32_t)-1;
-
-struct test_vcpu {
+struct test_info {
struct kvm_vm *vm;
- uint32_t id;
+ struct kvm_vcpu *vcpu;
};
#define PRINT_MEMOP false
-static void print_memop(uint32_t vcpu_id, const struct kvm_s390_mem_op *ksmo)
+static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksmo)
{
if (!PRINT_MEMOP)
return;
- if (vcpu_id == VM_VCPU_ID)
+ if (!vcpu)
printf("vm memop(");
else
printf("vcpu memop(");
@@ -147,25 +145,29 @@ static void print_memop(uint32_t vcpu_id, const struct kvm_s390_mem_op *ksmo)
puts(")");
}
-static void memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo)
+static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo)
{
- if (vcpu.id == VM_VCPU_ID)
- vm_ioctl(vcpu.vm, KVM_S390_MEM_OP, ksmo);
+ struct kvm_vcpu *vcpu = info.vcpu;
+
+ if (!vcpu)
+ vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
else
- vcpu_ioctl(vcpu.vm, vcpu.id, KVM_S390_MEM_OP, ksmo);
+ vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
}
-static int err_memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo)
+static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo)
{
- if (vcpu.id == VM_VCPU_ID)
- return _vm_ioctl(vcpu.vm, KVM_S390_MEM_OP, ksmo);
+ struct kvm_vcpu *vcpu = info.vcpu;
+
+ if (!vcpu)
+ return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
else
- return _vcpu_ioctl(vcpu.vm, vcpu.id, KVM_S390_MEM_OP, ksmo);
+ return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
}
-#define MEMOP(err, vcpu_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \
+#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \
({ \
- struct test_vcpu __vcpu = (vcpu_p); \
+ struct test_info __info = (info_p); \
struct mop_desc __desc = { \
.target = (mop_target_p), \
.mode = (access_mode_p), \
@@ -177,13 +179,13 @@ static int err_memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo)
\
if (__desc._gaddr_v) { \
if (__desc.target == ABSOLUTE) \
- __desc.gaddr = addr_gva2gpa(__vcpu.vm, __desc.gaddr_v); \
+ __desc.gaddr = addr_gva2gpa(__info.vm, __desc.gaddr_v); \
else \
__desc.gaddr = __desc.gaddr_v; \
} \
__ksmo = ksmo_from_desc(__desc); \
- print_memop(__vcpu.id, &__ksmo); \
- err##memop_ioctl(__vcpu, &__ksmo); \
+ print_memop(__info.vcpu, &__ksmo); \
+ err##memop_ioctl(__info, &__ksmo); \
})
#define MOP(...) MEMOP(, __VA_ARGS__)
@@ -200,7 +202,6 @@ static int err_memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo)
#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
-#define VCPU_ID 1
#define PAGE_SHIFT 12
#define PAGE_SIZE (1ULL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))
@@ -212,21 +213,22 @@ static uint8_t mem2[65536];
struct test_default {
struct kvm_vm *kvm_vm;
- struct test_vcpu vm;
- struct test_vcpu vcpu;
+ struct test_info vm;
+ struct test_info vcpu;
struct kvm_run *run;
int size;
};
static struct test_default test_default_init(void *guest_code)
{
+ struct kvm_vcpu *vcpu;
struct test_default t;
t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1));
- t.kvm_vm = vm_create_default(VCPU_ID, 0, guest_code);
- t.vm = (struct test_vcpu) { t.kvm_vm, VM_VCPU_ID };
- t.vcpu = (struct test_vcpu) { t.kvm_vm, VCPU_ID };
- t.run = vcpu_state(t.kvm_vm, VCPU_ID);
+ t.kvm_vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ t.vm = (struct test_info) { t.kvm_vm, NULL };
+ t.vcpu = (struct test_info) { t.kvm_vm, vcpu };
+ t.run = vcpu->run;
return t;
}
@@ -241,14 +243,15 @@ enum stage {
STAGE_COPIED,
};
-#define HOST_SYNC(vcpu_p, stage) \
+#define HOST_SYNC(info_p, stage) \
({ \
- struct test_vcpu __vcpu = (vcpu_p); \
+ struct test_info __info = (info_p); \
+ struct kvm_vcpu *__vcpu = __info.vcpu; \
struct ucall uc; \
int __stage = (stage); \
\
- vcpu_run(__vcpu.vm, __vcpu.id); \
- get_ucall(__vcpu.vm, __vcpu.id, &uc); \
+ vcpu_run(__vcpu); \
+ get_ucall(__vcpu, &uc); \
ASSERT_EQ(uc.cmd, UCALL_SYNC); \
ASSERT_EQ(uc.args[1], __stage); \
}) \
@@ -267,7 +270,7 @@ static void prepare_mem12(void)
#define DEFAULT_WRITE_READ(copy_cpu, mop_cpu, mop_target_p, size, ...) \
({ \
- struct test_vcpu __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \
+ struct test_info __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \
enum mop_target __target = (mop_target_p); \
uint32_t __size = (size); \
\
@@ -282,7 +285,7 @@ static void prepare_mem12(void)
#define DEFAULT_READ(copy_cpu, mop_cpu, mop_target_p, size, ...) \
({ \
- struct test_vcpu __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \
+ struct test_info __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \
enum mop_target __target = (mop_target_p); \
uint32_t __size = (size); \
\
@@ -623,34 +626,34 @@ static void guest_idle(void)
GUEST_SYNC(STAGE_IDLED);
}
-static void _test_errors_common(struct test_vcpu vcpu, enum mop_target target, int size)
+static void _test_errors_common(struct test_info info, enum mop_target target, int size)
{
int rv;
/* Bad size: */
- rv = ERR_MOP(vcpu, target, WRITE, mem1, -1, GADDR_V(mem1));
+ rv = ERR_MOP(info, target, WRITE, mem1, -1, GADDR_V(mem1));
TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
/* Zero size: */
- rv = ERR_MOP(vcpu, target, WRITE, mem1, 0, GADDR_V(mem1));
+ rv = ERR_MOP(info, target, WRITE, mem1, 0, GADDR_V(mem1));
TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
"ioctl allows 0 as size");
/* Bad flags: */
- rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1));
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1));
TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
/* Bad guest address: */
- rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY);
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY);
TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access");
/* Bad host address: */
- rv = ERR_MOP(vcpu, target, WRITE, 0, size, GADDR_V(mem1));
+ rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1));
TEST_ASSERT(rv == -1 && errno == EFAULT,
"ioctl does not report bad host memory address");
/* Bad key: */
- rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17));
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17));
TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key");
}
@@ -691,34 +694,89 @@ static void test_errors(void)
kvm_vm_free(t.kvm_vm);
}
+struct testdef {
+ const char *name;
+ void (*test)(void);
+ int extension;
+} testlist[] = {
+ {
+ .name = "simple copy",
+ .test = test_copy,
+ },
+ {
+ .name = "generic error checks",
+ .test = test_errors,
+ },
+ {
+ .name = "copy with storage keys",
+ .test = test_copy_key,
+ .extension = 1,
+ },
+ {
+ .name = "copy with key storage protection override",
+ .test = test_copy_key_storage_prot_override,
+ .extension = 1,
+ },
+ {
+ .name = "copy with key fetch protection",
+ .test = test_copy_key_fetch_prot,
+ .extension = 1,
+ },
+ {
+ .name = "copy with key fetch protection override",
+ .test = test_copy_key_fetch_prot_override,
+ .extension = 1,
+ },
+ {
+ .name = "error checks with key",
+ .test = test_errors_key,
+ .extension = 1,
+ },
+ {
+ .name = "termination",
+ .test = test_termination,
+ .extension = 1,
+ },
+ {
+ .name = "error checks with key storage protection override",
+ .test = test_errors_key_storage_prot_override,
+ .extension = 1,
+ },
+ {
+ .name = "error checks without key fetch prot override",
+ .test = test_errors_key_fetch_prot_override_not_enabled,
+ .extension = 1,
+ },
+ {
+ .name = "error checks with key fetch prot override",
+ .test = test_errors_key_fetch_prot_override_enabled,
+ .extension = 1,
+ },
+};
+
int main(int argc, char *argv[])
{
- int memop_cap, extension_cap;
+ int extension_cap, idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP));
setbuf(stdout, NULL); /* Tell stdout not to buffer its content */
- memop_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP);
- extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
- if (!memop_cap) {
- print_skip("CAP_S390_MEM_OP not supported");
- exit(KSFT_SKIP);
- }
+ ksft_print_header();
- test_copy();
- if (extension_cap > 0) {
- test_copy_key();
- test_copy_key_storage_prot_override();
- test_copy_key_fetch_prot();
- test_copy_key_fetch_prot_override();
- test_errors_key();
- test_termination();
- test_errors_key_storage_prot_override();
- test_errors_key_fetch_prot_override_not_enabled();
- test_errors_key_fetch_prot_override_enabled();
- } else {
- print_skip("storage key memop extension not supported");
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ if (extension_cap >= testlist[idx].extension) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ } else {
+ ksft_test_result_skip("%s - extension level %d not supported\n",
+ testlist[idx].name,
+ testlist[idx].extension);
+ }
}
- test_errors();
- return 0;
+ ksft_finished(); /* Print results and exit() accordingly */
}
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c
index b143db6d8693..19486084eb30 100644
--- a/tools/testing/selftests/kvm/s390x/resets.c
+++ b/tools/testing/selftests/kvm/s390x/resets.c
@@ -12,15 +12,14 @@
#include "test_util.h"
#include "kvm_util.h"
+#include "kselftest.h"
-#define VCPU_ID 3
#define LOCAL_IRQS 32
-struct kvm_s390_irq buf[VCPU_ID + LOCAL_IRQS];
+#define ARBITRARY_NON_ZERO_VCPU_ID 3
+
+struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS];
-struct kvm_vm *vm;
-struct kvm_run *run;
-struct kvm_sync_regs *sync_regs;
static uint8_t regs_null[512];
static void guest_code_initial(void)
@@ -58,25 +57,22 @@ static void guest_code_initial(void)
);
}
-static void test_one_reg(uint64_t id, uint64_t value)
+static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value)
{
- struct kvm_one_reg reg;
uint64_t eval_reg;
- reg.addr = (uintptr_t)&eval_reg;
- reg.id = id;
- vcpu_get_reg(vm, VCPU_ID, &reg);
+ vcpu_get_reg(vcpu, id, &eval_reg);
TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
}
-static void assert_noirq(void)
+static void assert_noirq(struct kvm_vcpu *vcpu)
{
struct kvm_s390_irq_state irq_state;
int irqs;
irq_state.len = sizeof(buf);
irq_state.buf = (unsigned long)buf;
- irqs = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_GET_IRQ_STATE, &irq_state);
+ irqs = __vcpu_ioctl(vcpu, KVM_S390_GET_IRQ_STATE, &irq_state);
/*
* irqs contains the number of retrieved interrupts. Any interrupt
* (notably, the emergency call interrupt we have injected) should
@@ -86,19 +82,20 @@ static void assert_noirq(void)
TEST_ASSERT(!irqs, "IRQ pending");
}
-static void assert_clear(void)
+static void assert_clear(struct kvm_vcpu *vcpu)
{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
struct kvm_sregs sregs;
struct kvm_regs regs;
struct kvm_fpu fpu;
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
TEST_ASSERT(!memcmp(&regs.gprs, regs_null, sizeof(regs.gprs)), "grs == 0");
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0");
- vcpu_fpu_get(vm, VCPU_ID, &fpu);
+ vcpu_fpu_get(vcpu, &fpu);
TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
/* sync regs */
@@ -112,8 +109,10 @@ static void assert_clear(void)
"vrs0-15 == 0 (sync_regs)");
}
-static void assert_initial_noclear(void)
+static void assert_initial_noclear(struct kvm_vcpu *vcpu)
{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+
TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL,
"gpr0 == 0xffff000000000000 (sync_regs)");
TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL,
@@ -127,13 +126,14 @@ static void assert_initial_noclear(void)
TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)");
}
-static void assert_initial(void)
+static void assert_initial(struct kvm_vcpu *vcpu)
{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
struct kvm_sregs sregs;
struct kvm_fpu fpu;
/* KVM_GET_SREGS */
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)");
TEST_ASSERT(sregs.crs[14] == 0xC2000000UL,
"cr14 == 0xC2000000 (KVM_GET_SREGS)");
@@ -156,36 +156,38 @@ static void assert_initial(void)
TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)");
/* kvm_run */
- TEST_ASSERT(run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
- TEST_ASSERT(run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
+ TEST_ASSERT(vcpu->run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
+ TEST_ASSERT(vcpu->run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
- vcpu_fpu_get(vm, VCPU_ID, &fpu);
+ vcpu_fpu_get(vcpu, &fpu);
TEST_ASSERT(!fpu.fpc, "fpc == 0");
- test_one_reg(KVM_REG_S390_GBEA, 1);
- test_one_reg(KVM_REG_S390_PP, 0);
- test_one_reg(KVM_REG_S390_TODPR, 0);
- test_one_reg(KVM_REG_S390_CPU_TIMER, 0);
- test_one_reg(KVM_REG_S390_CLOCK_COMP, 0);
+ test_one_reg(vcpu, KVM_REG_S390_GBEA, 1);
+ test_one_reg(vcpu, KVM_REG_S390_PP, 0);
+ test_one_reg(vcpu, KVM_REG_S390_TODPR, 0);
+ test_one_reg(vcpu, KVM_REG_S390_CPU_TIMER, 0);
+ test_one_reg(vcpu, KVM_REG_S390_CLOCK_COMP, 0);
}
-static void assert_normal_noclear(void)
+static void assert_normal_noclear(struct kvm_vcpu *vcpu)
{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+
TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 10 (sync_regs)");
TEST_ASSERT(sync_regs->crs[8] == 1, "cr10 == 1 (sync_regs)");
TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)");
TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)");
}
-static void assert_normal(void)
+static void assert_normal(struct kvm_vcpu *vcpu)
{
- test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
- TEST_ASSERT(sync_regs->pft == KVM_S390_PFAULT_TOKEN_INVALID,
+ test_one_reg(vcpu, KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
+ TEST_ASSERT(vcpu->run->s.regs.pft == KVM_S390_PFAULT_TOKEN_INVALID,
"pft == 0xff..... (sync_regs)");
- assert_noirq();
+ assert_noirq(vcpu);
}
-static void inject_irq(int cpu_id)
+static void inject_irq(struct kvm_vcpu *vcpu)
{
struct kvm_s390_irq_state irq_state;
struct kvm_s390_irq *irq = &buf[0];
@@ -195,85 +197,119 @@ static void inject_irq(int cpu_id)
irq_state.len = sizeof(struct kvm_s390_irq);
irq_state.buf = (unsigned long)buf;
irq->type = KVM_S390_INT_EMERGENCY;
- irq->u.emerg.code = cpu_id;
- irqs = _vcpu_ioctl(vm, cpu_id, KVM_S390_SET_IRQ_STATE, &irq_state);
+ irq->u.emerg.code = vcpu->id;
+ irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state);
TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno);
}
+static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create(1);
+
+ *vcpu = vm_vcpu_add(vm, ARBITRARY_NON_ZERO_VCPU_ID, guest_code_initial);
+
+ return vm;
+}
+
static void test_normal(void)
{
- pr_info("Testing normal reset\n");
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
- run = vcpu_state(vm, VCPU_ID);
- sync_regs = &run->s.regs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ ksft_print_msg("Testing normal reset\n");
+ vm = create_vm(&vcpu);
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
- inject_irq(VCPU_ID);
+ inject_irq(vcpu);
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0);
+ vcpu_ioctl(vcpu, KVM_S390_NORMAL_RESET, NULL);
/* must clears */
- assert_normal();
+ assert_normal(vcpu);
/* must not clears */
- assert_normal_noclear();
- assert_initial_noclear();
+ assert_normal_noclear(vcpu);
+ assert_initial_noclear(vcpu);
kvm_vm_free(vm);
}
static void test_initial(void)
{
- pr_info("Testing initial reset\n");
- vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
- run = vcpu_state(vm, VCPU_ID);
- sync_regs = &run->s.regs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
- vcpu_run(vm, VCPU_ID);
+ ksft_print_msg("Testing initial reset\n");
+ vm = create_vm(&vcpu);
- inject_irq(VCPU_ID);
+ vcpu_run(vcpu);
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0);
+ inject_irq(vcpu);
+
+ vcpu_ioctl(vcpu, KVM_S390_INITIAL_RESET, NULL);
/* must clears */
- assert_normal();
- assert_initial();
+ assert_normal(vcpu);
+ assert_initial(vcpu);
/* must not clears */
- assert_initial_noclear();
+ assert_initial_noclear(vcpu);
kvm_vm_free(vm);
}
static void test_clear(void)
{
- pr_info("Testing clear reset\n");
- vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
- run = vcpu_state(vm, VCPU_ID);
- sync_regs = &run->s.regs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ ksft_print_msg("Testing clear reset\n");
+ vm = create_vm(&vcpu);
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
- inject_irq(VCPU_ID);
+ inject_irq(vcpu);
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0);
+ vcpu_ioctl(vcpu, KVM_S390_CLEAR_RESET, NULL);
/* must clears */
- assert_normal();
- assert_initial();
- assert_clear();
+ assert_normal(vcpu);
+ assert_initial(vcpu);
+ assert_clear(vcpu);
kvm_vm_free(vm);
}
+struct testdef {
+ const char *name;
+ void (*test)(void);
+ bool needs_cap;
+} testlist[] = {
+ { "initial", test_initial, false },
+ { "normal", test_normal, true },
+ { "clear", test_clear, true },
+};
+
int main(int argc, char *argv[])
{
+ bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS);
+ int idx;
+
setbuf(stdout, NULL); /* Tell stdout not to buffer its content */
- test_initial();
- if (kvm_check_cap(KVM_CAP_S390_VCPU_RESETS)) {
- test_normal();
- test_clear();
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ if (!testlist[idx].needs_cap || has_s390_vcpu_resets) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ } else {
+ ksft_test_result_skip("%s - no VCPU_RESETS capability\n",
+ testlist[idx].name);
+ }
}
- return 0;
+
+ ksft_finished(); /* Print results and exit() accordingly */
}
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
index caf7b8859a94..3fdb6e2598eb 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -21,8 +21,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "diag318_test_handler.h"
-
-#define VCPU_ID 5
+#include "kselftest.h"
static void guest_code(void)
{
@@ -74,61 +73,58 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
#define INVALID_SYNC_FIELD 0x80000000
-int main(int argc, char *argv[])
+void test_read_invalid(struct kvm_vcpu *vcpu)
{
- struct kvm_vm *vm;
- struct kvm_run *run;
- struct kvm_regs regs;
- struct kvm_sregs sregs;
- int rv, cap;
-
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
- if (!cap) {
- print_skip("CAP_SYNC_REGS not supported");
- exit(KSFT_SKIP);
- }
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Request reading invalid register set from VCPU. */
run->kvm_valid_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
"Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+ run->kvm_valid_regs = 0;
run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
"Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+ run->kvm_valid_regs = 0;
+}
+
+void test_set_invalid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Request setting invalid register set into VCPU. */
run->kvm_dirty_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
"Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+ run->kvm_dirty_regs = 0;
run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
"Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+ run->kvm_dirty_regs = 0;
+}
+
+void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+ int rv;
/* Request and verify all valid register sets. */
run->kvm_valid_regs = TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
"Unexpected exit reason: %u (%s)\n",
@@ -141,11 +137,19 @@ int main(int argc, char *argv[])
run->s390_sieic.icptcode, run->s390_sieic.ipa,
run->s390_sieic.ipb);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
compare_regs(&regs, &run->s.regs);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
compare_sregs(&sregs, &run->s.regs);
+}
+
+void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+ int rv;
/* Set and verify various register values */
run->s.regs.gprs[11] = 0xBAD1DEA;
@@ -159,7 +163,7 @@ int main(int argc, char *argv[])
run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
}
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
"Unexpected exit reason: %u (%s)\n",
@@ -175,11 +179,17 @@ int main(int argc, char *argv[])
"diag318 sync regs value incorrect 0x%llx.",
run->s.regs.diag318);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
compare_regs(&regs, &run->s.regs);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
compare_sregs(&sregs, &run->s.regs);
+}
+
+void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Clear kvm_dirty_regs bits, verify new s.regs values are
* overwritten with existing guest values.
@@ -188,7 +198,7 @@ int main(int argc, char *argv[])
run->kvm_dirty_regs = 0;
run->s.regs.gprs[11] = 0xDEADBEEF;
run->s.regs.diag318 = 0x4B1D;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
"Unexpected exit reason: %u (%s)\n",
@@ -200,8 +210,43 @@ int main(int argc, char *argv[])
TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
"diag318 sync regs value incorrect 0x%llx.",
run->s.regs.diag318);
+}
+
+struct testdef {
+ const char *name;
+ void (*test)(struct kvm_vcpu *vcpu);
+} testlist[] = {
+ { "read invalid", test_read_invalid },
+ { "set invalid", test_set_invalid },
+ { "request+verify all valid regs", test_req_and_verify_all_valid_regs },
+ { "set+verify various regs", test_set_and_verify_various_reg_values },
+ { "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits },
+};
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ ksft_print_header();
+
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test(vcpu);
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
kvm_vm_free(vm);
- return 0;
+ ksft_finished(); /* Print results and exit() accordingly */
}
diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c
index c097b9db495e..a9a0b76e5fa4 100644
--- a/tools/testing/selftests/kvm/s390x/tprot.c
+++ b/tools/testing/selftests/kvm/s390x/tprot.c
@@ -8,14 +8,13 @@
#include <sys/mman.h>
#include "test_util.h"
#include "kvm_util.h"
+#include "kselftest.h"
#define PAGE_SHIFT 12
#define PAGE_SIZE (1 << PAGE_SHIFT)
#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
-#define VCPU_ID 1
-
static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
static uint8_t *const page_store_prot = pages[0];
static uint8_t *const page_fetch_prot = pages[1];
@@ -63,12 +62,12 @@ static enum permission test_protection(void *addr, uint8_t key)
}
enum stage {
- STAGE_END,
STAGE_INIT_SIMPLE,
TEST_SIMPLE,
STAGE_INIT_FETCH_PROT_OVERRIDE,
TEST_FETCH_PROT_OVERRIDE,
TEST_STORAGE_PROT_OVERRIDE,
+ STAGE_END /* must be the last entry (it's the amount of tests) */
};
struct test {
@@ -182,46 +181,63 @@ static void guest_code(void)
GUEST_SYNC(perform_next_stage(&i, mapped_0));
}
-#define HOST_SYNC(vmp, stage) \
-({ \
- struct kvm_vm *__vm = (vmp); \
- struct ucall uc; \
- int __stage = (stage); \
- \
- vcpu_run(__vm, VCPU_ID); \
- get_ucall(__vm, VCPU_ID, &uc); \
- if (uc.cmd == UCALL_ABORT) { \
- TEST_FAIL("line %lu: %s, hints: %lu, %lu", uc.args[1], \
- (const char *)uc.args[0], uc.args[2], uc.args[3]); \
- } \
- ASSERT_EQ(uc.cmd, UCALL_SYNC); \
- ASSERT_EQ(uc.args[1], __stage); \
+#define HOST_SYNC_NO_TAP(vcpup, stage) \
+({ \
+ struct kvm_vcpu *__vcpu = (vcpup); \
+ struct ucall uc; \
+ int __stage = (stage); \
+ \
+ vcpu_run(__vcpu); \
+ get_ucall(__vcpu, &uc); \
+ if (uc.cmd == UCALL_ABORT) \
+ REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu"); \
+ ASSERT_EQ(uc.cmd, UCALL_SYNC); \
+ ASSERT_EQ(uc.args[1], __stage); \
+})
+
+#define HOST_SYNC(vcpu, stage) \
+({ \
+ HOST_SYNC_NO_TAP(vcpu, stage); \
+ ksft_test_result_pass("" #stage "\n"); \
})
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
vm_vaddr_t guest_0_page;
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
+ ksft_print_header();
+ ksft_set_plan(STAGE_END);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
- HOST_SYNC(vm, STAGE_INIT_SIMPLE);
+ HOST_SYNC(vcpu, STAGE_INIT_SIMPLE);
mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
- HOST_SYNC(vm, TEST_SIMPLE);
+ HOST_SYNC(vcpu, TEST_SIMPLE);
guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
- if (guest_0_page != 0)
- print_skip("Did not allocate page at 0 for fetch protection override tests");
- HOST_SYNC(vm, STAGE_INIT_FETCH_PROT_OVERRIDE);
+ if (guest_0_page != 0) {
+ /* Use NO_TAP so we don't get a PASS print */
+ HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
+ ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - "
+ "Did not allocate page at 0\n");
+ } else {
+ HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
+ }
if (guest_0_page == 0)
mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
run->kvm_dirty_regs = KVM_SYNC_CRS;
- HOST_SYNC(vm, TEST_FETCH_PROT_OVERRIDE);
+ HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE);
run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
run->kvm_dirty_regs = KVM_SYNC_CRS;
- HOST_SYNC(vm, TEST_STORAGE_PROT_OVERRIDE);
+ HOST_SYNC(vcpu, TEST_STORAGE_PROT_OVERRIDE);
+
+ kvm_vm_free(vm);
+
+ ksft_finished(); /* Print results and exit() accordingly */
}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 73bc297dabe6..0d55f508d595 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -17,8 +17,6 @@
#include <kvm_util.h>
#include <processor.h>
-#define VCPU_ID 0
-
/*
* s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
* 2MB sized and aligned region so that the initial region corresponds to
@@ -54,8 +52,8 @@ static inline uint64_t guest_spin_on_val(uint64_t spin_val)
static void *vcpu_worker(void *data)
{
- struct kvm_vm *vm = data;
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu = data;
+ struct kvm_run *run = vcpu->run;
struct ucall uc;
uint64_t cmd;
@@ -64,13 +62,11 @@ static void *vcpu_worker(void *data)
* which will occur if the guest attempts to access a memslot after it
* has been deleted or while it is being moved .
*/
- run = vcpu_state(vm, VCPU_ID);
-
while (1) {
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
if (run->exit_reason == KVM_EXIT_IO) {
- cmd = get_ucall(vm, VCPU_ID, &uc);
+ cmd = get_ucall(vcpu, &uc);
if (cmd != UCALL_SYNC)
break;
@@ -92,8 +88,7 @@ static void *vcpu_worker(void *data)
}
if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
- TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
- __FILE__, uc.args[1], uc.args[2]);
+ REPORT_GUEST_ASSERT_1(uc, "val = %lu");
return NULL;
}
@@ -113,13 +108,14 @@ static void wait_for_vcpu(void)
usleep(100000);
}
-static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
+static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread,
+ void *guest_code)
{
struct kvm_vm *vm;
uint64_t *hva;
uint64_t gpa;
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
MEM_REGION_GPA, MEM_REGION_SLOT,
@@ -138,7 +134,7 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
memset(hva, 0, 2 * 4096);
- pthread_create(vcpu_thread, NULL, vcpu_worker, vm);
+ pthread_create(vcpu_thread, NULL, vcpu_worker, *vcpu);
/* Ensure the guest thread is spun up. */
wait_for_vcpu();
@@ -180,10 +176,11 @@ static void guest_code_move_memory_region(void)
static void test_move_memory_region(void)
{
pthread_t vcpu_thread;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t *hva;
- vm = spawn_vm(&vcpu_thread, guest_code_move_memory_region);
+ vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region);
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
@@ -258,11 +255,12 @@ static void guest_code_delete_memory_region(void)
static void test_delete_memory_region(void)
{
pthread_t vcpu_thread;
+ struct kvm_vcpu *vcpu;
struct kvm_regs regs;
struct kvm_run *run;
struct kvm_vm *vm;
- vm = spawn_vm(&vcpu_thread, guest_code_delete_memory_region);
+ vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_delete_memory_region);
/* Delete the memory region, the guest should not die. */
vm_mem_region_delete(vm, MEM_REGION_SLOT);
@@ -286,13 +284,13 @@ static void test_delete_memory_region(void)
pthread_join(vcpu_thread, NULL);
- run = vcpu_state(vm, VCPU_ID);
+ run = vcpu->run;
TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN ||
run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
"Unexpected exit reason = %d", run->exit_reason);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
/*
* On AMD, after KVM_EXIT_SHUTDOWN the VMCB has been reinitialized already,
@@ -309,19 +307,19 @@ static void test_delete_memory_region(void)
static void test_zero_memory_regions(void)
{
+ struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vm *vm;
pr_info("Testing KVM_RUN with zero added memory regions\n");
- vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
- TEST_ASSERT(!ioctl(vm_get_fd(vm), KVM_SET_NR_MMU_PAGES, 64),
- "KVM_SET_NR_MMU_PAGES failed, errno = %d\n", errno);
- vcpu_run(vm, VCPU_ID);
+ vm_ioctl(vm, KVM_SET_NR_MMU_PAGES, (void *)64ul);
+ vcpu_run(vcpu);
- run = vcpu_state(vm, VCPU_ID);
+ run = vcpu->run;
TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
"Unexpected exit_reason = %u\n", run->exit_reason);
@@ -354,7 +352,7 @@ static void test_add_max_memory_regions(void)
"KVM_CAP_NR_MEMSLOTS should be greater than 0");
pr_info("Allowed number of memory slots: %i\n", max_mem_slots);
- vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+ vm = vm_create_barebones();
/* Check it can be added memory slots up to the maximum allowed */
pr_info("Adding slots 0..%i, each memory region with %dK size\n",
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index 8c4e811bd586..db8967f1a17b 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -58,36 +58,32 @@ static void guest_code(int cpu)
GUEST_DONE();
}
-static void steal_time_init(struct kvm_vm *vm)
+static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
{
- int i;
-
- if (!(kvm_get_supported_cpuid_entry(KVM_CPUID_FEATURES)->eax &
- KVM_FEATURE_STEAL_TIME)) {
- print_skip("steal-time not supported");
- exit(KSFT_SKIP);
- }
+ return kvm_cpu_has(X86_FEATURE_KVM_STEAL_TIME);
+}
- for (i = 0; i < NR_VCPUS; ++i) {
- int ret;
+static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+{
+ int ret;
- /* ST_GPA_BASE is identity mapped */
- st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
- sync_global_to_guest(vm, st_gva[i]);
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ sync_global_to_guest(vcpu->vm, st_gva[i]);
- ret = _vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
- TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
+ ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
+ (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
+ TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
- vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
- }
+ vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
}
-static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
{
- struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]);
+ struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
int i;
- pr_info("VCPU%d:\n", vcpuid);
+ pr_info("VCPU%d:\n", vcpu_idx);
pr_info(" steal: %lld\n", st->steal);
pr_info(" version: %d\n", st->version);
pr_info(" flags: %d\n", st->flags);
@@ -158,49 +154,50 @@ static void guest_code(int cpu)
GUEST_DONE();
}
-static void steal_time_init(struct kvm_vm *vm)
+static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
{
struct kvm_device_attr dev = {
.group = KVM_ARM_VCPU_PVTIME_CTRL,
.attr = KVM_ARM_VCPU_PVTIME_IPA,
};
- int i, ret;
-
- ret = _vcpu_ioctl(vm, 0, KVM_HAS_DEVICE_ATTR, &dev);
- if (ret != 0 && errno == ENXIO) {
- print_skip("steal-time not supported");
- exit(KSFT_SKIP);
- }
- for (i = 0; i < NR_VCPUS; ++i) {
- uint64_t st_ipa;
+ return !__vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
+}
- vcpu_ioctl(vm, i, KVM_HAS_DEVICE_ATTR, &dev);
+static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t st_ipa;
+ int ret;
- dev.addr = (uint64_t)&st_ipa;
+ struct kvm_device_attr dev = {
+ .group = KVM_ARM_VCPU_PVTIME_CTRL,
+ .attr = KVM_ARM_VCPU_PVTIME_IPA,
+ .addr = (uint64_t)&st_ipa,
+ };
- /* ST_GPA_BASE is identity mapped */
- st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
- sync_global_to_guest(vm, st_gva[i]);
+ vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
- st_ipa = (ulong)st_gva[i] | 1;
- ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
- TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ sync_global_to_guest(vm, st_gva[i]);
- st_ipa = (ulong)st_gva[i];
- vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
+ st_ipa = (ulong)st_gva[i] | 1;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
- ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
- TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
+ st_ipa = (ulong)st_gva[i];
+ vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
- }
+ ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
}
-static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
{
- struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]);
+ struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
- pr_info("VCPU%d:\n", vcpuid);
+ pr_info("VCPU%d:\n", vcpu_idx);
pr_info(" rev: %d\n", st->rev);
pr_info(" attr: %d\n", st->attr);
pr_info(" st_time: %ld\n", st->st_time);
@@ -224,29 +221,27 @@ static void *do_steal_time(void *arg)
return NULL;
}
-static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+static void run_vcpu(struct kvm_vcpu *vcpu)
{
struct ucall uc;
- vcpu_args_set(vm, vcpuid, 1, vcpuid);
-
- vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL);
+ vcpu_run(vcpu);
- switch (get_ucall(vm, vcpuid, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
case UCALL_DONE:
break;
case UCALL_ABORT:
- TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
default:
TEST_ASSERT(false, "Unexpected exit: %s",
- exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+ exit_reason_str(vcpu->run->exit_reason));
}
}
int main(int ac, char **av)
{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
struct kvm_vm *vm;
pthread_attr_t attr;
pthread_t thread;
@@ -266,26 +261,26 @@ int main(int ac, char **av)
pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
- /* Create a one VCPU guest and an identity mapped memslot for the steal time structure */
- vm = vm_create_default(0, 0, guest_code);
+ /* Create a VM and an identity mapped memslot for the steal time structure */
+ vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages);
ucall_init(vm, NULL);
- /* Add the rest of the VCPUs */
- for (i = 1; i < NR_VCPUS; ++i)
- vm_vcpu_add_default(vm, i, guest_code);
-
- steal_time_init(vm);
+ TEST_REQUIRE(is_steal_time_supported(vcpus[0]));
/* Run test on each VCPU */
for (i = 0; i < NR_VCPUS; ++i) {
+ steal_time_init(vcpus[i], i);
+
+ vcpu_args_set(vcpus[i], 1, i);
+
/* First VCPU run initializes steal-time */
- run_vcpu(vm, i);
+ run_vcpu(vcpus[i]);
/* Second VCPU run, expect guest stolen time to be <= run_delay */
- run_vcpu(vm, i);
+ run_vcpu(vcpus[i]);
sync_global_from_guest(vm, guest_stolen_time[i]);
stolen_time = guest_stolen_time[i];
run_delay = get_run_delay();
@@ -306,7 +301,7 @@ int main(int ac, char **av)
MIN_RUN_DELAY_NS, run_delay);
/* Run VCPU again to confirm stolen time is consistent with run_delay */
- run_vcpu(vm, i);
+ run_vcpu(vcpus[i]);
sync_global_from_guest(vm, guest_stolen_time[i]);
stolen_time = guest_stolen_time[i] - stolen_time;
TEST_ASSERT(stolen_time >= run_delay,
diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c
index b337bbbfa41f..1c274933912b 100644
--- a/tools/testing/selftests/kvm/system_counter_offset_test.c
+++ b/tools/testing/selftests/kvm/system_counter_offset_test.c
@@ -14,8 +14,6 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 0
-
#ifdef __x86_64__
struct test_case {
@@ -28,19 +26,17 @@ static struct test_case test_cases[] = {
{ -180 * NSEC_PER_SEC },
};
-static void check_preconditions(struct kvm_vm *vm)
+static void check_preconditions(struct kvm_vcpu *vcpu)
{
- if (!_vcpu_has_device_attr(vm, VCPU_ID, KVM_VCPU_TSC_CTRL, KVM_VCPU_TSC_OFFSET))
- return;
-
- print_skip("KVM_VCPU_TSC_OFFSET not supported; skipping test");
- exit(KSFT_SKIP);
+ __TEST_REQUIRE(!__vcpu_has_device_attr(vcpu, KVM_VCPU_TSC_CTRL,
+ KVM_VCPU_TSC_OFFSET),
+ "KVM_VCPU_TSC_OFFSET not supported; skipping test");
}
-static void setup_system_counter(struct kvm_vm *vm, struct test_case *test)
+static void setup_system_counter(struct kvm_vcpu *vcpu, struct test_case *test)
{
- vcpu_access_device_attr(vm, VCPU_ID, KVM_VCPU_TSC_CTRL,
- KVM_VCPU_TSC_OFFSET, &test->tsc_offset, true);
+ vcpu_device_attr_set(vcpu, KVM_VCPU_TSC_CTRL, KVM_VCPU_TSC_OFFSET,
+ &test->tsc_offset);
}
static uint64_t guest_read_system_counter(struct test_case *test)
@@ -87,11 +83,10 @@ static void handle_sync(struct ucall *uc, uint64_t start, uint64_t end)
static void handle_abort(struct ucall *uc)
{
- TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0],
- __FILE__, uc->args[1]);
+ REPORT_GUEST_ASSERT(*uc);
}
-static void enter_guest(struct kvm_vm *vm)
+static void enter_guest(struct kvm_vcpu *vcpu)
{
uint64_t start, end;
struct ucall uc;
@@ -100,12 +95,12 @@ static void enter_guest(struct kvm_vm *vm)
for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
struct test_case *test = &test_cases[i];
- setup_system_counter(vm, test);
+ setup_system_counter(vcpu, test);
start = host_read_guest_system_counter(test);
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
end = host_read_guest_system_counter(test);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
handle_sync(&uc, start, end);
break;
@@ -114,19 +109,20 @@ static void enter_guest(struct kvm_vm *vm)
return;
default:
TEST_ASSERT(0, "unhandled ucall %ld\n",
- get_ucall(vm, VCPU_ID, &uc));
+ get_ucall(vcpu, &uc));
}
}
}
int main(void)
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- vm = vm_create_default(VCPU_ID, 0, guest_main);
- check_preconditions(vm);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+ check_preconditions(vcpu);
ucall_init(vm, NULL);
- enter_guest(vm);
+ enter_guest(vcpu);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index 76f65c22796f..dadcbad10a1d 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -25,10 +25,6 @@
# error This test is 64-bit only
#endif
-#define VCPU_ID 0
-#define X86_FEATURE_XSAVE (1 << 26)
-#define X86_FEATURE_OSXSAVE (1 << 27)
-
#define NUM_TILES 8
#define TILE_SIZE 1024
#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
@@ -124,15 +120,8 @@ static inline void __xsavec(struct xsave_data *data, uint64_t rfbm)
static inline void check_cpuid_xsave(void)
{
- uint32_t eax, ebx, ecx, edx;
-
- eax = 1;
- ecx = 0;
- cpuid(&eax, &ebx, &ecx, &edx);
- if (!(ecx & X86_FEATURE_XSAVE))
- GUEST_ASSERT(!"cpuid: no CPU xsave support!");
- if (!(ecx & X86_FEATURE_OSXSAVE))
- GUEST_ASSERT(!"cpuid: no OS xsave support!");
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE));
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
}
static bool check_xsave_supports_xtile(void)
@@ -144,10 +133,7 @@ static bool enum_xtile_config(void)
{
u32 eax, ebx, ecx, edx;
- eax = TILE_CPUID;
- ecx = TILE_PALETTE_CPUID_SUBLEAVE;
-
- cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid(TILE_CPUID, TILE_PALETTE_CPUID_SUBLEAVE, &eax, &ebx, &ecx, &edx);
if (!eax || !ebx || !ecx)
return false;
@@ -169,10 +155,7 @@ static bool enum_xsave_tile(void)
{
u32 eax, ebx, ecx, edx;
- eax = XSTATE_CPUID;
- ecx = XFEATURE_XTILEDATA;
-
- cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid(XSTATE_CPUID, XFEATURE_XTILEDATA, &eax, &ebx, &ecx, &edx);
if (!eax || !ebx)
return false;
@@ -187,10 +170,7 @@ static bool check_xsave_size(void)
u32 eax, ebx, ecx, edx;
bool valid = false;
- eax = XSTATE_CPUID;
- ecx = XSTATE_USER_STATE_SUBLEAVE;
-
- cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid(XSTATE_CPUID, XSTATE_USER_STATE_SUBLEAVE, &eax, &ebx, &ecx, &edx);
if (ebx && ebx <= XSAVE_SIZE)
valid = true;
@@ -316,46 +296,36 @@ void guest_nm_handler(struct ex_regs *regs)
int main(int argc, char *argv[])
{
- struct kvm_cpuid_entry2 *entry;
struct kvm_regs regs1, regs2;
- bool amx_supported = false;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
struct kvm_x86_state *state;
- int xsave_restore_size = 0;
+ int xsave_restore_size;
vm_vaddr_t amx_cfg, tiledata, xsavedata;
struct ucall uc;
u32 amx_offset;
int stage, ret;
- vm_xsave_req_perm(XSTATE_XTILE_DATA_BIT);
+ vm_xsave_require_permission(XSTATE_XTILE_DATA_BIT);
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- entry = kvm_get_supported_cpuid_entry(1);
- if (!(entry->ecx & X86_FEATURE_XSAVE)) {
- print_skip("XSAVE feature not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
- if (kvm_get_cpuid_max_basic() >= 0xd) {
- entry = kvm_get_supported_cpuid_index(0xd, 0);
- amx_supported = entry && !!(entry->eax & XFEATURE_MASK_XTILE);
- if (!amx_supported) {
- print_skip("AMX is not supported by the vCPU (eax=0x%x)", entry->eax);
- exit(KSFT_SKIP);
- }
- /* Get xsave/restore max size */
- xsave_restore_size = entry->ecx;
- }
+ /* Get xsave/restore max size */
+ xsave_restore_size = kvm_get_supported_cpuid_entry(0xd)->ecx;
- run = vcpu_state(vm, VCPU_ID);
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ run = vcpu->run;
+ vcpu_regs_get(vcpu, &regs1);
/* Register #NM handler */
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
/* amx cfg for guest_code */
@@ -369,19 +339,18 @@ int main(int argc, char *argv[])
/* xsave data for guest_code */
xsavedata = vm_vaddr_alloc_pages(vm, 3);
memset(addr_gva2hva(vm, xsavedata), 0, 3 * getpagesize());
- vcpu_args_set(vm, VCPU_ID, 3, amx_cfg, tiledata, xsavedata);
+ vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xsavedata);
for (stage = 1; ; stage++) {
- _vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Stage %d: unexpected exit reason: %u (%s),\n",
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
switch (uc.args[1]) {
@@ -403,7 +372,7 @@ int main(int argc, char *argv[])
* size subtract 8K amx size.
*/
amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
- state = vcpu_save_state(vm, VCPU_ID);
+ state = vcpu_save_state(vcpu);
void *amx_start = (void *)state->xsave + amx_offset;
void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
/* Only check TMM0 register, 1 tile */
@@ -424,22 +393,20 @@ int main(int argc, char *argv[])
TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
- state = vcpu_save_state(vm, VCPU_ID);
+ state = vcpu_save_state(vcpu);
memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
kvm_vm_release(vm);
/* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ run = vcpu->run;
kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
+ vcpu_regs_get(vcpu, &regs2);
TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
"Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
(ulong) regs2.rdi, (ulong) regs2.rsi);
diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
index 16d2465c5634..a6aeee2e62e4 100644
--- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
@@ -12,8 +12,6 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 0
-
/* CPUIDs known to differ */
struct {
u32 function;
@@ -33,10 +31,9 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
u32 eax, ebx, ecx, edx;
for (i = 0; i < guest_cpuid->nent; i++) {
- eax = guest_cpuid->entries[i].function;
- ecx = guest_cpuid->entries[i].index;
-
- cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid(guest_cpuid->entries[i].function,
+ guest_cpuid->entries[i].index,
+ &eax, &ebx, &ecx, &edx);
GUEST_ASSERT(eax == guest_cpuid->entries[i].eax &&
ebx == guest_cpuid->entries[i].ebx &&
@@ -48,9 +45,9 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
static void test_cpuid_40000000(struct kvm_cpuid2 *guest_cpuid)
{
- u32 eax = 0x40000000, ebx, ecx = 0, edx;
+ u32 eax, ebx, ecx, edx;
- cpuid(&eax, &ebx, &ecx, &edx);
+ cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
GUEST_ASSERT(eax == 0x40000001);
}
@@ -68,7 +65,7 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid)
GUEST_DONE();
}
-static bool is_cpuid_mangled(struct kvm_cpuid_entry2 *entrie)
+static bool is_cpuid_mangled(const struct kvm_cpuid_entry2 *entrie)
{
int i;
@@ -81,50 +78,44 @@ static bool is_cpuid_mangled(struct kvm_cpuid_entry2 *entrie)
return false;
}
-static void check_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *entrie)
+static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
+ const struct kvm_cpuid2 *cpuid2)
{
+ const struct kvm_cpuid_entry2 *e1, *e2;
int i;
- for (i = 0; i < cpuid->nent; i++) {
- if (cpuid->entries[i].function == entrie->function &&
- cpuid->entries[i].index == entrie->index) {
- if (is_cpuid_mangled(entrie))
- return;
-
- TEST_ASSERT(cpuid->entries[i].eax == entrie->eax &&
- cpuid->entries[i].ebx == entrie->ebx &&
- cpuid->entries[i].ecx == entrie->ecx &&
- cpuid->entries[i].edx == entrie->edx,
- "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
- entrie->function, entrie->index,
- cpuid->entries[i].eax, cpuid->entries[i].ebx,
- cpuid->entries[i].ecx, cpuid->entries[i].edx,
- entrie->eax, entrie->ebx, entrie->ecx, entrie->edx);
- return;
- }
- }
+ TEST_ASSERT(cpuid1->nent == cpuid2->nent,
+ "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent);
- TEST_ASSERT(false, "CPUID 0x%x.%x not found", entrie->function, entrie->index);
-}
+ for (i = 0; i < cpuid1->nent; i++) {
+ e1 = &cpuid1->entries[i];
+ e2 = &cpuid2->entries[i];
-static void compare_cpuids(struct kvm_cpuid2 *cpuid1, struct kvm_cpuid2 *cpuid2)
-{
- int i;
+ TEST_ASSERT(e1->function == e2->function &&
+ e1->index == e2->index && e1->flags == e2->flags,
+ "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x\n",
+ i, e1->function, e1->index, e1->flags,
+ e2->function, e2->index, e2->flags);
- for (i = 0; i < cpuid1->nent; i++)
- check_cpuid(cpuid2, &cpuid1->entries[i]);
+ if (is_cpuid_mangled(e1))
+ continue;
- for (i = 0; i < cpuid2->nent; i++)
- check_cpuid(cpuid1, &cpuid2->entries[i]);
+ TEST_ASSERT(e1->eax == e2->eax && e1->ebx == e2->ebx &&
+ e1->ecx == e2->ecx && e1->edx == e2->edx,
+ "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
+ e1->function, e1->index,
+ e1->eax, e1->ebx, e1->ecx, e1->edx,
+ e2->eax, e2->ebx, e2->ecx, e2->edx);
+ }
}
-static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
{
struct ucall uc;
- _vcpu_run(vm, vcpuid);
+ vcpu_run(vcpu);
- switch (get_ucall(vm, vcpuid, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
uc.args[1] == stage + 1,
@@ -134,11 +125,10 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
case UCALL_DONE:
return;
case UCALL_ABORT:
- TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx", (const char *)uc.args[0],
- __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
default:
TEST_ASSERT(false, "Unexpected exit: %s",
- exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+ exit_reason_str(vcpu->run->exit_reason));
}
}
@@ -154,56 +144,53 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct
return guest_cpuids;
}
-static void set_cpuid_after_run(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid)
+static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *ent;
int rc;
u32 eax, ebx, x;
/* Setting unmodified CPUID is allowed */
- rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ rc = __vcpu_set_cpuid(vcpu);
TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
/* Changing CPU features is forbidden */
- ent = get_cpuid(cpuid, 0x7, 0);
+ ent = vcpu_get_cpuid_entry(vcpu, 0x7);
ebx = ent->ebx;
ent->ebx--;
- rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ rc = __vcpu_set_cpuid(vcpu);
TEST_ASSERT(rc, "Changing CPU features should fail");
ent->ebx = ebx;
/* Changing MAXPHYADDR is forbidden */
- ent = get_cpuid(cpuid, 0x80000008, 0);
+ ent = vcpu_get_cpuid_entry(vcpu, 0x80000008);
eax = ent->eax;
x = eax & 0xff;
ent->eax = (eax & ~0xffu) | (x - 1);
- rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ rc = __vcpu_set_cpuid(vcpu);
TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
ent->eax = eax;
}
int main(void)
{
- struct kvm_cpuid2 *supp_cpuid, *cpuid2;
+ struct kvm_vcpu *vcpu;
vm_vaddr_t cpuid_gva;
struct kvm_vm *vm;
int stage;
- vm = vm_create_default(VCPU_ID, 0, guest_main);
-
- supp_cpuid = kvm_get_supported_cpuid();
- cpuid2 = vcpu_get_cpuid(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
- compare_cpuids(supp_cpuid, cpuid2);
+ compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid);
- vcpu_alloc_cpuid(vm, &cpuid_gva, cpuid2);
+ vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid);
- vcpu_args_set(vm, VCPU_ID, 1, cpuid_gva);
+ vcpu_args_set(vcpu, 1, cpuid_gva);
for (stage = 0; stage < 3; stage++)
- run_vcpu(vm, VCPU_ID, stage);
+ run_vcpu(vcpu, stage);
- set_cpuid_after_run(vm, cpuid2);
+ set_cpuid_after_run(vcpu);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index 6f6fd189dda3..4208487652f8 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -19,25 +19,11 @@
#include "kvm_util.h"
#include "processor.h"
-#define X86_FEATURE_XSAVE (1<<26)
-#define X86_FEATURE_OSXSAVE (1<<27)
-#define VCPU_ID 1
-
static inline bool cr4_cpuid_is_sync(void)
{
- int func, subfunc;
- uint32_t eax, ebx, ecx, edx;
- uint64_t cr4;
-
- func = 0x1;
- subfunc = 0x0;
- __asm__ __volatile__("cpuid"
- : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
- : "a"(func), "c"(subfunc));
+ uint64_t cr4 = get_cr4();
- cr4 = get_cr4();
-
- return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
+ return (this_cpu_has(X86_FEATURE_OSXSAVE) == !!(cr4 & X86_CR4_OSXSAVE));
}
static void guest_code(void)
@@ -63,44 +49,37 @@ static void guest_code(void)
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vm *vm;
struct kvm_sregs sregs;
- struct kvm_cpuid_entry2 *entry;
struct ucall uc;
- int rc;
- entry = kvm_get_supported_cpuid_entry(1);
- if (!(entry->ecx & X86_FEATURE_XSAVE)) {
- print_skip("XSAVE feature not supported");
- return 0;
- }
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
while (1) {
- rc = _vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
- TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s),\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
/* emulate hypervisor clearing CR4.OSXSAVE */
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.cr4 &= ~X86_CR4_OSXSAVE;
- vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
break;
case UCALL_ABORT:
- TEST_FAIL("Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
+ REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
goto done;
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index 5f078db1bcba..7ef99c3359a0 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -10,8 +10,6 @@
#include "processor.h"
#include "apic.h"
-#define VCPU_ID 0
-
#define DR6_BD (1 << 13)
#define DR7_GD (1 << 13)
@@ -66,21 +64,22 @@ static void guest_code(void)
GUEST_DONE();
}
-#define CLEAR_DEBUG() memset(&debug, 0, sizeof(debug))
-#define APPLY_DEBUG() vcpu_set_guest_debug(vm, VCPU_ID, &debug)
#define CAST_TO_RIP(v) ((unsigned long long)&(v))
-#define SET_RIP(v) do { \
- vcpu_regs_get(vm, VCPU_ID, &regs); \
- regs.rip = (v); \
- vcpu_regs_set(vm, VCPU_ID, &regs); \
- } while (0)
-#define MOVE_RIP(v) SET_RIP(regs.rip + (v));
+
+static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len)
+{
+ struct kvm_regs regs;
+
+ vcpu_regs_get(vcpu, &regs);
+ regs.rip += insn_len;
+ vcpu_regs_set(vcpu, &regs);
+}
int main(void)
{
struct kvm_guest_debug debug;
unsigned long long target_dr6, target_rip;
- struct kvm_regs regs;
+ struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vm *vm;
struct ucall uc;
@@ -96,35 +95,32 @@ int main(void)
1, /* cli */
};
- if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
- print_skip("KVM_CAP_SET_GUEST_DEBUG not supported");
- return 0;
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
/* Test software BPs - int3 */
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == BP_VECTOR &&
run->debug.arch.pc == CAST_TO_RIP(sw_bp),
"INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
run->exit_reason, run->debug.arch.exception,
run->debug.arch.pc, CAST_TO_RIP(sw_bp));
- MOVE_RIP(1);
+ vcpu_skip_insn(vcpu, 1);
/* Test instruction HW BP over DR[0-3] */
for (i = 0; i < 4; i++) {
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
target_dr6 = 0xffff0ff0 | (1UL << i);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
@@ -137,17 +133,17 @@ int main(void)
run->debug.arch.dr6, target_dr6);
}
/* Skip "nop" */
- MOVE_RIP(1);
+ vcpu_skip_insn(vcpu, 1);
/* Test data access HW BP over DR[0-3] */
for (i = 0; i < 4; i++) {
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
(0x000d0000UL << (4*i));
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
target_dr6 = 0xffff0ff0 | (1UL << i);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
@@ -159,23 +155,22 @@ int main(void)
run->debug.arch.pc, CAST_TO_RIP(write_data),
run->debug.arch.dr6, target_dr6);
/* Rollback the 4-bytes "mov" */
- MOVE_RIP(-7);
+ vcpu_skip_insn(vcpu, -7);
}
/* Skip the 4-bytes "mov" */
- MOVE_RIP(7);
+ vcpu_skip_insn(vcpu, 7);
/* Test single step */
target_rip = CAST_TO_RIP(ss_start);
target_dr6 = 0xffff4ff0ULL;
- vcpu_regs_get(vm, VCPU_ID, &regs);
for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
target_rip += ss_size[i];
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
KVM_GUESTDBG_BLOCKIRQ;
debug.arch.debugreg[7] = 0x00000400;
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
run->debug.arch.pc == target_rip &&
@@ -188,11 +183,11 @@ int main(void)
}
/* Finally test global disable */
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[7] = 0x400 | DR7_GD;
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
target_dr6 = 0xffff0ff0 | DR6_BD;
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
@@ -205,12 +200,12 @@ int main(void)
target_dr6);
/* Disable all debug controls, run to the end */
- CLEAR_DEBUG();
- APPLY_DEBUG();
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "KVM_EXIT_IO");
- cmd = get_ucall(vm, VCPU_ID, &uc);
+ cmd = get_ucall(vcpu, &uc);
TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
index aeb3850f81bd..236e11755ba6 100644
--- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
+++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
@@ -11,7 +11,6 @@
#include "kvm_util.h"
#include "vmx.h"
-#define VCPU_ID 1
#define MAXPHYADDR 36
#define MEM_REGION_GVA 0x0000123456789000
@@ -27,14 +26,6 @@ static void guest_code(void)
GUEST_DONE();
}
-static void run_guest(struct kvm_vm *vm)
-{
- int rc;
-
- rc = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
-}
-
/*
* Accessors to get R/M, REG, and Mod bits described in the SDM vol 2,
* figure 2-2 "Table Interpretation of ModR/M Byte (C8H)".
@@ -56,9 +47,9 @@ static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size)
GET_RM(insn_bytes[1]) != 0x5;
}
-static void process_exit_on_emulation_error(struct kvm_vm *vm)
+static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
struct kvm_regs regs;
uint8_t *insn_bytes;
uint8_t insn_size;
@@ -92,50 +83,48 @@ static void process_exit_on_emulation_error(struct kvm_vm *vm)
* contained an flds instruction that is 2-bytes in
* length (ie: no prefix, no SIB, no displacement).
*/
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
regs.rip += 2;
- vcpu_regs_set(vm, VCPU_ID, &regs);
+ vcpu_regs_set(vcpu, &regs);
}
}
}
-static void do_guest_assert(struct kvm_vm *vm, struct ucall *uc)
+static void do_guest_assert(struct ucall *uc)
{
- TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], __FILE__,
- uc->args[1]);
+ REPORT_GUEST_ASSERT(*uc);
}
-static void check_for_guest_assert(struct kvm_vm *vm)
+static void check_for_guest_assert(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
- if (run->exit_reason == KVM_EXIT_IO &&
- get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) {
- do_guest_assert(vm, &uc);
+ if (vcpu->run->exit_reason == KVM_EXIT_IO &&
+ get_ucall(vcpu, &uc) == UCALL_ABORT) {
+ do_guest_assert(&uc);
}
}
-static void process_ucall_done(struct kvm_vm *vm)
+static void process_ucall_done(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
struct ucall uc;
- check_for_guest_assert(vm);
+ check_for_guest_assert(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s)",
run->exit_reason,
exit_reason_str(run->exit_reason));
- TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE,
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
"Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
uc.cmd, UCALL_DONE);
}
-static uint64_t process_ucall(struct kvm_vm *vm)
+static uint64_t process_ucall(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
struct ucall uc;
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
@@ -143,14 +132,14 @@ static uint64_t process_ucall(struct kvm_vm *vm)
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
break;
case UCALL_ABORT:
- do_guest_assert(vm, &uc);
+ do_guest_assert(&uc);
break;
case UCALL_DONE:
- process_ucall_done(vm);
+ process_ucall_done(vcpu);
break;
default:
TEST_ASSERT(false, "Unexpected ucall");
@@ -161,12 +150,7 @@ static uint64_t process_ucall(struct kvm_vm *vm)
int main(int argc, char *argv[])
{
- struct kvm_enable_cap emul_failure_cap = {
- .cap = KVM_CAP_EXIT_ON_EMULATION_FAILURE,
- .args[0] = 1,
- };
- struct kvm_cpuid_entry2 *entry;
- struct kvm_cpuid2 *cpuid;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t gpa, pte;
uint64_t *hva;
@@ -175,24 +159,15 @@ int main(int argc, char *argv[])
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- if (!kvm_check_cap(KVM_CAP_SMALLER_MAXPHYADDR)) {
- printf("module parameter 'allow_smaller_maxphyaddr' is not set. Skipping test.\n");
- return 0;
- }
-
- cpuid = kvm_get_supported_cpuid();
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
- entry = kvm_get_supported_cpuid_index(0x80000008, 0);
- entry->eax = (entry->eax & 0xffffff00) | MAXPHYADDR;
- set_cpuid(cpuid, entry);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR);
rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
- vm_enable_cap(vm, &emul_failure_cap);
+ vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
MEM_REGION_GPA, MEM_REGION_SLOT,
@@ -203,14 +178,14 @@ int main(int argc, char *argv[])
virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
memset(hva, 0, PAGE_SIZE);
- pte = vm_get_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA);
- vm_set_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA, pte | (1ull << 36));
+ pte = vm_get_page_table_entry(vm, vcpu, MEM_REGION_GVA);
+ vm_set_page_table_entry(vm, vcpu, MEM_REGION_GVA, pte | (1ull << 36));
- run_guest(vm);
- process_exit_on_emulation_error(vm);
- run_guest(vm);
+ vcpu_run(vcpu);
+ process_exit_on_emulation_error(vcpu);
+ vcpu_run(vcpu);
- TEST_ASSERT(process_ucall(vm) == UCALL_DONE, "Expected UCALL_DONE");
+ TEST_ASSERT(process_ucall(vcpu) == UCALL_DONE, "Expected UCALL_DONE");
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index d12e043aa2ee..99bc202243d2 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -18,9 +18,6 @@
#include "vmx.h"
-#define VCPU_ID 5
-#define NMI_VECTOR 2
-
static int ud_count;
static void guest_ud_handler(struct ex_regs *regs)
@@ -160,88 +157,86 @@ void guest_code(struct vmx_pages *vmx_pages)
GUEST_DONE();
}
-void inject_nmi(struct kvm_vm *vm)
+void inject_nmi(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_events events;
- vcpu_events_get(vm, VCPU_ID, &events);
+ vcpu_events_get(vcpu, &events);
events.nmi.pending = 1;
events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
- vcpu_events_set(vm, VCPU_ID, &events);
+ vcpu_events_set(vcpu, &events);
}
-static void save_restore_vm(struct kvm_vm *vm)
+static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
+ struct kvm_vcpu *vcpu)
{
struct kvm_regs regs1, regs2;
struct kvm_x86_state *state;
- state = vcpu_save_state(vm, VCPU_ID);
+ state = vcpu_save_state(vcpu);
memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
kvm_vm_release(vm);
/* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_hv_cpuid(vm, VCPU_ID);
- vcpu_enable_evmcs(vm, VCPU_ID);
- vcpu_load_state(vm, VCPU_ID, state);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
+ vcpu_load_state(vcpu, state);
kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
+ vcpu_regs_get(vcpu, &regs2);
TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
"Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
(ulong) regs2.rdi, (ulong) regs2.rsi);
+ return vcpu;
}
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva = 0;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
struct ucall uc;
int stage;
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- if (!nested_vmx_supported() ||
- !kvm_check_cap(KVM_CAP_NESTED_STATE) ||
- !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- print_skip("Enlightened VMCS is unsupported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
- vcpu_set_hv_cpuid(vm, VCPU_ID);
- vcpu_enable_evmcs(vm, VCPU_ID);
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
pr_info("Running L1 which uses EVMCS to run L2\n");
for (stage = 1;; stage++) {
- run = vcpu_state(vm, VCPU_ID);
- _vcpu_run(vm, VCPU_ID);
+ run = vcpu->run;
+
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Stage %d: unexpected exit reason: %u (%s),\n",
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
@@ -256,12 +251,12 @@ int main(int argc, char *argv[])
uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
stage, (ulong)uc.args[1]);
- save_restore_vm(vm);
+ vcpu = save_restore_vm(vm, vcpu);
/* Force immediate L2->L1 exit before resuming */
if (stage == 8) {
pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
- inject_nmi(vm);
+ inject_nmi(vcpu);
}
/*
@@ -271,7 +266,7 @@ int main(int argc, char *argv[])
*/
if (stage == 9) {
pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
- save_restore_vm(vm);
+ vcpu = save_restore_vm(vm, vcpu);
}
}
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
index 1f5c32146f3d..b1905d280ef5 100644
--- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
@@ -14,8 +14,6 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 0
-
static bool ud_expected;
static void guest_ud_handler(struct ex_regs *regs)
@@ -94,29 +92,27 @@ static void guest_main(void)
GUEST_DONE();
}
-static void setup_ud_vector(struct kvm_vm *vm)
+static void setup_ud_vector(struct kvm_vcpu *vcpu)
{
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+ vm_init_descriptor_tables(vcpu->vm);
+ vcpu_init_descriptor_tables(vcpu);
+ vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
}
-static void enter_guest(struct kvm_vm *vm)
+static void enter_guest(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run;
+ struct kvm_run *run = vcpu->run;
struct ucall uc;
- run = vcpu_state(vm, VCPU_ID);
-
- vcpu_run(vm, VCPU_ID);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]);
break;
case UCALL_DONE:
return;
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
default:
TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)",
uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason));
@@ -125,45 +121,42 @@ static void enter_guest(struct kvm_vm *vm)
static void test_fix_hypercall(void)
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- vm = vm_create_default(VCPU_ID, 0, guest_main);
- setup_ud_vector(vm);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+ setup_ud_vector(vcpu);
ud_expected = false;
sync_global_to_guest(vm, ud_expected);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
- enter_guest(vm);
+ enter_guest(vcpu);
}
static void test_fix_hypercall_disabled(void)
{
- struct kvm_enable_cap cap = {0};
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- vm = vm_create_default(VCPU_ID, 0, guest_main);
- setup_ud_vector(vm);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+ setup_ud_vector(vcpu);
- cap.cap = KVM_CAP_DISABLE_QUIRKS2;
- cap.args[0] = KVM_X86_QUIRK_FIX_HYPERCALL_INSN;
- vm_enable_cap(vm, &cap);
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
+ KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
ud_expected = true;
sync_global_to_guest(vm, ud_expected);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
- enter_guest(vm);
+ enter_guest(vcpu);
}
int main(void)
{
- if (!(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN)) {
- print_skip("KVM_X86_QUIRK_HYPERCALL_INSN not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
test_fix_hypercall();
test_fix_hypercall_disabled();
diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
index 8aed0db1331d..d09b3cbcadc6 100644
--- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
+++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
@@ -15,116 +15,21 @@
#include "kvm_util.h"
#include "processor.h"
-static int kvm_num_index_msrs(int kvm_fd, int nmsrs)
-{
- struct kvm_msr_list *list;
- int r;
-
- list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
- list->nmsrs = nmsrs;
- r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- TEST_ASSERT(r == -1 && errno == E2BIG,
- "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
- r);
-
- r = list->nmsrs;
- free(list);
- return r;
-}
-
-static void test_get_msr_index(void)
-{
- int old_res, res, kvm_fd, r;
- struct kvm_msr_list *list;
-
- kvm_fd = open_kvm_dev_path_or_exit();
-
- old_res = kvm_num_index_msrs(kvm_fd, 0);
- TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
-
- if (old_res != 1) {
- res = kvm_num_index_msrs(kvm_fd, 1);
- TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1");
- TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical");
- }
-
- list = malloc(sizeof(*list) + old_res * sizeof(list->indices[0]));
- list->nmsrs = old_res;
- r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
-
- TEST_ASSERT(r == 0,
- "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i",
- r);
- TEST_ASSERT(list->nmsrs == old_res, "Expecting nmsrs to be identical");
- free(list);
-
- close(kvm_fd);
-}
-
-static int kvm_num_feature_msrs(int kvm_fd, int nmsrs)
-{
- struct kvm_msr_list *list;
- int r;
-
- list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
- list->nmsrs = nmsrs;
- r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
- TEST_ASSERT(r == -1 && errno == E2BIG,
- "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST probe, r: %i",
- r);
-
- r = list->nmsrs;
- free(list);
- return r;
-}
-
-struct kvm_msr_list *kvm_get_msr_feature_list(int kvm_fd, int nmsrs)
-{
- struct kvm_msr_list *list;
- int r;
-
- list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
- list->nmsrs = nmsrs;
- r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
-
- TEST_ASSERT(r == 0,
- "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i",
- r);
-
- return list;
-}
-
-static void test_get_msr_feature(void)
+int main(int argc, char *argv[])
{
- int res, old_res, i, kvm_fd;
- struct kvm_msr_list *feature_list;
+ const struct kvm_msr_list *feature_list;
+ int i;
- kvm_fd = open_kvm_dev_path_or_exit();
+ /*
+ * Skip the entire test if MSR_FEATURES isn't supported, other tests
+ * will cover the "regular" list of MSRs, the coverage here is purely
+ * opportunistic and not interesting on its own.
+ */
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
- old_res = kvm_num_feature_msrs(kvm_fd, 0);
- TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
-
- if (old_res != 1) {
- res = kvm_num_feature_msrs(kvm_fd, 1);
- TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1");
- TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical");
- }
-
- feature_list = kvm_get_msr_feature_list(kvm_fd, old_res);
- TEST_ASSERT(old_res == feature_list->nmsrs,
- "Unmatching number of msr indexes");
+ (void)kvm_get_msr_index_list();
+ feature_list = kvm_get_feature_msr_index_list();
for (i = 0; i < feature_list->nmsrs; i++)
kvm_get_feature_msr(feature_list->indices[i]);
-
- free(feature_list);
- close(kvm_fd);
-}
-
-int main(int argc, char *argv[])
-{
- if (kvm_check_cap(KVM_CAP_GET_MSR_FEATURES))
- test_get_msr_feature();
-
- test_get_msr_index();
}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
index 3330fb183c68..d576bc8ce823 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -173,23 +173,21 @@ static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_
GUEST_DONE();
}
-#define VCPU_ID 0
-
-static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
+static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu)
{
u64 tsc_freq, r1, r2, t1, t2;
s64 delta_ns;
- tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
+ tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY);
TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
/* For increased accuracy, take mean rdtsc() before and afrer ioctl */
r1 = rdtsc();
- t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+ t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
r1 = (r1 + rdtsc()) / 2;
nop_loop();
r2 = rdtsc();
- t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+ t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
r2 = (r2 + rdtsc()) / 2;
TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
@@ -207,36 +205,36 @@ static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
int main(void)
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
struct ucall uc;
vm_vaddr_t tsc_page_gva;
int stage;
- vm = vm_create_default(VCPU_ID, 0, guest_main);
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+ run = vcpu->run;
- vcpu_set_hv_cpuid(vm, VCPU_ID);
+ vcpu_set_hv_cpuid(vcpu);
tsc_page_gva = vm_vaddr_alloc_page(vm);
memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
"TSC page has to be page aligned\n");
- vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
+ vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
- host_check_tsc_msr_rdtsc(vm);
+ host_check_tsc_msr_rdtsc(vcpu);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Stage %d: unexpected exit reason: %u (%s),\n",
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 8c245ab2d98a..e804eb08dff9 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -20,8 +20,6 @@
#include "processor.h"
#include "vmx.h"
-#define VCPU_ID 0
-
static void guest_code(void)
{
}
@@ -45,7 +43,7 @@ static bool smt_possible(void)
return res;
}
-static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
+static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
bool evmcs_expected)
{
int i;
@@ -58,7 +56,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
nent_expected, hv_cpuid_entries->nent);
for (i = 0; i < hv_cpuid_entries->nent; i++) {
- struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
+ const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
TEST_ASSERT((entry->function >= 0x40000000) &&
(entry->function <= 0x40000082),
@@ -115,64 +113,62 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
}
}
-void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system)
+void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
static struct kvm_cpuid2 cpuid = {.nent = 0};
int ret;
- if (!system)
- ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+ if (vcpu)
+ ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
else
- ret = _kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+ ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
TEST_ASSERT(ret == -1 && errno == E2BIG,
"%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
- " it should have: %d %d", system ? "KVM" : "vCPU", ret, errno);
+ " it should have: %d %d", !vcpu ? "KVM" : "vCPU", ret, errno);
}
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
- struct kvm_cpuid2 *hv_cpuid_entries;
+ const struct kvm_cpuid2 *hv_cpuid_entries;
+ struct kvm_vcpu *vcpu;
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
- if (!kvm_check_cap(KVM_CAP_HYPERV_CPUID)) {
- print_skip("KVM_CAP_HYPERV_CPUID not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
/* Test vCPU ioctl version */
- test_hv_cpuid_e2big(vm, false);
+ test_hv_cpuid_e2big(vm, vcpu);
- hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vm, VCPU_ID);
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
test_hv_cpuid(hv_cpuid_entries, false);
- free(hv_cpuid_entries);
+ free((void *)hv_cpuid_entries);
- if (!nested_vmx_supported() ||
- !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+ if (!kvm_cpu_has(X86_FEATURE_VMX) ||
+ !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
print_skip("Enlightened VMCS is unsupported");
goto do_sys;
}
- vcpu_enable_evmcs(vm, VCPU_ID);
- hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vm, VCPU_ID);
+ vcpu_enable_evmcs(vcpu);
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
test_hv_cpuid(hv_cpuid_entries, true);
- free(hv_cpuid_entries);
+ free((void *)hv_cpuid_entries);
do_sys:
/* Test system ioctl version */
- if (!kvm_check_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
+ if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
goto out;
}
- test_hv_cpuid_e2big(vm, true);
+ test_hv_cpuid_e2big(vm, NULL);
hv_cpuid_entries = kvm_get_supported_hv_cpuid();
- test_hv_cpuid(hv_cpuid_entries, nested_vmx_supported());
+ test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
out:
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 672915ce73d8..79ab0152d281 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -13,78 +13,22 @@
#include "processor.h"
#include "hyperv.h"
-#define VCPU_ID 0
#define LINUX_OS_ID ((u64)0x8100 << 48)
-extern unsigned char rdmsr_start;
-extern unsigned char rdmsr_end;
-
-static u64 do_rdmsr(u32 idx)
-{
- u32 lo, hi;
-
- asm volatile("rdmsr_start: rdmsr;"
- "rdmsr_end:"
- : "=a"(lo), "=c"(hi)
- : "c"(idx));
-
- return (((u64) hi) << 32) | lo;
-}
-
-extern unsigned char wrmsr_start;
-extern unsigned char wrmsr_end;
-
-static void do_wrmsr(u32 idx, u64 val)
-{
- u32 lo, hi;
-
- lo = val;
- hi = val >> 32;
-
- asm volatile("wrmsr_start: wrmsr;"
- "wrmsr_end:"
- : : "a"(lo), "c"(idx), "d"(hi));
-}
-
-static int nr_gp;
-static int nr_ud;
-
-static inline u64 hypercall(u64 control, vm_vaddr_t input_address,
- vm_vaddr_t output_address)
-{
- u64 hv_status;
-
- asm volatile("mov %3, %%r8\n"
- "vmcall"
- : "=a" (hv_status),
- "+c" (control), "+d" (input_address)
- : "r" (output_address)
- : "cc", "memory", "r8", "r9", "r10", "r11");
-
- return hv_status;
-}
-
-static void guest_gp_handler(struct ex_regs *regs)
+static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address,
+ vm_vaddr_t output_address, uint64_t *hv_status)
{
- unsigned char *rip = (unsigned char *)regs->rip;
- bool r, w;
-
- r = rip == &rdmsr_start;
- w = rip == &wrmsr_start;
- GUEST_ASSERT(r || w);
-
- nr_gp++;
-
- if (r)
- regs->rip = (uint64_t)&rdmsr_end;
- else
- regs->rip = (uint64_t)&wrmsr_end;
-}
-
-static void guest_ud_handler(struct ex_regs *regs)
-{
- nr_ud++;
- regs->rip += 3;
+ uint8_t vector;
+
+ /* Note both the hypercall and the "asm safe" clobber r9-r11. */
+ asm volatile("mov %[output_address], %%r8\n\t"
+ KVM_ASM_SAFE("vmcall")
+ : "=a" (*hv_status),
+ "+c" (control), "+d" (input_address),
+ KVM_ASM_SAFE_OUTPUTS(vector)
+ : [output_address] "r"(output_address)
+ : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
+ return vector;
}
struct msr_data {
@@ -102,111 +46,105 @@ struct hcall_data {
static void guest_msr(struct msr_data *msr)
{
- int i = 0;
-
- while (msr->idx) {
- WRITE_ONCE(nr_gp, 0);
- if (!msr->write)
- do_rdmsr(msr->idx);
- else
- do_wrmsr(msr->idx, msr->write_val);
+ uint64_t ignored;
+ uint8_t vector;
- if (msr->available)
- GUEST_ASSERT(READ_ONCE(nr_gp) == 0);
- else
- GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
+ GUEST_ASSERT(msr->idx);
- GUEST_SYNC(i++);
- }
+ if (!msr->write)
+ vector = rdmsr_safe(msr->idx, &ignored);
+ else
+ vector = wrmsr_safe(msr->idx, msr->write_val);
+ if (msr->available)
+ GUEST_ASSERT_2(!vector, msr->idx, vector);
+ else
+ GUEST_ASSERT_2(vector == GP_VECTOR, msr->idx, vector);
GUEST_DONE();
}
static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
{
- int i = 0;
u64 res, input, output;
+ uint8_t vector;
+
+ GUEST_ASSERT(hcall->control);
wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID);
wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
- while (hcall->control) {
- nr_ud = 0;
- if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
- input = pgs_gpa;
- output = pgs_gpa + 4096;
- } else {
- input = output = 0;
- }
-
- res = hypercall(hcall->control, input, output);
- if (hcall->ud_expected)
- GUEST_ASSERT(nr_ud == 1);
- else
- GUEST_ASSERT(res == hcall->expect);
-
- GUEST_SYNC(i++);
+ if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
+ input = pgs_gpa;
+ output = pgs_gpa + 4096;
+ } else {
+ input = output = 0;
}
+ vector = hypercall(hcall->control, input, output, &res);
+ if (hcall->ud_expected)
+ GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector);
+ else
+ GUEST_ASSERT_2(!vector, hcall->control, vector);
+
+ GUEST_ASSERT_2(!hcall->ud_expected || res == hcall->expect,
+ hcall->expect, res);
GUEST_DONE();
}
-static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 *feat,
- struct kvm_cpuid_entry2 *recomm,
- struct kvm_cpuid_entry2 *dbg)
+static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu)
{
- TEST_ASSERT(set_cpuid(cpuid, feat),
- "failed to set KVM_CPUID_FEATURES leaf");
- TEST_ASSERT(set_cpuid(cpuid, recomm),
- "failed to set HYPERV_CPUID_ENLIGHTMENT_INFO leaf");
- TEST_ASSERT(set_cpuid(cpuid, dbg),
- "failed to set HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES leaf");
- vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ /*
+ * Enable all supported Hyper-V features, then clear the leafs holding
+ * the features that will be tested one by one.
+ */
+ vcpu_set_hv_cpuid(vcpu);
+
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
}
static void guest_test_msrs_access(void)
{
+ struct kvm_cpuid2 *prev_cpuid = NULL;
+ struct kvm_cpuid_entry2 *feat, *dbg;
+ struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vm *vm;
struct ucall uc;
- int stage = 0, r;
- struct kvm_cpuid_entry2 feat = {
- .function = HYPERV_CPUID_FEATURES
- };
- struct kvm_cpuid_entry2 recomm = {
- .function = HYPERV_CPUID_ENLIGHTMENT_INFO
- };
- struct kvm_cpuid_entry2 dbg = {
- .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
- };
- struct kvm_cpuid2 *best;
+ int stage = 0;
vm_vaddr_t msr_gva;
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
- .args = {1}
- };
struct msr_data *msr;
while (true) {
- vm = vm_create_default(VCPU_ID, 0, guest_msr);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
msr_gva = vm_vaddr_alloc_page(vm);
memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
msr = addr_gva2hva(vm, msr_gva);
- vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
- vcpu_enable_cap(vm, VCPU_ID, &cap);
+ vcpu_args_set(vcpu, 1, msr_gva);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
- vcpu_set_hv_cpuid(vm, VCPU_ID);
+ if (!prev_cpuid) {
+ vcpu_reset_hv_cpuid(vcpu);
- best = kvm_get_supported_hv_cpuid();
+ prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+ } else {
+ vcpu_init_cpuid(vcpu, prev_cpuid);
+ }
+
+ feat = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
+ dbg = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+ vcpu_init_descriptor_tables(vcpu);
- run = vcpu_state(vm, VCPU_ID);
+ run = vcpu->run;
+
+ /* TODO: Make this entire test easier to maintain. */
+ if (stage >= 21)
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
switch (stage) {
case 0:
@@ -223,7 +161,7 @@ static void guest_test_msrs_access(void)
msr->available = 0;
break;
case 2:
- feat.eax |= HV_MSR_HYPERCALL_AVAILABLE;
+ feat->eax |= HV_MSR_HYPERCALL_AVAILABLE;
/*
* HV_X64_MSR_GUEST_OS_ID has to be written first to make
* HV_X64_MSR_HYPERCALL available.
@@ -250,12 +188,14 @@ static void guest_test_msrs_access(void)
msr->available = 0;
break;
case 6:
- feat.eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
+ feat->eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
msr->write = 0;
msr->available = 1;
break;
case 7:
/* Read only */
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
msr->write = 1;
msr->write_val = 1;
msr->available = 0;
@@ -267,12 +207,14 @@ static void guest_test_msrs_access(void)
msr->available = 0;
break;
case 9:
- feat.eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
+ feat->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
msr->write = 0;
msr->available = 1;
break;
case 10:
/* Read only */
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
msr->write = 1;
msr->write_val = 1;
msr->available = 0;
@@ -284,12 +226,14 @@ static void guest_test_msrs_access(void)
msr->available = 0;
break;
case 12:
- feat.eax |= HV_MSR_VP_INDEX_AVAILABLE;
+ feat->eax |= HV_MSR_VP_INDEX_AVAILABLE;
+ msr->idx = HV_X64_MSR_VP_INDEX;
msr->write = 0;
msr->available = 1;
break;
case 13:
/* Read only */
+ msr->idx = HV_X64_MSR_VP_INDEX;
msr->write = 1;
msr->write_val = 1;
msr->available = 0;
@@ -301,11 +245,13 @@ static void guest_test_msrs_access(void)
msr->available = 0;
break;
case 15:
- feat.eax |= HV_MSR_RESET_AVAILABLE;
+ feat->eax |= HV_MSR_RESET_AVAILABLE;
+ msr->idx = HV_X64_MSR_RESET;
msr->write = 0;
msr->available = 1;
break;
case 16:
+ msr->idx = HV_X64_MSR_RESET;
msr->write = 1;
msr->write_val = 0;
msr->available = 1;
@@ -317,11 +263,13 @@ static void guest_test_msrs_access(void)
msr->available = 0;
break;
case 18:
- feat.eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
+ feat->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
msr->write = 0;
msr->available = 1;
break;
case 19:
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
msr->write = 1;
msr->write_val = 0;
msr->available = 1;
@@ -337,16 +285,18 @@ static void guest_test_msrs_access(void)
* Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
* capability enabled and guest visible CPUID bit unset.
*/
- cap.cap = KVM_CAP_HYPERV_SYNIC2;
- cap.args[0] = 0;
- vcpu_enable_cap(vm, VCPU_ID, &cap);
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = 0;
+ msr->available = 0;
break;
case 22:
- feat.eax |= HV_MSR_SYNIC_AVAILABLE;
+ feat->eax |= HV_MSR_SYNIC_AVAILABLE;
+ msr->idx = HV_X64_MSR_EOM;
msr->write = 0;
msr->available = 1;
break;
case 23:
+ msr->idx = HV_X64_MSR_EOM;
msr->write = 1;
msr->write_val = 0;
msr->available = 1;
@@ -358,23 +308,29 @@ static void guest_test_msrs_access(void)
msr->available = 0;
break;
case 25:
- feat.eax |= HV_MSR_SYNTIMER_AVAILABLE;
+ feat->eax |= HV_MSR_SYNTIMER_AVAILABLE;
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
msr->write = 0;
msr->available = 1;
break;
case 26:
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
msr->write = 1;
msr->write_val = 0;
msr->available = 1;
break;
case 27:
/* Direct mode test */
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
msr->write = 1;
msr->write_val = 1 << 12;
msr->available = 0;
break;
case 28:
- feat.edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
+ feat->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = 1;
+ msr->write_val = 1 << 12;
msr->available = 1;
break;
@@ -384,7 +340,8 @@ static void guest_test_msrs_access(void)
msr->available = 0;
break;
case 30:
- feat.eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
+ feat->eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
+ msr->idx = HV_X64_MSR_EOI;
msr->write = 1;
msr->write_val = 1;
msr->available = 1;
@@ -396,12 +353,14 @@ static void guest_test_msrs_access(void)
msr->available = 0;
break;
case 32:
- feat.eax |= HV_ACCESS_FREQUENCY_MSRS;
+ feat->eax |= HV_ACCESS_FREQUENCY_MSRS;
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
msr->write = 0;
msr->available = 1;
break;
case 33:
/* Read only */
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
msr->write = 1;
msr->write_val = 1;
msr->available = 0;
@@ -413,11 +372,13 @@ static void guest_test_msrs_access(void)
msr->available = 0;
break;
case 35:
- feat.eax |= HV_ACCESS_REENLIGHTENMENT;
+ feat->eax |= HV_ACCESS_REENLIGHTENMENT;
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
msr->write = 0;
msr->available = 1;
break;
case 36:
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
msr->write = 1;
msr->write_val = 1;
msr->available = 1;
@@ -436,11 +397,13 @@ static void guest_test_msrs_access(void)
msr->available = 0;
break;
case 39:
- feat.edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
+ feat->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
+ msr->idx = HV_X64_MSR_CRASH_P0;
msr->write = 0;
msr->available = 1;
break;
case 40:
+ msr->idx = HV_X64_MSR_CRASH_P0;
msr->write = 1;
msr->write_val = 1;
msr->available = 1;
@@ -452,48 +415,44 @@ static void guest_test_msrs_access(void)
msr->available = 0;
break;
case 42:
- feat.edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
- dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+ feat->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
+ dbg->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
msr->write = 0;
msr->available = 1;
break;
case 43:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
msr->write = 1;
msr->write_val = 0;
msr->available = 1;
break;
case 44:
- /* END */
- msr->idx = 0;
- break;
+ kvm_vm_free(vm);
+ return;
}
- hv_set_cpuid(vm, best, &feat, &recomm, &dbg);
+ vcpu_set_cpuid(vcpu);
+
+ memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
- if (msr->idx)
- pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
- msr->idx, msr->write ? "write" : "read");
- else
- pr_debug("Stage %d: finish\n", stage);
+ pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+ msr->idx, msr->write ? "write" : "read");
- r = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"unexpected exit reason: %u (%s)",
run->exit_reason, exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
- case UCALL_SYNC:
- TEST_ASSERT(uc.args[1] == 0,
- "Unexpected stage: %ld (0 expected)\n",
- uc.args[1]);
- break;
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT_2(uc, "MSR = %lx, vector = %lx");
return;
case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
return;
}
@@ -504,54 +463,50 @@ static void guest_test_msrs_access(void)
static void guest_test_hcalls_access(void)
{
+ struct kvm_cpuid_entry2 *feat, *recomm, *dbg;
+ struct kvm_cpuid2 *prev_cpuid = NULL;
+ struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vm *vm;
struct ucall uc;
- int stage = 0, r;
- struct kvm_cpuid_entry2 feat = {
- .function = HYPERV_CPUID_FEATURES,
- .eax = HV_MSR_HYPERCALL_AVAILABLE
- };
- struct kvm_cpuid_entry2 recomm = {
- .function = HYPERV_CPUID_ENLIGHTMENT_INFO
- };
- struct kvm_cpuid_entry2 dbg = {
- .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
- };
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
- .args = {1}
- };
+ int stage = 0;
vm_vaddr_t hcall_page, hcall_params;
struct hcall_data *hcall;
- struct kvm_cpuid2 *best;
while (true) {
- vm = vm_create_default(VCPU_ID, 0, guest_hcall);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+ vcpu_init_descriptor_tables(vcpu);
/* Hypercall input/output */
hcall_page = vm_vaddr_alloc_pages(vm, 2);
- hcall = addr_gva2hva(vm, hcall_page);
memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
hcall_params = vm_vaddr_alloc_page(vm);
memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+ hcall = addr_gva2hva(vm, hcall_params);
+
+ vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
- vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
- vcpu_enable_cap(vm, VCPU_ID, &cap);
+ if (!prev_cpuid) {
+ vcpu_reset_hv_cpuid(vcpu);
- vcpu_set_hv_cpuid(vm, VCPU_ID);
+ prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+ } else {
+ vcpu_init_cpuid(vcpu, prev_cpuid);
+ }
- best = kvm_get_supported_hv_cpuid();
+ feat = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
+ recomm = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
+ dbg = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
- run = vcpu_state(vm, VCPU_ID);
+ run = vcpu->run;
switch (stage) {
case 0:
+ feat->eax |= HV_MSR_HYPERCALL_AVAILABLE;
hcall->control = 0xdeadbeef;
hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
@@ -561,7 +516,8 @@ static void guest_test_hcalls_access(void)
hcall->expect = HV_STATUS_ACCESS_DENIED;
break;
case 2:
- feat.ebx |= HV_POST_MESSAGES;
+ feat->ebx |= HV_POST_MESSAGES;
+ hcall->control = HVCALL_POST_MESSAGE;
hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
@@ -570,7 +526,8 @@ static void guest_test_hcalls_access(void)
hcall->expect = HV_STATUS_ACCESS_DENIED;
break;
case 4:
- feat.ebx |= HV_SIGNAL_EVENTS;
+ feat->ebx |= HV_SIGNAL_EVENTS;
+ hcall->control = HVCALL_SIGNAL_EVENT;
hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
@@ -579,11 +536,13 @@ static void guest_test_hcalls_access(void)
hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
case 6:
- dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+ dbg->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
hcall->expect = HV_STATUS_ACCESS_DENIED;
break;
case 7:
- feat.ebx |= HV_DEBUGGING;
+ feat->ebx |= HV_DEBUGGING;
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
hcall->expect = HV_STATUS_OPERATION_DENIED;
break;
@@ -592,7 +551,8 @@ static void guest_test_hcalls_access(void)
hcall->expect = HV_STATUS_ACCESS_DENIED;
break;
case 9:
- recomm.eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
+ recomm->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
hcall->expect = HV_STATUS_SUCCESS;
break;
case 10:
@@ -600,7 +560,8 @@ static void guest_test_hcalls_access(void)
hcall->expect = HV_STATUS_ACCESS_DENIED;
break;
case 11:
- recomm.eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
+ recomm->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
hcall->expect = HV_STATUS_SUCCESS;
break;
@@ -609,7 +570,8 @@ static void guest_test_hcalls_access(void)
hcall->expect = HV_STATUS_ACCESS_DENIED;
break;
case 13:
- recomm.eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
+ recomm->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
+ hcall->control = HVCALL_SEND_IPI;
hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
case 14:
@@ -623,7 +585,8 @@ static void guest_test_hcalls_access(void)
hcall->expect = HV_STATUS_ACCESS_DENIED;
break;
case 16:
- recomm.ebx = 0xfff;
+ recomm->ebx = 0xfff;
+ hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
hcall->expect = HV_STATUS_SUCCESS;
break;
case 17:
@@ -632,42 +595,35 @@ static void guest_test_hcalls_access(void)
hcall->ud_expected = true;
break;
case 18:
- feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
+ feat->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
hcall->ud_expected = false;
hcall->expect = HV_STATUS_SUCCESS;
break;
-
case 19:
- /* END */
- hcall->control = 0;
- break;
+ kvm_vm_free(vm);
+ return;
}
- hv_set_cpuid(vm, best, &feat, &recomm, &dbg);
+ vcpu_set_cpuid(vcpu);
+
+ memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
- if (hcall->control)
- pr_debug("Stage %d: testing hcall: 0x%lx\n", stage,
- hcall->control);
- else
- pr_debug("Stage %d: finish\n", stage);
+ pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control);
- r = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"unexpected exit reason: %u (%s)",
run->exit_reason, exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
- case UCALL_SYNC:
- TEST_ASSERT(uc.args[1] == 0,
- "Unexpected stage: %ld (0 expected)\n",
- uc.args[1]);
- break;
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT_2(uc, "arg1 = %lx, arg2 = %lx");
return;
case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
return;
}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index 21f5ca9197da..a380ad7bb9b3 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -21,7 +21,6 @@
#include "svm_util.h"
#include "hyperv.h"
-#define VCPU_ID 1
#define L2_GUEST_STACK_SIZE 256
struct hv_enlightenments {
@@ -42,11 +41,6 @@ struct hv_enlightenments {
*/
#define VMCB_HV_NESTED_ENLIGHTENMENTS (1U << 31)
-static inline void vmmcall(void)
-{
- __asm__ __volatile__("vmmcall");
-}
-
void l2_guest_code(void)
{
GUEST_SYNC(3);
@@ -127,33 +121,31 @@ int main(int argc, char *argv[])
{
vm_vaddr_t nested_gva = 0;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
struct ucall uc;
int stage;
- if (!nested_svm_supported()) {
- print_skip("Nested SVM not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- vcpu_set_hv_cpuid(vm, VCPU_ID);
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_set_hv_cpuid(vcpu);
+ run = vcpu->run;
vcpu_alloc_svm(vm, &nested_gva);
- vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Stage %d: unexpected exit reason: %u (%s),\n",
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
index 97731454f3f3..813ce282cf56 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
@@ -16,8 +16,6 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 0
-
struct test_case {
uint64_t kvmclock_base;
int64_t realtime_offset;
@@ -73,8 +71,7 @@ static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
static void handle_abort(struct ucall *uc)
{
- TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0],
- __FILE__, uc->args[1]);
+ REPORT_GUEST_ASSERT(*uc);
}
static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
@@ -105,29 +102,27 @@ static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
vm_ioctl(vm, KVM_SET_CLOCK, &data);
}
-static void enter_guest(struct kvm_vm *vm)
+static void enter_guest(struct kvm_vcpu *vcpu)
{
struct kvm_clock_data start, end;
- struct kvm_run *run;
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vm *vm = vcpu->vm;
struct ucall uc;
- int i, r;
-
- run = vcpu_state(vm, VCPU_ID);
+ int i;
for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
setup_clock(vm, &test_cases[i]);
vm_ioctl(vm, KVM_GET_CLOCK, &start);
- r = _vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
vm_ioctl(vm, KVM_GET_CLOCK, &end);
- TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"unexpected exit reason: %u (%s)",
run->exit_reason, exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
handle_sync(&uc, &start, &end);
break;
@@ -178,26 +173,23 @@ out:
int main(void)
{
+ struct kvm_vcpu *vcpu;
vm_vaddr_t pvti_gva;
vm_paddr_t pvti_gpa;
struct kvm_vm *vm;
int flags;
flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
- if (!(flags & KVM_CLOCK_REALTIME)) {
- print_skip("KVM_CLOCK_REALTIME not supported; flags: %x",
- flags);
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
check_clocksource();
- vm = vm_create_default(VCPU_ID, 0, guest_main);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
pvti_gpa = addr_gva2gpa(vm, pvti_gva);
- vcpu_args_set(vm, VCPU_ID, 2, pvti_gpa, pvti_gva);
+ vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
- enter_guest(vm);
+ enter_guest(vcpu);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index 04ed975662c9..619655c1a1f3 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -12,55 +12,6 @@
#include "kvm_util.h"
#include "processor.h"
-extern unsigned char rdmsr_start;
-extern unsigned char rdmsr_end;
-
-static u64 do_rdmsr(u32 idx)
-{
- u32 lo, hi;
-
- asm volatile("rdmsr_start: rdmsr;"
- "rdmsr_end:"
- : "=a"(lo), "=c"(hi)
- : "c"(idx));
-
- return (((u64) hi) << 32) | lo;
-}
-
-extern unsigned char wrmsr_start;
-extern unsigned char wrmsr_end;
-
-static void do_wrmsr(u32 idx, u64 val)
-{
- u32 lo, hi;
-
- lo = val;
- hi = val >> 32;
-
- asm volatile("wrmsr_start: wrmsr;"
- "wrmsr_end:"
- : : "a"(lo), "c"(idx), "d"(hi));
-}
-
-static int nr_gp;
-
-static void guest_gp_handler(struct ex_regs *regs)
-{
- unsigned char *rip = (unsigned char *)regs->rip;
- bool r, w;
-
- r = rip == &rdmsr_start;
- w = rip == &wrmsr_start;
- GUEST_ASSERT(r || w);
-
- nr_gp++;
-
- if (r)
- regs->rip = (uint64_t)&rdmsr_end;
- else
- regs->rip = (uint64_t)&wrmsr_end;
-}
-
struct msr_data {
uint32_t idx;
const char *name;
@@ -89,14 +40,16 @@ static struct msr_data msrs_to_test[] = {
static void test_msr(struct msr_data *msr)
{
+ uint64_t ignored;
+ uint8_t vector;
+
PR_MSR(msr);
- do_rdmsr(msr->idx);
- GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
- nr_gp = 0;
- do_wrmsr(msr->idx, 0);
- GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
- nr_gp = 0;
+ vector = rdmsr_safe(msr->idx, &ignored);
+ GUEST_ASSERT_1(vector == GP_VECTOR, vector);
+
+ vector = wrmsr_safe(msr->idx, 0);
+ GUEST_ASSERT_1(vector == GP_VECTOR, vector);
}
struct hcall_data {
@@ -142,15 +95,6 @@ static void guest_main(void)
GUEST_DONE();
}
-static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid)
-{
- struct kvm_cpuid_entry2 ent = {0};
-
- ent.function = KVM_CPUID_FEATURES;
- TEST_ASSERT(set_cpuid(cpuid, &ent),
- "failed to clear KVM_CPUID_FEATURES leaf");
-}
-
static void pr_msr(struct ucall *uc)
{
struct msr_data *msr = (struct msr_data *)uc->args[0];
@@ -165,30 +109,18 @@ static void pr_hcall(struct ucall *uc)
pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
}
-static void handle_abort(struct ucall *uc)
+static void enter_guest(struct kvm_vcpu *vcpu)
{
- TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0],
- __FILE__, uc->args[1]);
-}
-
-#define VCPU_ID 0
-
-static void enter_guest(struct kvm_vm *vm)
-{
- struct kvm_run *run;
+ struct kvm_run *run = vcpu->run;
struct ucall uc;
- int r;
-
- run = vcpu_state(vm, VCPU_ID);
while (true) {
- r = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"unexpected exit reason: %u (%s)",
run->exit_reason, exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_PR_MSR:
pr_msr(&uc);
break;
@@ -196,7 +128,7 @@ static void enter_guest(struct kvm_vm *vm)
pr_hcall(&uc);
break;
case UCALL_ABORT:
- handle_abort(&uc);
+ REPORT_GUEST_ASSERT_1(uc, "vector = %lu");
return;
case UCALL_DONE:
return;
@@ -206,29 +138,20 @@ static void enter_guest(struct kvm_vm *vm)
int main(void)
{
- struct kvm_enable_cap cap = {0};
- struct kvm_cpuid2 *best;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) {
- print_skip("KVM_CAP_ENFORCE_PV_FEATURE_CPUID not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID));
- vm = vm_create_default(VCPU_ID, 0, guest_main);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
- cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID;
- cap.args[0] = 1;
- vcpu_enable_cap(vm, VCPU_ID, &cap);
+ vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1);
- best = kvm_get_supported_cpuid();
- clear_kvm_cpuid_features(best);
- vcpu_set_cpuid(vm, VCPU_ID, best);
+ vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+ vcpu_init_descriptor_tables(vcpu);
- enter_guest(vm);
+ enter_guest(vcpu);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
new file mode 100644
index 000000000000..3cc4b86832fe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * maximum APIC ID capability tests
+ *
+ * Copyright (C) 2022, Intel, Inc.
+ *
+ * Tests for getting/setting maximum APIC ID capability
+ */
+
+#include "kvm_util.h"
+
+#define MAX_VCPU_ID 2
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones();
+
+ /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
+ ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+
+ /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");
+
+ /* Set KVM_CAP_MAX_VCPU_ID */
+ vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);
+
+
+ /* Try to set KVM_CAP_MAX_VCPU_ID again */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail");
+
+ /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap*/
+ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
+ TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
index 9f55ccd169a1..fb02581953a3 100644
--- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
@@ -59,10 +59,10 @@ void test(void)
kvm = open("/dev/kvm", O_RDWR);
TEST_ASSERT(kvm != -1, "failed to open /dev/kvm");
- kvmvm = ioctl(kvm, KVM_CREATE_VM, 0);
- TEST_ASSERT(kvmvm != -1, "KVM_CREATE_VM failed");
+ kvmvm = __kvm_ioctl(kvm, KVM_CREATE_VM, NULL);
+ TEST_ASSERT(kvmvm > 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, kvmvm));
kvmcpu = ioctl(kvmvm, KVM_CREATE_VCPU, 0);
- TEST_ASSERT(kvmcpu != -1, "KVM_CREATE_VCPU failed");
+ TEST_ASSERT(kvmcpu != -1, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, kvmcpu));
run = (struct kvm_run *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED,
kvmcpu, 0);
tc.kvmcpu = kvmcpu;
@@ -93,15 +93,9 @@ int main(void)
{
int warnings_before, warnings_after;
- if (!is_intel_cpu()) {
- print_skip("Must be run on an Intel CPU");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(is_intel_cpu());
- if (vm_is_unrestricted_guest(NULL)) {
- print_skip("Unrestricted guest must be disabled");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
warnings_before = get_warnings_count();
diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
deleted file mode 100644
index bdecd532f935..000000000000
--- a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
+++ /dev/null
@@ -1,147 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define VCPU_ID 1
-
-#define MMIO_GPA 0x100000000ull
-
-static void guest_code(void)
-{
- (void)READ_ONCE(*((uint64_t *)MMIO_GPA));
- (void)READ_ONCE(*((uint64_t *)MMIO_GPA));
-
- GUEST_ASSERT(0);
-}
-
-static void guest_pf_handler(struct ex_regs *regs)
-{
- /* PFEC == RSVD | PRESENT (read, kernel). */
- GUEST_ASSERT(regs->error_code == 0x9);
- GUEST_DONE();
-}
-
-static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val)
-{
- u32 good_cpuid_val = *cpuid_reg;
- struct kvm_run *run;
- struct kvm_vm *vm;
- uint64_t cmd;
- int r;
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
-
- /* Map 1gb page without a backing memlot. */
- __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, PG_LEVEL_1G);
-
- r = _vcpu_run(vm, VCPU_ID);
-
- /* Guest access to the 1gb page should trigger MMIO. */
- TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_MMIO,
- "Unexpected exit reason: %u (%s), expected MMIO exit (1gb page w/o memslot)\n",
- run->exit_reason, exit_reason_str(run->exit_reason));
-
- TEST_ASSERT(run->mmio.len == 8, "Unexpected exit mmio size = %u", run->mmio.len);
-
- TEST_ASSERT(run->mmio.phys_addr == MMIO_GPA,
- "Unexpected exit mmio address = 0x%llx", run->mmio.phys_addr);
-
- /*
- * Effect the CPUID change for the guest and re-enter the guest. Its
- * access should now #PF due to the PAGE_SIZE bit being reserved or
- * the resulting GPA being invalid. Note, kvm_get_supported_cpuid()
- * returns the struct that contains the entry being modified. Eww.
- */
- *cpuid_reg = evil_cpuid_val;
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
- /*
- * Add a dummy memslot to coerce KVM into bumping the MMIO generation.
- * KVM does not "officially" support mucking with CPUID after KVM_RUN,
- * and will incorrectly reuse MMIO SPTEs. Don't delete the memslot!
- * KVM x86 zaps all shadow pages on memslot deletion.
- */
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- MMIO_GPA << 1, 10, 1, 0);
-
- /* Set up a #PF handler to eat the RSVD #PF and signal all done! */
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_install_exception_handler(vm, PF_VECTOR, guest_pf_handler);
-
- r = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r);
-
- cmd = get_ucall(vm, VCPU_ID, NULL);
- TEST_ASSERT(cmd == UCALL_DONE,
- "Unexpected guest exit, exit_reason=%s, ucall.cmd = %lu\n",
- exit_reason_str(run->exit_reason), cmd);
-
- /*
- * Restore the happy CPUID value for the next test. Yes, changes are
- * indeed persistent across VM destruction.
- */
- *cpuid_reg = good_cpuid_val;
-
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_cpuid_entry2 *entry;
- int opt;
-
- /*
- * All tests are opt-in because TDP doesn't play nice with reserved #PF
- * in the GVA->GPA translation. The hardware page walker doesn't let
- * software change GBPAGES or MAXPHYADDR, and KVM doesn't manually walk
- * the GVA on fault for performance reasons.
- */
- bool do_gbpages = false;
- bool do_maxphyaddr = false;
-
- setbuf(stdout, NULL);
-
- while ((opt = getopt(argc, argv, "gm")) != -1) {
- switch (opt) {
- case 'g':
- do_gbpages = true;
- break;
- case 'm':
- do_maxphyaddr = true;
- break;
- case 'h':
- default:
- printf("usage: %s [-g (GBPAGES)] [-m (MAXPHYADDR)]\n", argv[0]);
- break;
- }
- }
-
- if (!do_gbpages && !do_maxphyaddr) {
- print_skip("No sub-tests selected");
- return 0;
- }
-
- entry = kvm_get_supported_cpuid_entry(0x80000001);
- if (!(entry->edx & CPUID_GBPAGES)) {
- print_skip("1gb hugepages not supported");
- return 0;
- }
-
- if (do_gbpages) {
- pr_info("Test MMIO after toggling CPUID.GBPAGES\n\n");
- mmu_role_test(&entry->edx, entry->edx & ~CPUID_GBPAGES);
- }
-
- if (do_maxphyaddr) {
- pr_info("Test MMIO after changing CPUID.MAXPHYADDR\n\n");
- entry = kvm_get_supported_cpuid_entry(0x80000008);
- mmu_role_test(&entry->eax, (entry->eax & ~0xff) | 0x20);
- }
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
new file mode 100644
index 000000000000..016070cad36e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define CPUID_MWAIT (1u << 3)
+
+enum monitor_mwait_testcases {
+ MWAIT_QUIRK_DISABLED = BIT(0),
+ MISC_ENABLES_QUIRK_DISABLED = BIT(1),
+ MWAIT_DISABLED = BIT(2),
+};
+
+static void guest_monitor_wait(int testcase)
+{
+ /*
+ * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD,
+ * in all other scenarios KVM should emulate them as nops.
+ */
+ bool fault_wanted = (testcase & MWAIT_QUIRK_DISABLED) &&
+ (testcase & MWAIT_DISABLED);
+ u8 vector;
+
+ GUEST_SYNC(testcase);
+
+ /*
+ * Arbitrarily MONITOR this function, SVM performs fault checks before
+ * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
+ */
+ vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
+ if (fault_wanted)
+ GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector);
+ else
+ GUEST_ASSERT_2(!vector, testcase, vector);
+
+ vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
+ if (fault_wanted)
+ GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector);
+ else
+ GUEST_ASSERT_2(!vector, testcase, vector);
+}
+
+static void guest_code(void)
+{
+ guest_monitor_wait(MWAIT_DISABLED);
+
+ guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
+
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ uint64_t disabled_quirks;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int testcase;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+
+ run = vcpu->run;
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ testcase = uc.args[1];
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT_2(uc, "testcase = %lx, vector = %ld");
+ goto done;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ goto done;
+ }
+
+ disabled_quirks = 0;
+ if (testcase & MWAIT_QUIRK_DISABLED)
+ disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+ if (testcase & MISC_ENABLES_QUIRK_DISABLED)
+ disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+ /*
+ * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
+ * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
+ * bit in MISC_ENABLES accordingly. If the quirk is enabled,
+ * the only valid configuration is MWAIT disabled, as CPUID
+ * can't be manually changed after running the vCPU.
+ */
+ if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
+ TEST_ASSERT(testcase & MWAIT_DISABLED,
+ "Can't toggle CPUID features after running vCPU");
+ continue;
+ }
+
+ vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
+ (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
new file mode 100644
index 000000000000..cc6421716400
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/nx_huge_page_test.c
+ *
+ * Usage: to be run via nx_huge_page_test.sh, which does the necessary
+ * environment setup and teardown
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <stdint.h>
+#include <time.h>
+
+#include <test_util.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define HPAGE_SLOT 10
+#define HPAGE_GPA (4UL << 30) /* 4G prevents collision w/ slot 0 */
+#define HPAGE_GVA HPAGE_GPA /* GVA is arbitrary, so use GPA. */
+#define PAGES_PER_2MB_HUGE_PAGE 512
+#define HPAGE_SLOT_NPAGES (3 * PAGES_PER_2MB_HUGE_PAGE)
+
+/*
+ * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
+ * being run without it.
+ */
+#define MAGIC_TOKEN 887563923
+
+/*
+ * x86 opcode for the return instruction. Used to call into, and then
+ * immediately return from, memory backed with hugepages.
+ */
+#define RETURN_OPCODE 0xC3
+
+/* Call the specified memory address. */
+static void guest_do_CALL(uint64_t target)
+{
+ ((void (*)(void)) target)();
+}
+
+/*
+ * Exit the VM after each memory access so that the userspace component of the
+ * test can make assertions about the pages backing the VM.
+ *
+ * See the below for an explanation of how each access should affect the
+ * backing mappings.
+ */
+void guest_code(void)
+{
+ uint64_t hpage_1 = HPAGE_GVA;
+ uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
+ uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
+
+ READ_ONCE(*(uint64_t *)hpage_1);
+ GUEST_SYNC(1);
+
+ READ_ONCE(*(uint64_t *)hpage_2);
+ GUEST_SYNC(2);
+
+ guest_do_CALL(hpage_1);
+ GUEST_SYNC(3);
+
+ guest_do_CALL(hpage_3);
+ GUEST_SYNC(4);
+
+ READ_ONCE(*(uint64_t *)hpage_1);
+ GUEST_SYNC(5);
+
+ READ_ONCE(*(uint64_t *)hpage_3);
+ GUEST_SYNC(6);
+}
+
+static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
+{
+ int actual_pages_2m;
+
+ actual_pages_2m = vm_get_stat(vm, "pages_2m");
+
+ TEST_ASSERT(actual_pages_2m == expected_pages_2m,
+ "Unexpected 2m page count. Expected %d, got %d",
+ expected_pages_2m, actual_pages_2m);
+}
+
+static void check_split_count(struct kvm_vm *vm, int expected_splits)
+{
+ int actual_splits;
+
+ actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+
+ TEST_ASSERT(actual_splits == expected_splits,
+ "Unexpected NX huge page split count. Expected %d, got %d",
+ expected_splits, actual_splits);
+}
+
+static void wait_for_reclaim(int reclaim_period_ms)
+{
+ long reclaim_wait_ms;
+ struct timespec ts;
+
+ reclaim_wait_ms = reclaim_period_ms * 5;
+ ts.tv_sec = reclaim_wait_ms / 1000;
+ ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000;
+ nanosleep(&ts, NULL);
+}
+
+void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
+ bool reboot_permissions)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ void *hva;
+ int r;
+
+ vm = vm_create(1);
+
+ if (disable_nx_huge_pages) {
+ /*
+ * Cannot run the test without NX huge pages if the kernel
+ * does not support it.
+ */
+ if (!kvm_check_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES))
+ return;
+
+ r = __vm_disable_nx_huge_pages(vm);
+ if (reboot_permissions) {
+ TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
+ } else {
+ TEST_ASSERT(r == -1 && errno == EPERM,
+ "This process should not have permission to disable NX huge pages");
+ return;
+ }
+ }
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
+ HPAGE_GPA, HPAGE_SLOT,
+ HPAGE_SLOT_NPAGES, 0);
+
+ virt_map(vm, HPAGE_GVA, HPAGE_GPA, HPAGE_SLOT_NPAGES);
+
+ hva = addr_gpa2hva(vm, HPAGE_GPA);
+ memset(hva, RETURN_OPCODE, HPAGE_SLOT_NPAGES * PAGE_SIZE);
+
+ check_2m_page_count(vm, 0);
+ check_split_count(vm, 0);
+
+ /*
+ * The guest code will first read from the first hugepage, resulting
+ * in a huge page mapping being created.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, 1);
+ check_split_count(vm, 0);
+
+ /*
+ * Then the guest code will read from the second hugepage, resulting
+ * in another huge page mapping being created.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, 2);
+ check_split_count(vm, 0);
+
+ /*
+ * Next, the guest will execute from the first huge page, causing it
+ * to be remapped at 4k.
+ *
+ * If NX huge pages are disabled, this should have no effect.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
+
+ /*
+ * Executing from the third huge page (previously unaccessed) will
+ * cause part to be mapped at 4k.
+ *
+ * If NX huge pages are disabled, it should be mapped at 2M.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+ /* Reading from the first huge page again should have no effect. */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+ /* Give recovery thread time to run. */
+ wait_for_reclaim(reclaim_period_ms);
+
+ /*
+ * Now that the reclaimer has run, all the split pages should be gone.
+ *
+ * If NX huge pages are disabled, the relaimer will not run, so
+ * nothing should change from here on.
+ */
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, 0);
+
+ /*
+ * The 4k mapping on hpage 3 should have been removed, so check that
+ * reading from it causes a huge page mapping to be installed.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
+ check_split_count(vm, 0);
+
+ kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-p period_ms] [-t token]\n", name);
+ puts("");
+ printf(" -p: The NX reclaim period in miliseconds.\n");
+ printf(" -t: The magic token to indicate environment setup is done.\n");
+ printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ int reclaim_period_ms = 0, token = 0, opt;
+ bool reboot_permissions = false;
+
+ while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
+ switch (opt) {
+ case 'p':
+ reclaim_period_ms = atoi(optarg);
+ break;
+ case 't':
+ token = atoi(optarg);
+ break;
+ case 'r':
+ reboot_permissions = true;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ if (token != MAGIC_TOKEN) {
+ print_skip("This test must be run with the magic token %d.\n"
+ "This is done by nx_huge_pages_test.sh, which\n"
+ "also handles environment setup for the test.",
+ MAGIC_TOKEN);
+ exit(KSFT_SKIP);
+ }
+
+ if (!reclaim_period_ms) {
+ print_skip("The NX reclaim period must be specified and non-zero");
+ exit(KSFT_SKIP);
+ }
+
+ run_test(reclaim_period_ms, false, reboot_permissions);
+ run_test(reclaim_period_ms, true, reboot_permissions);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
new file mode 100755
index 000000000000..0560149e66ed
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only */
+#
+# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
+# Makes use of root privileges to set up huge pages and KVM module parameters.
+#
+# tools/testing/selftests/kvm/nx_huge_page_test.sh
+# Copyright (C) 2022, Google LLC.
+
+set -e
+
+NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages)
+NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio)
+NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
+HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
+
+set +e
+
+function sudo_echo () {
+ echo "$1" | sudo tee -a "$2" > /dev/null
+}
+
+NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
+
+sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4
+
+(
+ set -e
+
+ sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages
+ sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+ sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+ sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+ # Test with reboot permissions
+ if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then
+ echo Running test with CAP_SYS_BOOT enabled
+ $NXECUTABLE -t 887563923 -p 100 -r
+ test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE
+ else
+ echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled
+ fi
+
+ # Test without reboot permissions
+ if [ $(whoami) != "root" ] ; then
+ echo Running test with CAP_SYS_BOOT disabled
+ $NXECUTABLE -t 887563923 -p 100
+ else
+ echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled
+ fi
+)
+RET=$?
+
+sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+exit $RET
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index 1e89688cbbbf..76417c7d687b 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -21,7 +21,6 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 0
#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
static void guest_code(void)
@@ -35,28 +34,18 @@ static void guest_code(void)
}
}
-static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
+static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu)
{
- struct kvm_enable_cap cap = {};
-
- cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
- cap.flags = 0;
- cap.args[0] = (int)enable;
- vm_enable_cap(vm, &cap);
-}
-
-static void test_msr_platform_info_enabled(struct kvm_vm *vm)
-{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
struct ucall uc;
- set_msr_platform_info_enabled(vm, true);
- vcpu_run(vm, VCPU_ID);
+ vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, true);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- get_ucall(vm, VCPU_ID, &uc);
+ get_ucall(vcpu, &uc);
TEST_ASSERT(uc.cmd == UCALL_SYNC,
"Received ucall other than UCALL_SYNC: %lu\n", uc.cmd);
TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
@@ -65,12 +54,12 @@ static void test_msr_platform_info_enabled(struct kvm_vm *vm)
MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
}
-static void test_msr_platform_info_disabled(struct kvm_vm *vm)
+static void test_msr_platform_info_disabled(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
- set_msr_platform_info_enabled(vm, false);
- vcpu_run(vm, VCPU_ID);
+ vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, false);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
"Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
run->exit_reason,
@@ -79,27 +68,23 @@ static void test_msr_platform_info_disabled(struct kvm_vm *vm)
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- int rv;
uint64_t msr_platform_info;
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
- rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
- if (!rv) {
- print_skip("KVM_CAP_MSR_PLATFORM_INFO not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
- vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
- msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
- test_msr_platform_info_enabled(vm);
- test_msr_platform_info_disabled(vm);
- vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info);
+ msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
+ vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
+ msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+ test_msr_platform_info_enabled(vcpu);
+ test_msr_platform_info_disabled(vcpu);
+ vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index 93d77574b255..ea4e259a1e2e 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -49,7 +49,6 @@ union cpuid10_ebx {
/* Oddly, this isn't in perf_event.h. */
#define ARCH_PERFMON_BRANCHES_RETIRED 5
-#define VCPU_ID 0
#define NUM_BRANCHES 42
/*
@@ -173,17 +172,17 @@ static void amd_guest_code(void)
* Run the VM to the next GUEST_SYNC(value), and return the value passed
* to the sync. Any other exit from the guest is fatal.
*/
-static uint64_t run_vm_to_sync(struct kvm_vm *vm)
+static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Exit_reason other than KVM_EXIT_IO: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- get_ucall(vm, VCPU_ID, &uc);
+ get_ucall(vcpu, &uc);
TEST_ASSERT(uc.cmd == UCALL_SYNC,
"Received ucall other than UCALL_SYNC: %lu", uc.cmd);
return uc.args[1];
@@ -197,13 +196,13 @@ static uint64_t run_vm_to_sync(struct kvm_vm *vm)
* a sanity check and then GUEST_SYNC(success). In the case of failure,
* the behavior of the guest on resumption is undefined.
*/
-static bool sanity_check_pmu(struct kvm_vm *vm)
+static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
{
bool success;
- vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
- success = run_vm_to_sync(vm);
- vm_install_exception_handler(vm, GP_VECTOR, NULL);
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
+ success = run_vcpu_to_sync(vcpu);
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
return success;
}
@@ -264,9 +263,9 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
return f;
}
-static void test_without_filter(struct kvm_vm *vm)
+static void test_without_filter(struct kvm_vcpu *vcpu)
{
- uint64_t count = run_vm_to_sync(vm);
+ uint64_t count = run_vcpu_to_sync(vcpu);
if (count != NUM_BRANCHES)
pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
@@ -274,21 +273,21 @@ static void test_without_filter(struct kvm_vm *vm)
TEST_ASSERT(count, "Allowed PMU event is not counting");
}
-static uint64_t test_with_filter(struct kvm_vm *vm,
+static uint64_t test_with_filter(struct kvm_vcpu *vcpu,
struct kvm_pmu_event_filter *f)
{
- vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f);
- return run_vm_to_sync(vm);
+ vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+ return run_vcpu_to_sync(vcpu);
}
-static void test_amd_deny_list(struct kvm_vm *vm)
+static void test_amd_deny_list(struct kvm_vcpu *vcpu)
{
uint64_t event = EVENT(0x1C2, 0);
struct kvm_pmu_event_filter *f;
uint64_t count;
f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY);
- count = test_with_filter(vm, f);
+ count = test_with_filter(vcpu, f);
free(f);
if (count != NUM_BRANCHES)
@@ -297,10 +296,10 @@ static void test_amd_deny_list(struct kvm_vm *vm)
TEST_ASSERT(count, "Allowed PMU event is not counting");
}
-static void test_member_deny_list(struct kvm_vm *vm)
+static void test_member_deny_list(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
- uint64_t count = test_with_filter(vm, f);
+ uint64_t count = test_with_filter(vcpu, f);
free(f);
if (count)
@@ -309,10 +308,10 @@ static void test_member_deny_list(struct kvm_vm *vm)
TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}
-static void test_member_allow_list(struct kvm_vm *vm)
+static void test_member_allow_list(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
- uint64_t count = test_with_filter(vm, f);
+ uint64_t count = test_with_filter(vcpu, f);
free(f);
if (count != NUM_BRANCHES)
@@ -321,14 +320,14 @@ static void test_member_allow_list(struct kvm_vm *vm)
TEST_ASSERT(count, "Allowed PMU event is not counting");
}
-static void test_not_member_deny_list(struct kvm_vm *vm)
+static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
uint64_t count;
remove_event(f, INTEL_BR_RETIRED);
remove_event(f, AMD_ZEN_BR_RETIRED);
- count = test_with_filter(vm, f);
+ count = test_with_filter(vcpu, f);
free(f);
if (count != NUM_BRANCHES)
pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
@@ -336,14 +335,14 @@ static void test_not_member_deny_list(struct kvm_vm *vm)
TEST_ASSERT(count, "Allowed PMU event is not counting");
}
-static void test_not_member_allow_list(struct kvm_vm *vm)
+static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
uint64_t count;
remove_event(f, INTEL_BR_RETIRED);
remove_event(f, AMD_ZEN_BR_RETIRED);
- count = test_with_filter(vm, f);
+ count = test_with_filter(vcpu, f);
free(f);
if (count)
pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
@@ -358,25 +357,23 @@ static void test_not_member_allow_list(struct kvm_vm *vm)
*/
static void test_pmu_config_disable(void (*guest_code)(void))
{
+ struct kvm_vcpu *vcpu;
int r;
struct kvm_vm *vm;
- struct kvm_enable_cap cap = { 0 };
r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
if (!(r & KVM_PMU_CAP_DISABLE))
return;
- vm = vm_create_without_vcpus(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES);
+ vm = vm_create(1);
- cap.cap = KVM_CAP_PMU_CAPABILITY;
- cap.args[0] = KVM_PMU_CAP_DISABLE;
- TEST_ASSERT(!vm_enable_cap(vm, &cap), "Failed to set KVM_PMU_CAP_DISABLE.");
+ vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
- vm_vcpu_add_default(vm, VCPU_ID, guest_code);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vcpu_init_descriptor_tables(vcpu);
- TEST_ASSERT(!sanity_check_pmu(vm),
+ TEST_ASSERT(!sanity_check_pmu(vcpu),
"Guest should not be able to use disabled PMU.");
kvm_vm_free(vm);
@@ -387,7 +384,7 @@ static void test_pmu_config_disable(void (*guest_code)(void))
* counter per logical processor, an EBX bit vector of length greater
* than 5, and EBX[5] clear.
*/
-static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry)
+static bool check_intel_pmu_leaf(const struct kvm_cpuid_entry2 *entry)
{
union cpuid10_eax eax = { .full = entry->eax };
union cpuid10_ebx ebx = { .full = entry->ebx };
@@ -403,10 +400,10 @@ static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry)
*/
static bool use_intel_pmu(void)
{
- struct kvm_cpuid_entry2 *entry;
+ const struct kvm_cpuid_entry2 *entry;
- entry = kvm_get_supported_cpuid_index(0xa, 0);
- return is_intel_cpu() && entry && check_intel_pmu_leaf(entry);
+ entry = kvm_get_supported_cpuid_entry(0xa);
+ return is_intel_cpu() && check_intel_pmu_leaf(entry);
}
static bool is_zen1(uint32_t eax)
@@ -435,10 +432,10 @@ static bool is_zen3(uint32_t eax)
*/
static bool use_amd_pmu(void)
{
- struct kvm_cpuid_entry2 *entry;
+ const struct kvm_cpuid_entry2 *entry;
- entry = kvm_get_supported_cpuid_index(1, 0);
- return is_amd_cpu() && entry &&
+ entry = kvm_get_supported_cpuid_entry(1);
+ return is_amd_cpu() &&
(is_zen1(entry->eax) ||
is_zen2(entry->eax) ||
is_zen3(entry->eax));
@@ -446,47 +443,33 @@ static bool use_amd_pmu(void)
int main(int argc, char *argv[])
{
- void (*guest_code)(void) = NULL;
+ void (*guest_code)(void);
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- int r;
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
- r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER);
- if (!r) {
- print_skip("KVM_CAP_PMU_EVENT_FILTER not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
- if (use_intel_pmu())
- guest_code = intel_guest_code;
- else if (use_amd_pmu())
- guest_code = amd_guest_code;
+ TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
+ guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;
- if (!guest_code) {
- print_skip("Don't know how to test this guest PMU");
- exit(KSFT_SKIP);
- }
-
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vcpu_init_descriptor_tables(vcpu);
- if (!sanity_check_pmu(vm)) {
- print_skip("Guest PMU is not functional");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(sanity_check_pmu(vcpu));
if (use_amd_pmu())
- test_amd_deny_list(vm);
+ test_amd_deny_list(vcpu);
- test_without_filter(vm);
- test_member_deny_list(vm);
- test_member_allow_list(vm);
- test_not_member_deny_list(vm);
- test_not_member_allow_list(vm);
+ test_without_filter(vcpu);
+ test_member_deny_list(vcpu);
+ test_member_allow_list(vcpu);
+ test_not_member_deny_list(vcpu);
+ test_not_member_allow_list(vcpu);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
index ae76436af0cc..b25d7556b638 100644
--- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -16,10 +16,6 @@
#include "processor.h"
#include "apic.h"
-#define N_VCPU 2
-#define VCPU_ID0 0
-#define VCPU_ID1 1
-
static void guest_bsp_vcpu(void *arg)
{
GUEST_SYNC(1);
@@ -38,31 +34,30 @@ static void guest_not_bsp_vcpu(void *arg)
GUEST_DONE();
}
-static void test_set_boot_busy(struct kvm_vm *vm)
+static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
{
- int res;
+ int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
+ (void *)(unsigned long)vcpu->id);
- res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID0);
- TEST_ASSERT(res == -1 && errno == EBUSY,
- "KVM_SET_BOOT_CPU_ID set while running vm");
+ TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg);
}
-static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+static void run_vcpu(struct kvm_vcpu *vcpu)
{
struct ucall uc;
int stage;
for (stage = 0; stage < 2; stage++) {
- vcpu_run(vm, vcpuid);
+ vcpu_run(vcpu);
- switch (get_ucall(vm, vcpuid, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
uc.args[1] == stage + 1,
"Stage %d: Unexpected register values vmexit, got %lx",
stage + 1, (ulong)uc.args[1]);
- test_set_boot_busy(vm);
+ test_set_bsp_busy(vcpu, "while running vm");
break;
case UCALL_DONE:
TEST_ASSERT(stage == 1,
@@ -70,91 +65,67 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
stage);
break;
case UCALL_ABORT:
- TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx",
- (const char *)uc.args[0], __FILE__,
- uc.args[1], uc.args[2], uc.args[3]);
+ REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
default:
TEST_ASSERT(false, "Unexpected exit: %s",
- exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+ exit_reason_str(vcpu->run->exit_reason));
}
}
}
-static struct kvm_vm *create_vm(void)
+static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
+ struct kvm_vcpu *vcpus[])
{
struct kvm_vm *vm;
- uint64_t vcpu_pages = (DEFAULT_STACK_PGS) * 2;
- uint64_t extra_pg_pages = vcpu_pages / PTES_PER_MIN_PAGE * N_VCPU;
- uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
+ uint32_t i;
- pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages);
- vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR);
+ vm = vm_create(nr_vcpus);
- kvm_vm_elf_load(vm, program_invocation_name);
- vm_create_irqchip(vm);
+ vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);
+ for (i = 0; i < nr_vcpus; i++)
+ vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu :
+ guest_not_bsp_vcpu);
return vm;
}
-static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code)
-{
- if (bsp_code)
- vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu);
- else
- vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu);
-}
-
-static void run_vm_bsp(uint32_t bsp_vcpu)
+static void run_vm_bsp(uint32_t bsp_vcpu_id)
{
+ struct kvm_vcpu *vcpus[2];
struct kvm_vm *vm;
- bool is_bsp_vcpu1 = bsp_vcpu == VCPU_ID1;
- vm = create_vm();
+ vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus);
- if (is_bsp_vcpu1)
- vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
-
- add_x86_vcpu(vm, VCPU_ID0, !is_bsp_vcpu1);
- add_x86_vcpu(vm, VCPU_ID1, is_bsp_vcpu1);
-
- run_vcpu(vm, VCPU_ID0);
- run_vcpu(vm, VCPU_ID1);
+ run_vcpu(vcpus[0]);
+ run_vcpu(vcpus[1]);
kvm_vm_free(vm);
}
static void check_set_bsp_busy(void)
{
+ struct kvm_vcpu *vcpus[2];
struct kvm_vm *vm;
- int res;
- vm = create_vm();
+ vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus);
- add_x86_vcpu(vm, VCPU_ID0, true);
- add_x86_vcpu(vm, VCPU_ID1, false);
+ test_set_bsp_busy(vcpus[1], "after adding vcpu");
- res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
- TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set after adding vcpu");
+ run_vcpu(vcpus[0]);
+ run_vcpu(vcpus[1]);
- run_vcpu(vm, VCPU_ID0);
- run_vcpu(vm, VCPU_ID1);
-
- res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
- TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set to a terminated vcpu");
+ test_set_bsp_busy(vcpus[1], "to a terminated vcpu");
kvm_vm_free(vm);
}
int main(int argc, char *argv[])
{
- if (!kvm_check_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
- print_skip("set_boot_cpu_id not available");
- return 0;
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID));
- run_vm_bsp(VCPU_ID0);
- run_vm_bsp(VCPU_ID1);
- run_vm_bsp(VCPU_ID0);
+ run_vm_bsp(0);
+ run_vm_bsp(1);
+ run_vm_bsp(0);
check_set_bsp_busy();
}
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index 318be0bf77ab..2bb08bf2125d 100644
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -22,9 +22,7 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 5
-
-static void test_cr4_feature_bit(struct kvm_vm *vm, struct kvm_sregs *orig,
+static void test_cr4_feature_bit(struct kvm_vcpu *vcpu, struct kvm_sregs *orig,
uint64_t feature_bit)
{
struct kvm_sregs sregs;
@@ -37,44 +35,40 @@ static void test_cr4_feature_bit(struct kvm_vm *vm, struct kvm_sregs *orig,
memcpy(&sregs, orig, sizeof(sregs));
sregs.cr4 |= feature_bit;
- rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ rc = _vcpu_sregs_set(vcpu, &sregs);
TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit);
/* Sanity check that KVM didn't change anything. */
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs");
}
-static uint64_t calc_cr4_feature_bits(struct kvm_vm *vm)
+static uint64_t calc_supported_cr4_feature_bits(void)
{
- struct kvm_cpuid_entry2 *cpuid_1, *cpuid_7;
uint64_t cr4;
- cpuid_1 = kvm_get_supported_cpuid_entry(1);
- cpuid_7 = kvm_get_supported_cpuid_entry(7);
-
cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
- if (cpuid_7->ecx & CPUID_UMIP)
+ if (kvm_cpu_has(X86_FEATURE_UMIP))
cr4 |= X86_CR4_UMIP;
- if (cpuid_7->ecx & CPUID_LA57)
+ if (kvm_cpu_has(X86_FEATURE_LA57))
cr4 |= X86_CR4_LA57;
- if (cpuid_1->ecx & CPUID_VMX)
+ if (kvm_cpu_has(X86_FEATURE_VMX))
cr4 |= X86_CR4_VMXE;
- if (cpuid_1->ecx & CPUID_SMX)
+ if (kvm_cpu_has(X86_FEATURE_SMX))
cr4 |= X86_CR4_SMXE;
- if (cpuid_7->ebx & CPUID_FSGSBASE)
+ if (kvm_cpu_has(X86_FEATURE_FSGSBASE))
cr4 |= X86_CR4_FSGSBASE;
- if (cpuid_1->ecx & CPUID_PCID)
+ if (kvm_cpu_has(X86_FEATURE_PCID))
cr4 |= X86_CR4_PCIDE;
- if (cpuid_1->ecx & CPUID_XSAVE)
+ if (kvm_cpu_has(X86_FEATURE_XSAVE))
cr4 |= X86_CR4_OSXSAVE;
- if (cpuid_7->ebx & CPUID_SMEP)
+ if (kvm_cpu_has(X86_FEATURE_SMEP))
cr4 |= X86_CR4_SMEP;
- if (cpuid_7->ebx & CPUID_SMAP)
+ if (kvm_cpu_has(X86_FEATURE_SMAP))
cr4 |= X86_CR4_SMAP;
- if (cpuid_7->ecx & CPUID_PKU)
+ if (kvm_cpu_has(X86_FEATURE_PKU))
cr4 |= X86_CR4_PKE;
return cr4;
@@ -83,6 +77,7 @@ static uint64_t calc_cr4_feature_bits(struct kvm_vm *vm)
int main(int argc, char *argv[])
{
struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t cr4;
int rc;
@@ -95,44 +90,44 @@ int main(int argc, char *argv[])
* use it to verify all supported CR4 bits can be set prior to defining
* the vCPU model, i.e. without doing KVM_SET_CPUID2.
*/
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
- sregs.cr4 |= calc_cr4_feature_bits(vm);
+ sregs.cr4 |= calc_supported_cr4_feature_bits();
cr4 = sregs.cr4;
- rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ rc = _vcpu_sregs_set(vcpu, &sregs);
TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
sregs.cr4, cr4);
/* Verify all unsupported features are rejected by KVM. */
- test_cr4_feature_bit(vm, &sregs, X86_CR4_UMIP);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_LA57);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_VMXE);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_SMXE);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_FSGSBASE);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_PCIDE);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_OSXSAVE);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_SMEP);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_SMAP);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_PKE);
+ test_cr4_feature_bit(vcpu, &sregs, X86_CR4_UMIP);
+ test_cr4_feature_bit(vcpu, &sregs, X86_CR4_LA57);
+ test_cr4_feature_bit(vcpu, &sregs, X86_CR4_VMXE);
+ test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMXE);
+ test_cr4_feature_bit(vcpu, &sregs, X86_CR4_FSGSBASE);
+ test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PCIDE);
+ test_cr4_feature_bit(vcpu, &sregs, X86_CR4_OSXSAVE);
+ test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMEP);
+ test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMAP);
+ test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PKE);
kvm_vm_free(vm);
/* Create a "real" VM and verify APIC_BASE can be set. */
- vm = vm_create_default(VCPU_ID, 0, NULL);
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.apic_base = 1 << 10;
- rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ rc = _vcpu_sregs_set(vcpu, &sregs);
TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
sregs.apic_base);
sregs.apic_base = 1 << 11;
- rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ rc = _vcpu_sregs_set(vcpu, &sregs);
TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
sregs.apic_base);
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
index d1dc1acf997c..c7ef97561038 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -12,7 +12,6 @@
#include "processor.h"
#include "svm_util.h"
#include "kselftest.h"
-#include "../lib/kvm_util_internal.h"
#define SEV_POLICY_ES 0b100
@@ -54,10 +53,10 @@ static struct kvm_vm *sev_vm_create(bool es)
struct kvm_sev_launch_start start = { 0 };
int i;
- vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+ vm = vm_create_barebones();
sev_ioctl(vm->fd, es ? KVM_SEV_ES_INIT : KVM_SEV_INIT, NULL);
for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
- vm_vcpu_add(vm, i);
+ __vm_vcpu_add(vm, i);
if (es)
start.policy |= SEV_POLICY_ES;
sev_ioctl(vm->fd, KVM_SEV_LAUNCH_START, &start);
@@ -71,32 +70,27 @@ static struct kvm_vm *aux_vm_create(bool with_vcpus)
struct kvm_vm *vm;
int i;
- vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+ vm = vm_create_barebones();
if (!with_vcpus)
return vm;
for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
- vm_vcpu_add(vm, i);
+ __vm_vcpu_add(vm, i);
return vm;
}
-static int __sev_migrate_from(int dst_fd, int src_fd)
+static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
{
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM,
- .args = { src_fd }
- };
-
- return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
+ return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
}
-static void sev_migrate_from(int dst_fd, int src_fd)
+static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
{
int ret;
- ret = __sev_migrate_from(dst_fd, src_fd);
+ ret = __sev_migrate_from(dst, src);
TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno);
}
@@ -111,13 +105,13 @@ static void test_sev_migrate_from(bool es)
dst_vms[i] = aux_vm_create(true);
/* Initial migration from the src to the first dst. */
- sev_migrate_from(dst_vms[0]->fd, src_vm->fd);
+ sev_migrate_from(dst_vms[0], src_vm);
for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
- sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd);
+ sev_migrate_from(dst_vms[i], dst_vms[i - 1]);
/* Migrate the guest back to the original VM. */
- ret = __sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd);
+ ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
TEST_ASSERT(ret == -1 && errno == EIO,
"VM that was migrated from should be dead. ret %d, errno: %d\n", ret,
errno);
@@ -129,7 +123,7 @@ static void test_sev_migrate_from(bool es)
struct locking_thread_input {
struct kvm_vm *vm;
- int source_fds[NR_LOCK_TESTING_THREADS];
+ struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS];
};
static void *locking_test_thread(void *arg)
@@ -139,7 +133,7 @@ static void *locking_test_thread(void *arg)
for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
j = i % NR_LOCK_TESTING_THREADS;
- __sev_migrate_from(input->vm->fd, input->source_fds[j]);
+ __sev_migrate_from(input->vm, input->source_vms[j]);
}
return NULL;
@@ -153,11 +147,11 @@ static void test_sev_migrate_locking(void)
for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
input[i].vm = sev_vm_create(/* es= */ false);
- input[0].source_fds[i] = input[i].vm->fd;
+ input[0].source_vms[i] = input[i].vm;
}
for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
- memcpy(input[i].source_fds, input[0].source_fds,
- sizeof(input[i].source_fds));
+ memcpy(input[i].source_vms, input[0].source_vms,
+ sizeof(input[i].source_vms));
for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
@@ -174,9 +168,9 @@ static void test_sev_migrate_parameters(void)
*sev_es_vm_no_vmsa;
int ret;
- vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+ vm_no_vcpu = vm_create_barebones();
vm_no_sev = aux_vm_create(true);
- ret = __sev_migrate_from(vm_no_vcpu->fd, vm_no_sev->fd);
+ ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Migrations require SEV enabled. ret %d, errno: %d\n", ret,
errno);
@@ -186,29 +180,29 @@ static void test_sev_migrate_parameters(void)
sev_vm = sev_vm_create(/* es= */ false);
sev_es_vm = sev_vm_create(/* es= */ true);
- sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+ sev_es_vm_no_vmsa = vm_create_barebones();
sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
- vm_vcpu_add(sev_es_vm_no_vmsa, 1);
+ __vm_vcpu_add(sev_es_vm_no_vmsa, 1);
- ret = __sev_migrate_from(sev_vm->fd, sev_es_vm->fd);
+ ret = __sev_migrate_from(sev_vm, sev_es_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n",
ret, errno);
- ret = __sev_migrate_from(sev_es_vm->fd, sev_vm->fd);
+ ret = __sev_migrate_from(sev_es_vm, sev_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n",
ret, errno);
- ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm->fd);
+ ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n",
ret, errno);
- ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm_no_vmsa->fd);
+ ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n",
@@ -222,22 +216,17 @@ out:
kvm_vm_free(vm_no_sev);
}
-static int __sev_mirror_create(int dst_fd, int src_fd)
+static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
{
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_VM_COPY_ENC_CONTEXT_FROM,
- .args = { src_fd }
- };
-
- return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
+ return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd);
}
-static void sev_mirror_create(int dst_fd, int src_fd)
+static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
{
int ret;
- ret = __sev_mirror_create(dst_fd, src_fd);
+ ret = __sev_mirror_create(dst, src);
TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno);
}
@@ -285,11 +274,11 @@ static void test_sev_mirror(bool es)
src_vm = sev_vm_create(es);
dst_vm = aux_vm_create(false);
- sev_mirror_create(dst_vm->fd, src_vm->fd);
+ sev_mirror_create(dst_vm, src_vm);
/* Check that we can complete creation of the mirror VM. */
for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
- vm_vcpu_add(dst_vm, i);
+ __vm_vcpu_add(dst_vm, i);
if (es)
sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
@@ -309,18 +298,18 @@ static void test_sev_mirror_parameters(void)
vm_with_vcpu = aux_vm_create(true);
vm_no_vcpu = aux_vm_create(false);
- ret = __sev_mirror_create(sev_vm->fd, sev_vm->fd);
+ ret = __sev_mirror_create(sev_vm, sev_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"Should not be able copy context to self. ret: %d, errno: %d\n",
ret, errno);
- ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd);
+ ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Copy context requires SEV enabled. ret %d, errno: %d\n", ret,
errno);
- ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd);
+ ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n",
@@ -330,13 +319,13 @@ static void test_sev_mirror_parameters(void)
goto out;
sev_es_vm = sev_vm_create(/* es= */ true);
- ret = __sev_mirror_create(sev_vm->fd, sev_es_vm->fd);
+ ret = __sev_mirror_create(sev_vm, sev_es_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n",
ret, errno);
- ret = __sev_mirror_create(sev_es_vm->fd, sev_vm->fd);
+ ret = __sev_mirror_create(sev_es_vm, sev_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n",
@@ -364,16 +353,16 @@ static void test_sev_move_copy(void)
dst2_mirror_vm = aux_vm_create(false);
dst3_mirror_vm = aux_vm_create(false);
- sev_mirror_create(mirror_vm->fd, sev_vm->fd);
+ sev_mirror_create(mirror_vm, sev_vm);
- sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd);
- sev_migrate_from(dst_vm->fd, sev_vm->fd);
+ sev_migrate_from(dst_mirror_vm, mirror_vm);
+ sev_migrate_from(dst_vm, sev_vm);
- sev_migrate_from(dst2_vm->fd, dst_vm->fd);
- sev_migrate_from(dst2_mirror_vm->fd, dst_mirror_vm->fd);
+ sev_migrate_from(dst2_vm, dst_vm);
+ sev_migrate_from(dst2_mirror_vm, dst_mirror_vm);
- sev_migrate_from(dst3_mirror_vm->fd, dst2_mirror_vm->fd);
- sev_migrate_from(dst3_vm->fd, dst2_vm->fd);
+ sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
+ sev_migrate_from(dst3_vm, dst2_vm);
kvm_vm_free(dst_vm);
kvm_vm_free(sev_vm);
@@ -393,10 +382,10 @@ static void test_sev_move_copy(void)
mirror_vm = aux_vm_create(false);
dst_mirror_vm = aux_vm_create(false);
- sev_mirror_create(mirror_vm->fd, sev_vm->fd);
+ sev_mirror_create(mirror_vm, sev_vm);
- sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd);
- sev_migrate_from(dst_vm->fd, sev_vm->fd);
+ sev_migrate_from(dst_mirror_vm, mirror_vm);
+ sev_migrate_from(dst_vm, sev_vm);
kvm_vm_free(mirror_vm);
kvm_vm_free(dst_mirror_vm);
@@ -404,41 +393,25 @@ static void test_sev_move_copy(void)
kvm_vm_free(sev_vm);
}
-#define X86_FEATURE_SEV (1 << 1)
-#define X86_FEATURE_SEV_ES (1 << 3)
-
int main(int argc, char *argv[])
{
- struct kvm_cpuid_entry2 *cpuid;
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM));
- if (!kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM) &&
- !kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
- print_skip("Capabilities not available");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
- cpuid = kvm_get_supported_cpuid_entry(0x80000000);
- if (cpuid->eax < 0x8000001f) {
- print_skip("AMD memory encryption not available");
- exit(KSFT_SKIP);
- }
- cpuid = kvm_get_supported_cpuid_entry(0x8000001f);
- if (!(cpuid->eax & X86_FEATURE_SEV)) {
- print_skip("AMD SEV not available");
- exit(KSFT_SKIP);
- }
- have_sev_es = !!(cpuid->eax & X86_FEATURE_SEV_ES);
+ have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
- if (kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
+ if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
test_sev_migrate_from(/* es= */ false);
if (have_sev_es)
test_sev_migrate_from(/* es= */ true);
test_sev_migrate_locking();
test_sev_migrate_parameters();
- if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
+ if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
test_sev_move_copy();
}
- if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
+ if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
test_sev_mirror(/* es= */ false);
if (have_sev_es)
test_sev_mirror(/* es= */ true);
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index b4e0c860769e..1f136a81858e 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -19,8 +19,6 @@
#include "vmx.h"
#include "svm_util.h"
-#define VCPU_ID 1
-
#define SMRAM_SIZE 65536
#define SMRAM_MEMSLOT ((1 << 16) | 1)
#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
@@ -85,7 +83,7 @@ static void guest_code(void *arg)
sync_with_host(4);
if (arg) {
- if (cpu_has_svm()) {
+ if (this_cpu_has(X86_FEATURE_SVM)) {
generic_svm_setup(svm, l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
} else {
@@ -101,7 +99,7 @@ static void guest_code(void *arg)
sync_with_host(7);
- if (cpu_has_svm()) {
+ if (this_cpu_has(X86_FEATURE_SVM)) {
run_guest(svm->vmcb, svm->vmcb_gpa);
run_guest(svm->vmcb, svm->vmcb_gpa);
} else {
@@ -116,22 +114,23 @@ static void guest_code(void *arg)
sync_with_host(DONE);
}
-void inject_smi(struct kvm_vm *vm)
+void inject_smi(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_events events;
- vcpu_events_get(vm, VCPU_ID, &events);
+ vcpu_events_get(vcpu, &events);
events.smi.pending = 1;
events.flags |= KVM_VCPUEVENT_VALID_SMM;
- vcpu_events_set(vm, VCPU_ID, &events);
+ vcpu_events_set(vcpu, &events);
}
int main(int argc, char *argv[])
{
vm_vaddr_t nested_gva = 0;
+ struct kvm_vcpu *vcpu;
struct kvm_regs regs;
struct kvm_vm *vm;
struct kvm_run *run;
@@ -139,9 +138,9 @@ int main(int argc, char *argv[])
int stage, stage_reported;
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- run = vcpu_state(vm, VCPU_ID);
+ run = vcpu->run;
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
SMRAM_MEMSLOT, SMRAM_PAGES, 0);
@@ -152,29 +151,29 @@ int main(int argc, char *argv[])
memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
sizeof(smi_handler));
- vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
+ vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
- if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- if (nested_svm_supported())
+ if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+ if (kvm_cpu_has(X86_FEATURE_SVM))
vcpu_alloc_svm(vm, &nested_gva);
- else if (nested_vmx_supported())
+ else if (kvm_cpu_has(X86_FEATURE_VMX))
vcpu_alloc_vmx(vm, &nested_gva);
}
if (!nested_gva)
pr_info("will skip SMM test with VMX enabled\n");
- vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Stage %d: unexpected exit reason: %u (%s),\n",
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
memset(&regs, 0, sizeof(regs));
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
stage_reported = regs.rax & 0xff;
@@ -191,7 +190,7 @@ int main(int argc, char *argv[])
* return from it. Do not perform save/restore while in SMM yet.
*/
if (stage == 8) {
- inject_smi(vm);
+ inject_smi(vcpu);
continue;
}
@@ -200,15 +199,14 @@ int main(int argc, char *argv[])
* during L2 execution.
*/
if (stage == 10)
- inject_smi(vm);
+ inject_smi(vcpu);
- state = vcpu_save_state(vm, VCPU_ID);
+ state = vcpu_save_state(vcpu);
kvm_vm_release(vm);
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
+
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ run = vcpu->run;
kvm_x86_state_cleanup(state);
}
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 2e0a92da8ff5..ea578971fb9f 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -20,7 +20,6 @@
#include "vmx.h"
#include "svm_util.h"
-#define VCPU_ID 5
#define L2_GUEST_STACK_SIZE 256
void svm_l2_guest_code(void)
@@ -143,7 +142,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
GUEST_SYNC(2);
if (arg) {
- if (cpu_has_svm())
+ if (this_cpu_has(X86_FEATURE_SVM))
svm_l1_guest_code(arg);
else
vmx_l1_guest_code(arg);
@@ -157,6 +156,7 @@ int main(int argc, char *argv[])
vm_vaddr_t nested_gva = 0;
struct kvm_regs regs1, regs2;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
struct kvm_x86_state *state;
@@ -164,34 +164,33 @@ int main(int argc, char *argv[])
int stage;
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
- if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- if (nested_svm_supported())
+ if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+ if (kvm_cpu_has(X86_FEATURE_SVM))
vcpu_alloc_svm(vm, &nested_gva);
- else if (nested_vmx_supported())
+ else if (kvm_cpu_has(X86_FEATURE_VMX))
vcpu_alloc_vmx(vm, &nested_gva);
}
if (!nested_gva)
pr_info("will skip nested state checks\n");
- vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Stage %d: unexpected exit reason: %u (%s),\n",
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
@@ -206,22 +205,20 @@ int main(int argc, char *argv[])
uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
stage, (ulong)uc.args[1]);
- state = vcpu_save_state(vm, VCPU_ID);
+ state = vcpu_save_state(vcpu);
memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
kvm_vm_release(vm);
/* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ run = vcpu->run;
kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
+ vcpu_regs_get(vcpu, &regs2);
TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
"Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
(ulong) regs2.rdi, (ulong) regs2.rsi);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
index 30a81038df46..4a07ba227b99 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -13,10 +13,6 @@
#include "svm_util.h"
#include "apic.h"
-#define VCPU_ID 0
-
-static struct kvm_vm *vm;
-
bool vintr_irq_called;
bool intr_irq_called;
@@ -88,33 +84,36 @@ static void l1_guest_code(struct svm_test_data *svm)
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
+ struct ucall uc;
- nested_svm_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
vcpu_alloc_svm(vm, &svm_gva);
- vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
- struct ucall uc;
+ run = vcpu->run;
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
break;
/* NOT REACHED */
case UCALL_DONE:
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
new file mode 100644
index 000000000000..e637d7736012
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Oracle and/or its affiliates.
+ *
+ * Based on:
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+
+#include <stdatomic.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+
+#define INT_NR 0x20
+
+static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
+
+static unsigned int bp_fired;
+static void guest_bp_handler(struct ex_regs *regs)
+{
+ bp_fired++;
+}
+
+static unsigned int int_fired;
+static void l2_guest_code_int(void);
+
+static void guest_int_handler(struct ex_regs *regs)
+{
+ int_fired++;
+ GUEST_ASSERT_2(regs->rip == (unsigned long)l2_guest_code_int,
+ regs->rip, (unsigned long)l2_guest_code_int);
+}
+
+static void l2_guest_code_int(void)
+{
+ GUEST_ASSERT_1(int_fired == 1, int_fired);
+ vmmcall();
+ ud2();
+
+ GUEST_ASSERT_1(bp_fired == 1, bp_fired);
+ hlt();
+}
+
+static atomic_int nmi_stage;
+#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire)
+#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel)
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+ nmi_stage_inc();
+
+ if (nmi_stage_get() == 1) {
+ vmmcall();
+ GUEST_ASSERT(false);
+ } else {
+ GUEST_ASSERT_1(nmi_stage_get() == 3, nmi_stage_get());
+ GUEST_DONE();
+ }
+}
+
+static void l2_guest_code_nmi(void)
+{
+ ud2();
+}
+
+static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ if (is_nmi)
+ x2apic_enable();
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm,
+ is_nmi ? l2_guest_code_nmi : l2_guest_code_int,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR);
+ vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT);
+
+ if (is_nmi) {
+ vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+ } else {
+ vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT;
+ /* The return address pushed on stack */
+ vmcb->control.next_rip = vmcb->save.rip;
+ }
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+ vmcb->control.exit_code,
+ vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+ if (is_nmi) {
+ clgi();
+ x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
+
+ GUEST_ASSERT_1(nmi_stage_get() == 1, nmi_stage_get());
+ nmi_stage_inc();
+
+ stgi();
+ /* self-NMI happens here */
+ while (true)
+ cpu_relax();
+ }
+
+ /* Skip over VMMCALL */
+ vmcb->save.rip += 3;
+
+ /* Switch to alternate IDT to cause intervening NPF again */
+ vmcb->save.idtr.base = idt_alt;
+ vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */
+
+ vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
+ /* The return address pushed on stack, skip over UD2 */
+ vmcb->control.next_rip = vmcb->save.rip + 2;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_HLT,
+ vmcb->control.exit_code,
+ vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+ GUEST_DONE();
+}
+
+static void run_test(bool is_nmi)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t svm_gva;
+ vm_vaddr_t idt_alt_vm;
+ struct kvm_guest_debug debug;
+
+ pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+ vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
+ vm_install_exception_handler(vm, INT_NR, guest_int_handler);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+
+ if (!is_nmi) {
+ void *idt, *idt_alt;
+
+ idt_alt_vm = vm_vaddr_alloc_page(vm);
+ idt_alt = addr_gva2hva(vm, idt_alt_vm);
+ idt = addr_gva2hva(vm, vm->idt);
+ memcpy(idt_alt, idt, getpagesize());
+ } else {
+ idt_alt_vm = 0;
+ }
+ vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
+
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
+
+ struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ alarm(2);
+ vcpu_run(vcpu);
+ alarm(0);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT_3(uc, "vals = 0x%lx 0x%lx 0x%lx");
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+done:
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS),
+ "KVM with nSVM is supposed to unconditionally advertise nRIP Save");
+
+ atomic_init(&nmi_stage, 0);
+
+ run_test(false);
+ run_test(true);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
index be2ca157485b..c3ac45df7483 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
@@ -12,10 +12,6 @@
#include "processor.h"
#include "svm_util.h"
-#define VCPU_ID 5
-
-static struct kvm_vm *vm;
-
static void l2_guest_code(struct svm_test_data *svm)
{
__asm__ __volatile__("vmcall");
@@ -39,28 +35,30 @@ static void l1_guest_code(struct svm_test_data *svm)
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
- nested_svm_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
vcpu_alloc_svm(vm, &svm_gva);
- vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ volatile struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index fc03a150278d..9b6db0b0b13e 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -20,8 +20,6 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 5
-
#define UCALL_PIO_PORT ((uint16_t)0x1000)
struct ucall uc_none = {
@@ -84,6 +82,7 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left,
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
struct kvm_regs regs;
@@ -95,66 +94,59 @@ int main(int argc, char *argv[])
setbuf(stdout, NULL);
cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
- if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
- print_skip("KVM_CAP_SYNC_REGS not supported");
- exit(KSFT_SKIP);
- }
- if ((cap & INVALID_SYNC_FIELD) != 0) {
- print_skip("The \"invalid\" field is not invalid");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
+ TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- run = vcpu_state(vm, VCPU_ID);
+ run = vcpu->run;
/* Request reading invalid register set from VCPU. */
run->kvm_valid_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
"Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+ run->kvm_valid_regs = 0;
run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
"Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+ run->kvm_valid_regs = 0;
/* Request setting invalid register set into VCPU. */
run->kvm_dirty_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
"Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+ run->kvm_dirty_regs = 0;
run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
"Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+ run->kvm_dirty_regs = 0;
/* Request and verify all valid register sets. */
/* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
run->kvm_valid_regs = TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s),\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
compare_regs(&regs, &run->s.regs.regs);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
compare_sregs(&sregs, &run->s.regs.sregs);
- vcpu_events_get(vm, VCPU_ID, &events);
+ vcpu_events_get(vcpu, &events);
compare_vcpu_events(&events, &run->s.regs.events);
/* Set and verify various register values. */
@@ -164,7 +156,7 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = TEST_SYNC_FIELDS;
run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s),\n",
run->exit_reason,
@@ -176,13 +168,13 @@ int main(int argc, char *argv[])
"apic_base sync regs value incorrect 0x%llx.",
run->s.regs.sregs.apic_base);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
compare_regs(&regs, &run->s.regs.regs);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
compare_sregs(&sregs, &run->s.regs.sregs);
- vcpu_events_get(vm, VCPU_ID, &events);
+ vcpu_events_get(vcpu, &events);
compare_vcpu_events(&events, &run->s.regs.events);
/* Clear kvm_dirty_regs bits, verify new s.regs values are
@@ -191,7 +183,7 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = TEST_SYNC_FIELDS;
run->kvm_dirty_regs = 0;
run->s.regs.regs.rbx = 0xDEADBEEF;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s),\n",
run->exit_reason,
@@ -208,8 +200,8 @@ int main(int argc, char *argv[])
run->kvm_dirty_regs = 0;
run->s.regs.regs.rbx = 0xAAAA;
regs.rbx = 0xBAC0;
- vcpu_regs_set(vm, VCPU_ID, &regs);
- rv = _vcpu_run(vm, VCPU_ID);
+ vcpu_regs_set(vcpu, &regs);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s),\n",
run->exit_reason,
@@ -217,7 +209,7 @@ int main(int argc, char *argv[])
TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
"rbx sync regs value incorrect 0x%llx.",
run->s.regs.regs.rbx);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
"rbx guest value incorrect 0x%llx.",
regs.rbx);
@@ -229,7 +221,7 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = 0;
run->kvm_dirty_regs = TEST_SYNC_FIELDS;
run->s.regs.regs.rbx = 0xBBBB;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s),\n",
run->exit_reason,
@@ -237,7 +229,7 @@ int main(int argc, char *argv[])
TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
"rbx sync regs value incorrect 0x%llx.",
run->s.regs.regs.rbx);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
TEST_ASSERT(regs.rbx == 0xBBBB + 1,
"rbx guest value incorrect 0x%llx.",
regs.rbx);
diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
new file mode 100644
index 000000000000..70b44f0b52fe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT 0x2000
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+void l1_guest_code(struct vmx_pages *vmx)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+
+ prepare_vmcs(vmx, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmlaunch());
+ /* L2 should triple fault after a triple fault event injected. */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+ GUEST_DONE();
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vcpu_events events;
+ vm_vaddr_t vmx_pages_gva;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
+
+ run = vcpu->run;
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Expected KVM_EXIT_IO, got: %u (%s)\n",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+ "Expected IN from port %d from L2, got port %d",
+ ARBITRARY_IO_PORT, run->io.port);
+ vcpu_events_get(vcpu, &events);
+ events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
+ events.triple_fault.pending = true;
+ vcpu_events_set(vcpu, &events);
+ run->immediate_exit = true;
+ vcpu_run_complete_io(vcpu);
+
+ vcpu_events_get(vcpu, &events);
+ TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT,
+ "Triple fault event invalid");
+ TEST_ASSERT(events.triple_fault.pending,
+ "No triple fault pending");
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+
+}
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
index a426078b16a3..22d366c697f7 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -9,14 +9,12 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 0
-
#define UNITY (1ull << 30)
#define HOST_ADJUST (UNITY * 64)
#define GUEST_STEP (UNITY * 4)
#define ROUND(x) ((x + UNITY / 2) & -UNITY)
#define rounded_rdmsr(x) ROUND(rdmsr(x))
-#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vm, 0, x))
+#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vcpu, x))
static void guest_code(void)
{
@@ -66,15 +64,13 @@ static void guest_code(void)
GUEST_DONE();
}
-static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
{
struct ucall uc;
- vcpu_args_set(vm, vcpuid, 1, vcpuid);
-
- vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL);
+ vcpu_run(vcpu);
- switch (get_ucall(vm, vcpuid, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
@@ -83,34 +79,33 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
case UCALL_DONE:
return;
case UCALL_ABORT:
- TEST_ASSERT(false, "%s at %s:%ld\n" \
- "\tvalues: %#lx, %#lx", (const char *)uc.args[0],
- __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
default:
TEST_ASSERT(false, "Unexpected exit: %s",
- exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+ exit_reason_str(vcpu->run->exit_reason));
}
}
int main(void)
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t val;
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
val = 0;
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
/* Guest: writes to MSR_IA32_TSC affect both MSRs. */
- run_vcpu(vm, VCPU_ID, 1);
+ run_vcpu(vcpu, 1);
val = 1ull * GUEST_STEP;
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
/* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
- run_vcpu(vm, VCPU_ID, 2);
+ run_vcpu(vcpu, 2);
val = 2ull * GUEST_STEP;
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
@@ -119,18 +114,18 @@ int main(void)
* Host: writes to MSR_IA32_TSC set the host-side offset
* and therefore do not change MSR_IA32_TSC_ADJUST.
*/
- vcpu_set_msr(vm, 0, MSR_IA32_TSC, HOST_ADJUST + val);
+ vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
- run_vcpu(vm, VCPU_ID, 3);
+ run_vcpu(vcpu, 3);
/* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */
- vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, UNITY * 123456);
+ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- ASSERT_EQ(vcpu_get_msr(vm, 0, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+ ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
/* Restore previous value. */
- vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, val);
+ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
@@ -138,7 +133,7 @@ int main(void)
* Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
* host-side offset and affect both MSRs.
*/
- run_vcpu(vm, VCPU_ID, 4);
+ run_vcpu(vcpu, 4);
val = 3ull * GUEST_STEP;
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
@@ -147,7 +142,7 @@ int main(void)
* Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
* offset is now visible in MSR_IA32_TSC_ADJUST.
*/
- run_vcpu(vm, VCPU_ID, 5);
+ run_vcpu(vcpu, 5);
val = 4ull * GUEST_STEP;
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
index f0083d8cfe98..47139aab7408 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
@@ -46,38 +46,40 @@ static void guest_code(void)
static void *run_vcpu(void *_cpu_nr)
{
- unsigned long cpu = (unsigned long)_cpu_nr;
+ unsigned long vcpu_id = (unsigned long)_cpu_nr;
unsigned long failures = 0;
static bool first_cpu_done;
+ struct kvm_vcpu *vcpu;
- /* The kernel is fine, but vm_vcpu_add_default() needs locking */
+ /* The kernel is fine, but vm_vcpu_add() needs locking */
pthread_spin_lock(&create_lock);
- vm_vcpu_add_default(vm, cpu, guest_code);
+ vcpu = vm_vcpu_add(vm, vcpu_id, guest_code);
if (!first_cpu_done) {
first_cpu_done = true;
- vcpu_set_msr(vm, cpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
+ vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
}
pthread_spin_unlock(&create_lock);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, cpu);
+ volatile struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, cpu);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, cpu, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_DONE:
goto out;
case UCALL_SYNC:
- printf("Guest %ld sync %lx %lx %ld\n", cpu, uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
+ printf("Guest %d sync %lx %lx %ld\n", vcpu->id,
+ uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
failures++;
break;
@@ -91,12 +93,9 @@ static void *run_vcpu(void *_cpu_nr)
int main(int argc, char *argv[])
{
- if (!kvm_check_cap(KVM_CAP_VM_TSC_CONTROL)) {
- print_skip("KVM_CAP_VM_TSC_CONTROL not available");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL));
- vm = vm_create_default_with_vcpus(0, DEFAULT_STACK_PGS * NR_TEST_VCPUS, 0, guest_code, NULL);
+ vm = vm_create(NR_TEST_VCPUS);
vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
new file mode 100644
index 000000000000..a897c7fd8abe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucna_injection_test
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that user space can inject UnCorrectable No Action required (UCNA)
+ * memory errors to the guest.
+ *
+ * The test starts one vCPU with the MCG_CMCI_P enabled. It verifies that
+ * proper UCNA errors can be injected to a vCPU with MCG_CMCI_P and
+ * corresponding per-bank control register (MCI_CTL2) bit enabled.
+ * The test also checks that the UCNA errors get recorded in the
+ * Machine Check bank registers no matter the error signal interrupts get
+ * delivered into the guest or not.
+ *
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util_base.h"
+#include "kvm_util.h"
+#include "mce.h"
+#include "processor.h"
+#include "test_util.h"
+#include "apic.h"
+
+#define SYNC_FIRST_UCNA 9
+#define SYNC_SECOND_UCNA 10
+#define SYNC_GP 11
+#define FIRST_UCNA_ADDR 0xdeadbeef
+#define SECOND_UCNA_ADDR 0xcafeb0ba
+
+/*
+ * Vector for the CMCI interrupt.
+ * Value is arbitrary. Any value in 0x20-0xFF should work:
+ * https://wiki.osdev.org/Interrupt_Vector_Table
+ */
+#define CMCI_VECTOR 0xa9
+
+#define UCNA_BANK 0x7 // IMC0 bank
+
+#define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
+
+static uint64_t supported_mcg_caps;
+
+/*
+ * Record states about the injected UCNA.
+ * The variables started with the 'i_' prefixes are recorded in interrupt
+ * handler. Variables without the 'i_' prefixes are recorded in guest main
+ * execution thread.
+ */
+static volatile uint64_t i_ucna_rcvd;
+static volatile uint64_t i_ucna_addr;
+static volatile uint64_t ucna_addr;
+static volatile uint64_t ucna_addr2;
+
+struct thread_params {
+ struct kvm_vcpu *vcpu;
+ uint64_t *p_i_ucna_rcvd;
+ uint64_t *p_i_ucna_addr;
+ uint64_t *p_ucna_addr;
+ uint64_t *p_ucna_addr2;
+};
+
+static void verify_apic_base_addr(void)
+{
+ uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+ uint64_t base = GET_APIC_BASE(msr);
+
+ GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void ucna_injection_guest_code(void)
+{
+ uint64_t ctl2;
+ verify_apic_base_addr();
+ xapic_enable();
+
+ /* Sets up the interrupt vector and enables per-bank CMCI sigaling. */
+ xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
+ ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+ /* Enables interrupt in guest. */
+ asm volatile("sti");
+
+ /* Let user space inject the first UCNA */
+ GUEST_SYNC(SYNC_FIRST_UCNA);
+
+ ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+
+ /* Disables the per-bank CMCI signaling. */
+ ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
+
+ /* Let the user space inject the second UCNA */
+ GUEST_SYNC(SYNC_SECOND_UCNA);
+
+ ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+ GUEST_DONE();
+}
+
+static void cmci_disabled_guest_code(void)
+{
+ uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+ GUEST_DONE();
+}
+
+static void cmci_enabled_guest_code(void)
+{
+ uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
+
+ GUEST_DONE();
+}
+
+static void guest_cmci_handler(struct ex_regs *regs)
+{
+ i_ucna_rcvd++;
+ i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+ xapic_write_reg(APIC_EOI, 0);
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(SYNC_GP);
+}
+
+static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
+{
+ unsigned int exit_reason;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ exit_reason = vcpu->run->exit_reason;
+ TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+ "exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
+ exit_reason, exit_reason_str(exit_reason));
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC\n");
+ TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
+ printf("vCPU received GP in guest.\n");
+}
+
+static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) {
+ /*
+ * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
+ * the IA32_MCi_STATUS register.
+ * MSCOD=1 (BIT[16] - MscodDataRdErr).
+ * MCACOD=0x0090 (Memory controller error format, channel 0)
+ */
+ uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
+ MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
+ struct kvm_x86_mce mce = {};
+ mce.status = status;
+ mce.mcg_status = 0;
+ /*
+ * MCM_ADDR_PHYS indicates the reported address is a physical address.
+ * Lowest 6 bits is the recoverable address LSB, i.e., the injected MCE
+ * is at 4KB granularity.
+ */
+ mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+ mce.addr = addr;
+ mce.bank = UCNA_BANK;
+
+ vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce);
+}
+
+static void *run_ucna_injection(void *arg)
+{
+ struct thread_params *params = (struct thread_params *)arg;
+ struct ucall uc;
+ int old;
+ int r;
+ unsigned int exit_reason;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(r == 0,
+ "pthread_setcanceltype failed with errno=%d",
+ r);
+
+ vcpu_run(params->vcpu);
+
+ exit_reason = params->vcpu->run->exit_reason;
+ TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+ "unexpected exit reason %u-%s, expected KVM_EXIT_IO",
+ exit_reason, exit_reason_str(exit_reason));
+ TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC\n");
+ TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
+
+ printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
+
+ inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
+ vcpu_run(params->vcpu);
+
+ exit_reason = params->vcpu->run->exit_reason;
+ TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+ "unexpected exit reason %u-%s, expected KVM_EXIT_IO",
+ exit_reason, exit_reason_str(exit_reason));
+ TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC\n");
+ TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
+
+ printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
+
+ inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
+ vcpu_run(params->vcpu);
+
+ exit_reason = params->vcpu->run->exit_reason;
+ TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+ "unexpected exit reason %u-%s, expected KVM_EXIT_IO",
+ exit_reason, exit_reason_str(exit_reason));
+ if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
+ TEST_ASSERT(false, "vCPU assertion failure: %s.\n",
+ (const char *)uc.args[0]);
+ }
+
+ return NULL;
+}
+
+static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ params->vcpu = vcpu;
+ params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
+ params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
+ params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
+ params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
+
+ run_ucna_injection(params);
+
+ TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only first UCNA get signaled.");
+ TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR,
+ "Only first UCNA reported addr get recorded via interrupt.");
+ TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR,
+ "First injected UCNAs should get exposed via registers.");
+ TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR,
+ "Second injected UCNAs should get exposed via registers.");
+
+ printf("Test successful.\n"
+ "UCNA CMCI interrupts received: %ld\n"
+ "Last UCNA address received via CMCI: %lx\n"
+ "First UCNA address in vCPU thread: %lx\n"
+ "Second UCNA address in vCPU thread: %lx\n",
+ *params->p_i_ucna_rcvd, *params->p_i_ucna_addr,
+ *params->p_ucna_addr, *params->p_ucna_addr2);
+}
+
+static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
+{
+ uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
+ if (enable_cmci_p)
+ mcg_caps |= MCG_CMCI_P;
+
+ mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK;
+ vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
+}
+
+static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
+ bool enable_cmci_p, void *guest_code)
+{
+ struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
+ setup_mce_cap(vcpu, enable_cmci_p);
+ return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+ struct thread_params params;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *ucna_vcpu;
+ struct kvm_vcpu *cmcidis_vcpu;
+ struct kvm_vcpu *cmci_vcpu;
+
+ kvm_check_cap(KVM_CAP_MCE);
+
+ vm = __vm_create(VM_MODE_DEFAULT, 3, 0);
+
+ kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
+ &supported_mcg_caps);
+
+ if (!(supported_mcg_caps & MCG_CMCI_P)) {
+ print_skip("MCG_CMCI_P is not supported");
+ exit(KSFT_SKIP);
+ }
+
+ ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code);
+ cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
+ cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(ucna_vcpu);
+ vcpu_init_descriptor_tables(cmcidis_vcpu);
+ vcpu_init_descriptor_tables(cmci_vcpu);
+ vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ test_ucna_injection(ucna_vcpu, &params);
+ run_vcpu_expect_gp(cmcidis_vcpu);
+ run_vcpu_expect_gp(cmci_vcpu);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
index e4bef2e05686..7316521428f8 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
@@ -10,8 +10,6 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 1
-
static void guest_ins_port80(uint8_t *buffer, unsigned int count)
{
unsigned long end;
@@ -52,31 +50,29 @@ static void guest_code(void)
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
struct kvm_regs regs;
struct kvm_run *run;
struct kvm_vm *vm;
struct ucall uc;
- int rc;
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
memset(&regs, 0, sizeof(regs));
while (1) {
- rc = _vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
- TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s),\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- if (get_ucall(vm, VCPU_ID, &uc))
+ if (get_ucall(vcpu, &uc))
break;
TEST_ASSERT(run->io.port == 0x80,
@@ -89,22 +85,20 @@ int main(int argc, char *argv[])
* scope from a testing perspective as it's not ABI in any way,
* i.e. it really is abusing internal KVM knowledge.
*/
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
if (regs.rcx == 2)
regs.rcx = 1;
if (regs.rcx == 3)
regs.rcx = 8192;
memset((void *)run + run->io.data_offset, 0xaa, 4096);
- vcpu_regs_set(vm, VCPU_ID, &regs);
+ vcpu_regs_set(vcpu, &regs);
}
switch (uc.cmd) {
case UCALL_DONE:
break;
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld : argN+1 = 0x%lx, argN+2 = 0x%lx",
- (const char *)uc.args[0], __FILE__, uc.args[1],
- uc.args[2], uc.args[3]);
+ REPORT_GUEST_ASSERT_2(uc, "argN+1 = 0x%lx, argN+2 = 0x%lx");
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
index e3e20e8848d0..a4f06370a245 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -17,7 +17,6 @@
#define KVM_FEP_LENGTH 5
static int fep_available = 1;
-#define VCPU_ID 1
#define MSR_NON_EXISTENT 0x474f4f00
static u64 deny_bits = 0;
@@ -395,31 +394,21 @@ static void guest_ud_handler(struct ex_regs *regs)
regs->rip += KVM_FEP_LENGTH;
}
-static void run_guest(struct kvm_vm *vm)
+static void check_for_guest_assert(struct kvm_vcpu *vcpu)
{
- int rc;
-
- rc = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
-}
-
-static void check_for_guest_assert(struct kvm_vm *vm)
-{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
- if (run->exit_reason == KVM_EXIT_IO &&
- get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) {
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ if (vcpu->run->exit_reason == KVM_EXIT_IO &&
+ get_ucall(vcpu, &uc) == UCALL_ABORT) {
+ REPORT_GUEST_ASSERT(uc);
}
}
-static void process_rdmsr(struct kvm_vm *vm, uint32_t msr_index)
+static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
- check_for_guest_assert(vm);
+ check_for_guest_assert(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_RDMSR,
"Unexpected exit reason: %u (%s),\n",
@@ -450,11 +439,11 @@ static void process_rdmsr(struct kvm_vm *vm, uint32_t msr_index)
}
}
-static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
+static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
- check_for_guest_assert(vm);
+ check_for_guest_assert(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_WRMSR,
"Unexpected exit reason: %u (%s),\n",
@@ -481,43 +470,43 @@ static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
}
}
-static void process_ucall_done(struct kvm_vm *vm)
+static void process_ucall_done(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
struct ucall uc;
- check_for_guest_assert(vm);
+ check_for_guest_assert(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s)",
run->exit_reason,
exit_reason_str(run->exit_reason));
- TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE,
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
"Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
uc.cmd, UCALL_DONE);
}
-static uint64_t process_ucall(struct kvm_vm *vm)
+static uint64_t process_ucall(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
struct ucall uc = {};
- check_for_guest_assert(vm);
+ check_for_guest_assert(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s)",
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
break;
case UCALL_ABORT:
- check_for_guest_assert(vm);
+ check_for_guest_assert(vcpu);
break;
case UCALL_DONE:
- process_ucall_done(vm);
+ process_ucall_done(vcpu);
break;
default:
TEST_ASSERT(false, "Unexpected ucall");
@@ -526,45 +515,43 @@ static uint64_t process_ucall(struct kvm_vm *vm)
return uc.cmd;
}
-static void run_guest_then_process_rdmsr(struct kvm_vm *vm, uint32_t msr_index)
+static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
+ uint32_t msr_index)
{
- run_guest(vm);
- process_rdmsr(vm, msr_index);
+ vcpu_run(vcpu);
+ process_rdmsr(vcpu, msr_index);
}
-static void run_guest_then_process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
+static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
+ uint32_t msr_index)
{
- run_guest(vm);
- process_wrmsr(vm, msr_index);
+ vcpu_run(vcpu);
+ process_wrmsr(vcpu, msr_index);
}
-static uint64_t run_guest_then_process_ucall(struct kvm_vm *vm)
+static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
{
- run_guest(vm);
- return process_ucall(vm);
+ vcpu_run(vcpu);
+ return process_ucall(vcpu);
}
-static void run_guest_then_process_ucall_done(struct kvm_vm *vm)
+static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
{
- run_guest(vm);
- process_ucall_done(vm);
+ vcpu_run(vcpu);
+ process_ucall_done(vcpu);
}
-static void test_msr_filter_allow(void) {
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_X86_USER_SPACE_MSR,
- .args[0] = KVM_MSR_EXIT_REASON_FILTER,
- };
+static void test_msr_filter_allow(void)
+{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
int rc;
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code_filter_allow);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_filter_allow);
rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
- vm_enable_cap(vm, &cap);
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
@@ -572,43 +559,43 @@ static void test_msr_filter_allow(void) {
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
/* Process guest code userspace exits. */
- run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
- run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
- run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
- run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
- run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
+ run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+ run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
- run_guest(vm);
+ vcpu_run(vcpu);
vm_install_exception_handler(vm, UD_VECTOR, NULL);
- if (process_ucall(vm) != UCALL_DONE) {
+ if (process_ucall(vcpu) != UCALL_DONE) {
vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
/* Process emulated rdmsr and wrmsr instructions. */
- run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
- run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
- run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
- run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
- run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
+ run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+ run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
/* Confirm the guest completed without issues. */
- run_guest_then_process_ucall_done(vm);
+ run_guest_then_process_ucall_done(vcpu);
} else {
printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n");
}
@@ -616,16 +603,16 @@ static void test_msr_filter_allow(void) {
kvm_vm_free(vm);
}
-static int handle_ucall(struct kvm_vm *vm)
+static int handle_ucall(struct kvm_vcpu *vcpu)
{
struct ucall uc;
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("Guest assertion not met");
+ REPORT_GUEST_ASSERT(uc);
break;
case UCALL_SYNC:
- vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
+ vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
break;
case UCALL_DONE:
return 1;
@@ -673,25 +660,21 @@ static void handle_wrmsr(struct kvm_run *run)
}
}
-static void test_msr_filter_deny(void) {
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_X86_USER_SPACE_MSR,
- .args[0] = KVM_MSR_EXIT_REASON_INVAL |
- KVM_MSR_EXIT_REASON_UNKNOWN |
- KVM_MSR_EXIT_REASON_FILTER,
- };
+static void test_msr_filter_deny(void)
+{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
int rc;
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code_filter_deny);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_filter_deny);
+ run = vcpu->run;
rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
- vm_enable_cap(vm, &cap);
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
+ KVM_MSR_EXIT_REASON_UNKNOWN |
+ KVM_MSR_EXIT_REASON_FILTER);
rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
@@ -700,9 +683,7 @@ static void test_msr_filter_deny(void) {
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
while (1) {
- rc = _vcpu_run(vm, VCPU_ID);
-
- TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+ vcpu_run(vcpu);
switch (run->exit_reason) {
case KVM_EXIT_X86_RDMSR:
@@ -712,7 +693,7 @@ static void test_msr_filter_deny(void) {
handle_wrmsr(run);
break;
case KVM_EXIT_IO:
- if (handle_ucall(vm))
+ if (handle_ucall(vcpu))
goto done;
break;
}
@@ -726,31 +707,28 @@ done:
kvm_vm_free(vm);
}
-static void test_msr_permission_bitmap(void) {
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_X86_USER_SPACE_MSR,
- .args[0] = KVM_MSR_EXIT_REASON_FILTER,
- };
+static void test_msr_permission_bitmap(void)
+{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
int rc;
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code_permission_bitmap);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_permission_bitmap);
rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
- vm_enable_cap(vm, &cap);
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
- run_guest_then_process_rdmsr(vm, MSR_FS_BASE);
- TEST_ASSERT(run_guest_then_process_ucall(vm) == UCALL_SYNC, "Expected ucall state to be UCALL_SYNC.");
+ run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE);
+ TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC,
+ "Expected ucall state to be UCALL_SYNC.");
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
- run_guest_then_process_rdmsr(vm, MSR_GS_BASE);
- run_guest_then_process_ucall_done(vm);
+ run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+ run_guest_then_process_ucall_done(vcpu);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
index d438c4d3228a..5abecf06329e 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -28,11 +28,6 @@
#include "kselftest.h"
-#define VCPU_ID 0
-
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
static void l2_guest_code(void)
{
/* Exit to L1 */
@@ -77,33 +72,29 @@ static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
int main(int argc, char *argv[])
{
unsigned long apic_access_addr = ~0ul;
- unsigned int paddr_width;
- unsigned int vaddr_width;
vm_vaddr_t vmx_pages_gva;
unsigned long high_gpa;
struct vmx_pages *vmx;
bool done = false;
- nested_vmx_check_supported();
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- kvm_get_cpu_address_width(&paddr_width, &vaddr_width);
- high_gpa = (1ul << paddr_width) - getpagesize();
- if ((unsigned long)DEFAULT_GUEST_PHY_PAGES * getpagesize() > high_gpa) {
- print_skip("No unbacked physical page available");
- exit(KSFT_SKIP);
- }
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ high_gpa = (vm->max_gfn - 1) << vm->page_shift;
vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
prepare_virtualize_apic_accesses(vmx, vm);
- vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa);
+ vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa);
while (!done) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ volatile struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
if (apic_access_addr == high_gpa) {
TEST_ASSERT(run->exit_reason ==
KVM_EXIT_INTERNAL_ERROR,
@@ -121,10 +112,9 @@ int main(int argc, char *argv[])
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
apic_access_addr = uc.args[1];
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index edac8839e717..d79651b02740 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -18,15 +18,10 @@
#include "kselftest.h"
-#define VCPU_ID 5
-
enum {
PORT_L0_EXIT = 0x2000,
};
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
static void l2_guest_code(void)
{
/* Exit to L0 */
@@ -53,20 +48,22 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
/* Allocate VMX pages and shared descriptors (vmx_pages). */
vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ volatile struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
run->exit_reason,
@@ -75,9 +72,9 @@ int main(int argc, char *argv[])
if (run->io.port == PORT_L0_EXIT)
break;
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index 68f26a8b4f42..2d8c23d639f7 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -17,8 +17,6 @@
#include "processor.h"
#include "vmx.h"
-#define VCPU_ID 1
-
/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX 1
#define TEST_MEM_PAGES 3
@@ -73,18 +71,19 @@ int main(int argc, char *argv[])
unsigned long *bmap;
uint64_t *host_test_mem;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
struct ucall uc;
bool done = false;
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
- run = vcpu_state(vm, VCPU_ID);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ run = vcpu->run;
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -116,16 +115,15 @@ int main(int argc, char *argv[])
while (!done) {
memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
- _vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s),\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
/*
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
index 27a850f3d7ce..2641b286b4ed 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
@@ -10,10 +10,6 @@
#include "kselftest.h"
-#define VCPU_ID 0
-
-static struct kvm_vm *vm;
-
static void guest_ud_handler(struct ex_regs *regs)
{
/* Loop on the ud2 until guest state is made invalid. */
@@ -24,11 +20,11 @@ static void guest_code(void)
asm volatile("ud2");
}
-static void __run_vcpu_with_invalid_state(void)
+static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
"Expected KVM_EXIT_INTERNAL_ERROR, got %d (%s)\n",
@@ -38,15 +34,15 @@ static void __run_vcpu_with_invalid_state(void)
run->emulation_failure.suberror);
}
-static void run_vcpu_with_invalid_state(void)
+static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
{
/*
* Always run twice to verify KVM handles the case where _KVM_ queues
* an exception with invalid state and then exits to userspace, i.e.
* that KVM doesn't explode if userspace ignores the initial error.
*/
- __run_vcpu_with_invalid_state();
- __run_vcpu_with_invalid_state();
+ __run_vcpu_with_invalid_state(vcpu);
+ __run_vcpu_with_invalid_state(vcpu);
}
static void set_timer(void)
@@ -59,33 +55,43 @@ static void set_timer(void)
ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
}
-static void set_or_clear_invalid_guest_state(bool set)
+static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
{
static struct kvm_sregs sregs;
if (!sregs.cr0)
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.tr.unusable = !!set;
- vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void set_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+ set_or_clear_invalid_guest_state(vcpu, true);
}
-static void set_invalid_guest_state(void)
+static void clear_invalid_guest_state(struct kvm_vcpu *vcpu)
{
- set_or_clear_invalid_guest_state(true);
+ set_or_clear_invalid_guest_state(vcpu, false);
}
-static void clear_invalid_guest_state(void)
+static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu)
{
- set_or_clear_invalid_guest_state(false);
+ static struct kvm_vcpu *vcpu = NULL;
+
+ if (__vcpu)
+ vcpu = __vcpu;
+ return vcpu;
}
static void sigalrm_handler(int sig)
{
+ struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL);
struct kvm_vcpu_events events;
TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
- vcpu_events_get(vm, VCPU_ID, &events);
+ vcpu_events_get(vcpu, &events);
/*
* If an exception is pending, attempt KVM_RUN with invalid guest,
@@ -93,8 +99,8 @@ static void sigalrm_handler(int sig)
* between KVM queueing an exception and re-entering the guest.
*/
if (events.exception.pending) {
- set_invalid_guest_state();
- run_vcpu_with_invalid_state();
+ set_invalid_guest_state(vcpu);
+ run_vcpu_with_invalid_state(vcpu);
} else {
set_timer();
}
@@ -102,15 +108,17 @@ static void sigalrm_handler(int sig)
int main(int argc, char *argv[])
{
- if (!is_intel_cpu() || vm_is_unrestricted_guest(NULL)) {
- print_skip("Must be run with kvm_intel.unrestricted_guest=0");
- exit(KSFT_SKIP);
- }
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_intel_cpu());
+ TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
- vm = vm_create_default(VCPU_ID, 0, (void *)guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ get_set_sigalrm_vcpu(vcpu);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
@@ -119,8 +127,8 @@ int main(int argc, char *argv[])
* KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
* emulating invalid guest state for L2.
*/
- set_invalid_guest_state();
- run_vcpu_with_invalid_state();
+ set_invalid_guest_state(vcpu);
+ run_vcpu_with_invalid_state(vcpu);
/*
* Verify KVM also handles the case where userspace gains control while
@@ -129,11 +137,11 @@ int main(int argc, char *argv[])
* guest with invalid state when the handler interrupts KVM with an
* exception pending.
*/
- clear_invalid_guest_state();
+ clear_invalid_guest_state(vcpu);
TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
"Failed to register SIGALRM handler, errno = %d (%s)",
errno, strerror(errno));
set_timer();
- run_vcpu_with_invalid_state();
+ run_vcpu_with_invalid_state(vcpu);
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
index 489fbed4ca6f..6bfb4bb471ca 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
@@ -9,7 +9,6 @@
#include "kselftest.h"
-#define VCPU_ID 0
#define ARBITRARY_IO_PORT 0x2000
static struct kvm_vm *vm;
@@ -55,20 +54,21 @@ int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct ucall uc;
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
/* Allocate VMX pages and shared descriptors (vmx_pages). */
vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
- run = vcpu_state(vm, VCPU_ID);
+ run = vcpu->run;
/*
* The first exit to L0 userspace should be an I/O access from L2.
@@ -88,17 +88,17 @@ int main(int argc, char *argv[])
* emulating invalid guest state for L2.
*/
memset(&sregs, 0, sizeof(sregs));
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.tr.unusable = 1;
- vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_DONE:
break;
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
default:
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
new file mode 100644
index 000000000000..322d561b4260
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX control MSR test
+ *
+ * Copyright (C) 2022 Google LLC.
+ *
+ * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks
+ * that KVM will set owned bits where appropriate, and will not if
+ * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled.
+ */
+#include <linux/bitmap.h>
+#include "kvm_util.h"
+#include "vmx.h"
+
+static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+ uint64_t mask)
+{
+ uint64_t val = vcpu_get_msr(vcpu, msr_index);
+ uint64_t bit;
+
+ mask &= val;
+
+ for_each_set_bit(bit, &mask, 64) {
+ vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit));
+ vcpu_set_msr(vcpu, msr_index, val);
+ }
+}
+
+static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+ uint64_t mask)
+{
+ uint64_t val = vcpu_get_msr(vcpu, msr_index);
+ uint64_t bit;
+
+ mask = ~mask | val;
+
+ for_each_clear_bit(bit, &mask, 64) {
+ vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit));
+ vcpu_set_msr(vcpu, msr_index, val);
+ }
+}
+
+static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+ vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));
+ vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));
+}
+
+static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
+{
+ vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0);
+ vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull);
+
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC,
+ BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55));
+
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC,
+ BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) |
+ BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30));
+
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2);
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS);
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ /* No need to actually do KVM_RUN, thus no guest code. */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ vmx_save_restore_msrs_test(vcpu);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
index 280c01fd2412..465a9434d61c 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -15,9 +15,6 @@
#include "vmx.h"
#include "kselftest.h"
-
-#define VCPU_ID 0
-
/* L2 is scaled up (from L1's perspective) by this factor */
#define L2_SCALE_FACTOR 4ULL
@@ -119,14 +116,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_DONE();
}
-static void tsc_scaling_check_supported(void)
-{
- if (!kvm_check_cap(KVM_CAP_TSC_CONTROL)) {
- print_skip("TSC scaling not supported by the HW");
- exit(KSFT_SKIP);
- }
-}
-
static void stable_tsc_check_supported(void)
{
FILE *fp;
@@ -150,6 +139,7 @@ skip_test:
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
vm_vaddr_t vmx_pages_gva;
@@ -160,8 +150,8 @@ int main(int argc, char *argv[])
uint64_t l1_tsc_freq = 0;
uint64_t l2_tsc_freq = 0;
- nested_vmx_check_supported();
- tsc_scaling_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
stable_tsc_check_supported();
/*
@@ -182,30 +172,29 @@ int main(int argc, char *argv[])
l0_tsc_freq = tsc_end - tsc_start;
printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
- tsc_khz = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_TSC_KHZ, NULL);
+ tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
/* scale down L1's TSC frequency */
- vcpu_ioctl(vm, VCPU_ID, KVM_SET_TSC_KHZ,
- (void *) (tsc_khz / l1_scale_factor));
+ vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ volatile struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *) uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
case UCALL_SYNC:
switch (uc.args[0]) {
case USLEEP:
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
index 97b7fd4a9a3d..6ec901dab61e 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@ -17,9 +17,6 @@
#include "kvm_util.h"
#include "vmx.h"
-#define VCPU_ID 0
-
-#define X86_FEATURE_PDCM (1<<15)
#define PMU_CAP_FW_WRITES (1ULL << 13)
#define PMU_CAP_LBR_FMT 0x3f
@@ -56,11 +53,9 @@ static void guest_code(void)
int main(int argc, char *argv[])
{
- struct kvm_cpuid2 *cpuid;
- struct kvm_cpuid_entry2 *entry_1_0;
- struct kvm_cpuid_entry2 *entry_a_0;
- bool pdcm_supported = false;
+ const struct kvm_cpuid_entry2 *entry_a_0;
struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
int ret;
union cpuid10_eax eax;
union perf_capabilities host_cap;
@@ -69,46 +64,37 @@ int main(int argc, char *argv[])
host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT);
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- cpuid = kvm_get_supported_cpuid();
-
- if (kvm_get_cpuid_max_basic() >= 0xa) {
- entry_1_0 = kvm_get_supported_cpuid_index(1, 0);
- entry_a_0 = kvm_get_supported_cpuid_index(0xa, 0);
- pdcm_supported = entry_1_0 && !!(entry_1_0->ecx & X86_FEATURE_PDCM);
- eax.full = entry_a_0->eax;
- }
- if (!pdcm_supported) {
- print_skip("MSR_IA32_PERF_CAPABILITIES is not supported by the vCPU");
- exit(KSFT_SKIP);
- }
- if (!eax.split.version_id) {
- print_skip("PMU is not supported by the vCPU");
- exit(KSFT_SKIP);
- }
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+
+ TEST_REQUIRE(kvm_get_cpuid_max_basic() >= 0xa);
+ entry_a_0 = kvm_get_supported_cpuid_entry(0xa);
+
+ eax.full = entry_a_0->eax;
+ __TEST_REQUIRE(eax.split.version_id, "PMU is not supported by the vCPU");
/* testcase 1, set capabilities when we have PDCM bit */
- vcpu_set_cpuid(vm, VCPU_ID, cpuid);
- vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
/* check capabilities can be retrieved with KVM_GET_MSR */
- ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+ ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
/* check whatever we write with KVM_SET_MSR is _not_ modified */
- vcpu_run(vm, VCPU_ID);
- ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+ vcpu_run(vcpu);
+ ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
/* testcase 2, check valid LBR formats are accepted */
- vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0);
- ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+ ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), 0);
- vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
- ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
+ ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
/* testcase 3, check invalid LBR format is rejected */
/* Note, on Arch LBR capable platforms, LBR_FMT in perf capability msr is 0x3f,
* to avoid the failure, use a true invalid format 0x30 for the test. */
- ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0x30);
+ ret = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0x30);
TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
printf("Completed perf capability tests.\n");
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
index ff92e25b6f1e..0efdc05969a5 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -22,7 +22,6 @@
#include "processor.h"
#include "vmx.h"
-#define VCPU_ID 5
#define PREEMPTION_TIMER_VALUE 100000000ull
#define PREEMPTION_TIMER_VALUE_THRESHOLD1 80000000ull
@@ -159,6 +158,7 @@ int main(int argc, char *argv[])
struct kvm_regs regs1, regs2;
struct kvm_vm *vm;
struct kvm_run *run;
+ struct kvm_vcpu *vcpu;
struct kvm_x86_state *state;
struct ucall uc;
int stage;
@@ -167,33 +167,29 @@ int main(int argc, char *argv[])
* AMD currently does not implement any VMX features, so for now we
* just early out.
*/
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- print_skip("KVM_CAP_NESTED_STATE not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Stage %d: unexpected exit reason: %u (%s),\n",
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
@@ -232,22 +228,20 @@ int main(int argc, char *argv[])
stage, uc.args[4], uc.args[5]);
}
- state = vcpu_save_state(vm, VCPU_ID);
+ state = vcpu_save_state(vcpu);
memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
kvm_vm_release(vm);
/* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ run = vcpu->run;
kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
+ vcpu_regs_get(vcpu, &regs2);
TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
"Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
(ulong) regs2.rdi, (ulong) regs2.rsi);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
index 5827b9bae468..41ea7028a1f8 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -23,38 +23,37 @@
* changes this should be updated.
*/
#define VMCS12_REVISION 0x11e57ed0
-#define VCPU_ID 5
bool have_evmcs;
-void test_nested_state(struct kvm_vm *vm, struct kvm_nested_state *state)
+void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state)
{
- vcpu_nested_state_set(vm, VCPU_ID, state, false);
+ vcpu_nested_state_set(vcpu, state);
}
-void test_nested_state_expect_errno(struct kvm_vm *vm,
+void test_nested_state_expect_errno(struct kvm_vcpu *vcpu,
struct kvm_nested_state *state,
int expected_errno)
{
int rv;
- rv = vcpu_nested_state_set(vm, VCPU_ID, state, true);
+ rv = __vcpu_nested_state_set(vcpu, state);
TEST_ASSERT(rv == -1 && errno == expected_errno,
"Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
strerror(expected_errno), expected_errno, rv, strerror(errno),
errno);
}
-void test_nested_state_expect_einval(struct kvm_vm *vm,
+void test_nested_state_expect_einval(struct kvm_vcpu *vcpu,
struct kvm_nested_state *state)
{
- test_nested_state_expect_errno(vm, state, EINVAL);
+ test_nested_state_expect_errno(vcpu, state, EINVAL);
}
-void test_nested_state_expect_efault(struct kvm_vm *vm,
+void test_nested_state_expect_efault(struct kvm_vcpu *vcpu,
struct kvm_nested_state *state)
{
- test_nested_state_expect_errno(vm, state, EFAULT);
+ test_nested_state_expect_errno(vcpu, state, EFAULT);
}
void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
@@ -86,7 +85,7 @@ void set_default_vmx_state(struct kvm_nested_state *state, int size)
set_revision_id_for_vmcs12(state, VMCS12_REVISION);
}
-void test_vmx_nested_state(struct kvm_vm *vm)
+void test_vmx_nested_state(struct kvm_vcpu *vcpu)
{
/* Add a page for VMCS12. */
const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
@@ -96,14 +95,14 @@ void test_vmx_nested_state(struct kvm_vm *vm)
/* The format must be set to 0. 0 for VMX, 1 for SVM. */
set_default_vmx_state(state, state_sz);
state->format = 1;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* We cannot virtualize anything if the guest does not have VMX
* enabled.
*/
set_default_vmx_state(state, state_sz);
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* We cannot virtualize anything if the guest does not have VMX
@@ -112,17 +111,17 @@ void test_vmx_nested_state(struct kvm_vm *vm)
*/
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = -1ull;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
state->hdr.vmx.vmcs12_pa = -1ull;
state->flags = KVM_STATE_NESTED_EVMCS;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
state->flags = 0;
- test_nested_state(vm, state);
+ test_nested_state(vcpu, state);
/* Enable VMX in the guest CPUID. */
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX);
/*
* Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
@@ -133,34 +132,34 @@ void test_vmx_nested_state(struct kvm_vm *vm)
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = -1ull;
state->hdr.vmx.vmcs12_pa = -1ull;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
state->flags &= KVM_STATE_NESTED_EVMCS;
if (have_evmcs) {
- test_nested_state_expect_einval(vm, state);
- vcpu_enable_evmcs(vm, VCPU_ID);
+ test_nested_state_expect_einval(vcpu, state);
+ vcpu_enable_evmcs(vcpu);
}
- test_nested_state(vm, state);
+ test_nested_state(vcpu, state);
/* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
state->hdr.vmx.smm.flags = 1;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* Invalid flags are rejected. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.flags = ~0;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = -1ull;
state->flags = 0;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* It is invalid to have vmxon_pa set to a non-page aligned address. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = 1;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
@@ -170,7 +169,7 @@ void test_vmx_nested_state(struct kvm_vm *vm)
state->flags = KVM_STATE_NESTED_GUEST_MODE |
KVM_STATE_NESTED_RUN_PENDING;
state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* It is invalid to have any of the SMM flags set besides:
@@ -180,13 +179,13 @@ void test_vmx_nested_state(struct kvm_vm *vm)
set_default_vmx_state(state, state_sz);
state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
KVM_STATE_NESTED_SMM_VMXON);
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* Outside SMM, SMM flags must be zero. */
set_default_vmx_state(state, state_sz);
state->flags = 0;
state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* Size must be large enough to fit kvm_nested_state and vmcs12
@@ -195,13 +194,13 @@ void test_vmx_nested_state(struct kvm_vm *vm)
set_default_vmx_state(state, state_sz);
state->size = sizeof(*state);
state->flags = 0;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
set_default_vmx_state(state, state_sz);
state->size = sizeof(*state);
state->flags = 0;
state->hdr.vmx.vmcs12_pa = -1;
- test_nested_state(vm, state);
+ test_nested_state(vcpu, state);
/*
* KVM_SET_NESTED_STATE succeeds with invalid VMCS
@@ -209,7 +208,7 @@ void test_vmx_nested_state(struct kvm_vm *vm)
*/
set_default_vmx_state(state, state_sz);
state->flags = 0;
- test_nested_state(vm, state);
+ test_nested_state(vcpu, state);
/* Invalid flags are rejected, even if no VMCS loaded. */
set_default_vmx_state(state, state_sz);
@@ -217,13 +216,13 @@ void test_vmx_nested_state(struct kvm_vm *vm)
state->flags = 0;
state->hdr.vmx.vmcs12_pa = -1;
state->hdr.vmx.flags = ~0;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* vmxon_pa cannot be the same address as vmcs_pa. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = 0;
state->hdr.vmx.vmcs12_pa = 0;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* Test that if we leave nesting the state reflects that when we get
@@ -233,8 +232,8 @@ void test_vmx_nested_state(struct kvm_vm *vm)
state->hdr.vmx.vmxon_pa = -1ull;
state->hdr.vmx.vmcs12_pa = -1ull;
state->flags = 0;
- test_nested_state(vm, state);
- vcpu_nested_state_get(vm, VCPU_ID, state);
+ test_nested_state(vcpu, state);
+ vcpu_nested_state_get(vcpu, state);
TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
"Size must be between %ld and %d. The size returned was %d.",
sizeof(*state), state_sz, state->size);
@@ -244,54 +243,36 @@ void test_vmx_nested_state(struct kvm_vm *vm)
free(state);
}
-void disable_vmx(struct kvm_vm *vm)
-{
- struct kvm_cpuid2 *cpuid = kvm_get_supported_cpuid();
- int i;
-
- for (i = 0; i < cpuid->nent; ++i)
- if (cpuid->entries[i].function == 1 &&
- cpuid->entries[i].index == 0)
- break;
- TEST_ASSERT(i != cpuid->nent, "CPUID function 1 not found");
-
- cpuid->entries[i].ecx &= ~CPUID_VMX;
- vcpu_set_cpuid(vm, VCPU_ID, cpuid);
- cpuid->entries[i].ecx |= CPUID_VMX;
-}
-
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_nested_state state;
+ struct kvm_vcpu *vcpu;
have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
- if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- print_skip("KVM_CAP_NESTED_STATE not available");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
/*
* AMD currently does not implement set_nested_state, so for now we
* just early out.
*/
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- vm = vm_create_default(VCPU_ID, 0, 0);
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
/*
* First run tests with VMX disabled to check error handling.
*/
- disable_vmx(vm);
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
/* Passing a NULL kvm_nested_state causes a EFAULT. */
- test_nested_state_expect_efault(vm, NULL);
+ test_nested_state_expect_efault(vcpu, NULL);
/* 'size' cannot be smaller than sizeof(kvm_nested_state). */
set_default_state(&state);
state.size = 0;
- test_nested_state_expect_einval(vm, &state);
+ test_nested_state_expect_einval(vcpu, &state);
/*
* Setting the flags 0xf fails the flags check. The only flags that
@@ -302,7 +283,7 @@ int main(int argc, char *argv[])
*/
set_default_state(&state);
state.flags = 0xf;
- test_nested_state_expect_einval(vm, &state);
+ test_nested_state_expect_einval(vcpu, &state);
/*
* If KVM_STATE_NESTED_RUN_PENDING is set then
@@ -310,9 +291,9 @@ int main(int argc, char *argv[])
*/
set_default_state(&state);
state.flags = KVM_STATE_NESTED_RUN_PENDING;
- test_nested_state_expect_einval(vm, &state);
+ test_nested_state_expect_einval(vcpu, &state);
- test_vmx_nested_state(vm);
+ test_vmx_nested_state(vcpu);
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index 19b35c607dc6..5943187e8594 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -32,8 +32,6 @@
#define MSR_IA32_TSC_ADJUST 0x3b
#endif
-#define VCPU_ID 5
-
#define TSC_ADJUST_VALUE (1ll << 32)
#define TSC_OFFSET_VALUE -(1ll << 48)
@@ -127,28 +125,29 @@ static void report(int64_t val)
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
+ struct kvm_vcpu *vcpu;
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
/* Allocate VMX pages and shared descriptors (vmx_pages). */
vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ volatile struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
report(uc.args[1]);
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index afbbc40df884..3d272d7f961e 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -39,9 +39,6 @@
/* Default delay between migrate_pages calls (microseconds) */
#define DEFAULT_DELAY_USECS 500000
-#define HALTER_VCPU_ID 0
-#define SENDER_VCPU_ID 1
-
/*
* Vector for IPI from sender vCPU to halting vCPU.
* Value is arbitrary and was chosen for the alternating bit pattern. Any
@@ -79,8 +76,7 @@ struct test_data_page {
struct thread_params {
struct test_data_page *data;
- struct kvm_vm *vm;
- uint32_t vcpu_id;
+ struct kvm_vcpu *vcpu;
uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};
@@ -198,6 +194,7 @@ static void sender_guest_code(struct test_data_page *data)
static void *vcpu_thread(void *arg)
{
struct thread_params *params = (struct thread_params *)arg;
+ struct kvm_vcpu *vcpu = params->vcpu;
struct ucall uc;
int old;
int r;
@@ -206,17 +203,17 @@ static void *vcpu_thread(void *arg)
r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
TEST_ASSERT(r == 0,
"pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
- params->vcpu_id, r);
+ vcpu->id, r);
- fprintf(stderr, "vCPU thread running vCPU %u\n", params->vcpu_id);
- vcpu_run(params->vm, params->vcpu_id);
- exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason;
+ fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
+ vcpu_run(vcpu);
+ exit_reason = vcpu->run->exit_reason;
TEST_ASSERT(exit_reason == KVM_EXIT_IO,
"vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
- params->vcpu_id, exit_reason, exit_reason_str(exit_reason));
+ vcpu->id, exit_reason, exit_reason_str(exit_reason));
- if (get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_ABORT) {
+ if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
TEST_ASSERT(false,
"vCPU %u exited with error: %s.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
@@ -224,7 +221,7 @@ static void *vcpu_thread(void *arg)
"Halter TPR=%#x PPR=%#x LVR=%#x\n"
"Migrations attempted: %lu\n"
"Migrations completed: %lu\n",
- params->vcpu_id, (const char *)uc.args[0],
+ vcpu->id, (const char *)uc.args[0],
params->data->ipis_sent, params->data->hlt_count,
params->data->wake_count,
*params->pipis_rcvd, params->data->halter_tpr,
@@ -236,7 +233,7 @@ static void *vcpu_thread(void *arg)
return NULL;
}
-static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id)
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
{
void *retval;
int r;
@@ -244,12 +241,12 @@ static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id)
r = pthread_cancel(thread);
TEST_ASSERT(r == 0,
"pthread_cancel on vcpu_id=%d failed with errno=%d",
- vcpu_id, r);
+ vcpu->id, r);
r = pthread_join(thread, &retval);
TEST_ASSERT(r == 0,
"pthread_join on vcpu_id=%d failed with errno=%d",
- vcpu_id, r);
+ vcpu->id, r);
TEST_ASSERT(retval == PTHREAD_CANCELED,
"expected retval=%p, got %p", PTHREAD_CANCELED,
retval);
@@ -415,34 +412,30 @@ int main(int argc, char *argv[])
if (delay_usecs <= 0)
delay_usecs = DEFAULT_DELAY_USECS;
- vm = vm_create_default(HALTER_VCPU_ID, 0, halter_guest_code);
- params[0].vm = vm;
- params[1].vm = vm;
+ vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
+ vcpu_init_descriptor_tables(params[0].vcpu);
vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
- vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);
+ params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
test_data_page_vaddr = vm_vaddr_alloc_page(vm);
- data =
- (struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr);
+ data = addr_gva2hva(vm, test_data_page_vaddr);
memset(data, 0, sizeof(*data));
params[0].data = data;
params[1].data = data;
- vcpu_args_set(vm, HALTER_VCPU_ID, 1, test_data_page_vaddr);
- vcpu_args_set(vm, SENDER_VCPU_ID, 1, test_data_page_vaddr);
+ vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
+ vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
params[0].pipis_rcvd = pipis_rcvd;
params[1].pipis_rcvd = pipis_rcvd;
/* Start halter vCPU thread and wait for it to execute first HLT. */
- params[0].vcpu_id = HALTER_VCPU_ID;
r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
TEST_ASSERT(r == 0,
"pthread_create halter failed errno=%d", errno);
@@ -462,7 +455,6 @@ int main(int argc, char *argv[])
"Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
data->halter_apic_id, wait_secs);
- params[1].vcpu_id = SENDER_VCPU_ID;
r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
@@ -478,8 +470,8 @@ int main(int argc, char *argv[])
/*
* Cancel threads and wait for them to stop.
*/
- cancel_join_vcpu_thread(threads[0], HALTER_VCPU_ID);
- cancel_join_vcpu_thread(threads[1], SENDER_VCPU_ID);
+ cancel_join_vcpu_thread(threads[0], params[0].vcpu);
+ cancel_join_vcpu_thread(threads[1], params[1].vcpu);
fprintf(stderr,
"Test successful after running for %d seconds.\n"
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
index 0792334ba243..6f7a5ef66718 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
@@ -11,8 +11,8 @@
#include "processor.h"
#include "test_util.h"
-struct kvm_vcpu {
- uint32_t id;
+struct xapic_vcpu {
+ struct kvm_vcpu *vcpu;
bool is_x2apic;
};
@@ -47,8 +47,9 @@ static void x2apic_guest_code(void)
} while (1);
}
-static void ____test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t val)
+static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
{
+ struct kvm_vcpu *vcpu = x->vcpu;
struct kvm_lapic_state xapic;
struct ucall uc;
uint64_t icr;
@@ -58,40 +59,55 @@ static void ____test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t val)
* all bits are valid and should not be modified by KVM (ignoring the
* fact that vectors 0-15 are technically illegal).
*/
- vcpu_ioctl(vm, vcpu->id, KVM_GET_LAPIC, &xapic);
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
*((u32 *)&xapic.regs[APIC_IRR]) = val;
*((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32;
- vcpu_ioctl(vm, vcpu->id, KVM_SET_LAPIC, &xapic);
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
- vcpu_run(vm, vcpu->id);
- ASSERT_EQ(get_ucall(vm, vcpu->id, &uc), UCALL_SYNC);
+ vcpu_run(vcpu);
+ ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
ASSERT_EQ(uc.args[1], val);
- vcpu_ioctl(vm, vcpu->id, KVM_GET_LAPIC, &xapic);
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
(u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
- if (!vcpu->is_x2apic)
+ if (!x->is_x2apic) {
val &= (-1u | (0xffull << (32 + 24)));
- ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+ ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+ } else {
+ ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+ }
}
-static void __test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t val)
+#define X2APIC_RSVED_BITS_MASK (GENMASK_ULL(31,20) | \
+ GENMASK_ULL(17,16) | \
+ GENMASK_ULL(13,13))
+
+static void __test_icr(struct xapic_vcpu *x, uint64_t val)
{
- ____test_icr(vm, vcpu, val | APIC_ICR_BUSY);
- ____test_icr(vm, vcpu, val & ~(u64)APIC_ICR_BUSY);
+ if (x->is_x2apic) {
+ /* Hardware writing vICR register requires reserved bits 31:20,
+ * 17:16 and 13 kept as zero to avoid #GP exception. Data value
+ * written to vICR should mask out those bits above.
+ */
+ val &= ~X2APIC_RSVED_BITS_MASK;
+ }
+ ____test_icr(x, val | APIC_ICR_BUSY);
+ ____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
}
-static void test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+static void test_icr(struct xapic_vcpu *x)
{
+ struct kvm_vcpu *vcpu = x->vcpu;
uint64_t icr, i, j;
icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
for (i = 0; i <= 0xff; i++)
- __test_icr(vm, vcpu, icr | i);
+ __test_icr(x, icr | i);
icr = APIC_INT_ASSERT | APIC_DM_FIXED;
for (i = 0; i <= 0xff; i++)
- __test_icr(vm, vcpu, icr | i);
+ __test_icr(x, icr | i);
/*
* Send all flavors of IPIs to non-existent vCPUs. TODO: use number of
@@ -100,32 +116,30 @@ static void test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
icr = APIC_INT_ASSERT | 0xff;
for (i = vcpu->id + 1; i < 0xff; i++) {
for (j = 0; j < 8; j++)
- __test_icr(vm, vcpu, i << (32 + 24) | APIC_INT_ASSERT | (j << 8));
+ __test_icr(x, i << (32 + 24) | icr | (j << 8));
}
/* And again with a shorthand destination for all types of IPIs. */
icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT;
for (i = 0; i < 8; i++)
- __test_icr(vm, vcpu, icr | (i << 8));
+ __test_icr(x, icr | (i << 8));
/* And a few garbage value, just make sure it's an IRQ (blocked). */
- __test_icr(vm, vcpu, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
- __test_icr(vm, vcpu, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
- __test_icr(vm, vcpu, -1ull & ~APIC_DM_FIXED_MASK);
+ __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
+ __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
+ __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
}
int main(int argc, char *argv[])
{
- struct kvm_vcpu vcpu = {
- .id = 0,
+ struct xapic_vcpu x = {
+ .vcpu = NULL,
.is_x2apic = true,
};
- struct kvm_cpuid2 *cpuid;
struct kvm_vm *vm;
- int i;
- vm = vm_create_default(vcpu.id, 0, x2apic_guest_code);
- test_icr(vm, &vcpu);
+ vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code);
+ test_icr(&x);
kvm_vm_free(vm);
/*
@@ -133,18 +147,12 @@ int main(int argc, char *argv[])
* the guest in order to test AVIC. KVM disallows changing CPUID after
* KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
*/
- vm = vm_create_default(vcpu.id, 0, xapic_guest_code);
- vcpu.is_x2apic = false;
+ vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
+ x.is_x2apic = false;
- cpuid = vcpu_get_cpuid(vm, vcpu.id);
- for (i = 0; i < cpuid->nent; i++) {
- if (cpuid->entries[i].function == 1)
- break;
- }
- cpuid->entries[i].ecx &= ~BIT(21);
- vcpu_set_cpuid(vm, vcpu.id, cpuid);
+ vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
- test_icr(vm, &vcpu);
+ test_icr(&x);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 7a51bb648fbb..8a5cb800f50e 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -18,8 +18,6 @@
#include <sys/eventfd.h>
-#define VCPU_ID 5
-
#define SHINFO_REGION_GVA 0xc0000000ULL
#define SHINFO_REGION_GPA 0xc0000000ULL
#define SHINFO_REGION_SLOT 10
@@ -42,8 +40,6 @@
#define EVTCHN_TEST2 66
#define EVTCHN_TIMER 13
-static struct kvm_vm *vm;
-
#define XEN_HYPERCALL_MSR 0x40000000
#define MIN_STEAL_TIME 50000
@@ -344,29 +340,29 @@ static int cmp_timespec(struct timespec *a, struct timespec *b)
else
return 0;
}
-struct vcpu_info *vinfo;
+
+static struct vcpu_info *vinfo;
+static struct kvm_vcpu *vcpu;
static void handle_alrm(int sig)
{
if (vinfo)
printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
- vcpu_dump(stdout, vm, VCPU_ID, 0);
+ vcpu_dump(stdout, vcpu, 0);
TEST_FAIL("IRQ delivery timed out");
}
int main(int argc, char *argv[])
{
struct timespec min_ts, max_ts, vm_ts;
+ struct kvm_vm *vm;
bool verbose;
verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
!strncmp(argv[1], "--verbose", 10));
int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
- if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
- print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
@@ -374,8 +370,7 @@ int main(int argc, char *argv[])
clock_gettime(CLOCK_REALTIME, &min_ts);
- vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
/* Map a region for the shared_info page */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -425,13 +420,13 @@ int main(int argc, char *argv[])
.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
.u.gpa = VCPU_INFO_ADDR,
};
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
struct kvm_xen_vcpu_attr pvclock = {
.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
.u.gpa = PVTIME_ADDR,
};
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
struct kvm_xen_hvm_attr vec = {
.type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
@@ -440,7 +435,7 @@ int main(int argc, char *argv[])
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
if (do_runstate_tests) {
@@ -448,7 +443,7 @@ int main(int argc, char *argv[])
.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
.u.gpa = RUNSTATE_ADDR,
};
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
}
int irq_fd[2] = { -1, -1 };
@@ -468,16 +463,16 @@ int main(int argc, char *argv[])
irq_routes.entries[0].gsi = 32;
irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
- irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID;
+ irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
irq_routes.entries[1].gsi = 33;
irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
- irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID;
+ irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
- vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes);
+ vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
struct kvm_irqfd ifd = { };
@@ -508,14 +503,14 @@ int main(int argc, char *argv[])
.u.evtchn.type = EVTCHNSTAT_interdomain,
.u.evtchn.flags = 0,
.u.evtchn.deliver.port.port = EVTCHN_TEST1,
- .u.evtchn.deliver.port.vcpu = VCPU_ID + 1,
+ .u.evtchn.deliver.port.vcpu = vcpu->id + 1,
.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
};
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
/* Test migration to a different vCPU */
inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
- inj.u.evtchn.deliver.port.vcpu = VCPU_ID;
+ inj.u.evtchn.deliver.port.vcpu = vcpu->id;
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
inj.u.evtchn.send_port = 197;
@@ -524,7 +519,7 @@ int main(int argc, char *argv[])
inj.u.evtchn.flags = 0;
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
}
vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
vinfo->evtchn_upcall_pending = 0;
@@ -535,19 +530,19 @@ int main(int argc, char *argv[])
bool evtchn_irq_expected = false;
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ volatile struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC: {
struct kvm_xen_vcpu_attr rst;
@@ -574,7 +569,7 @@ int main(int argc, char *argv[])
printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
rst.u.runstate.state = uc.args[1];
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
break;
case 4:
@@ -589,7 +584,7 @@ int main(int argc, char *argv[])
0x6b6b - rs->time[RUNSTATE_offline];
rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
rst.u.runstate.time_offline;
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
break;
case 5:
@@ -601,7 +596,7 @@ int main(int argc, char *argv[])
rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
rst.u.runstate.time_blocked = 0x6b6b;
rst.u.runstate.time_offline = 0x5a;
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
break;
case 6:
@@ -660,7 +655,7 @@ int main(int argc, char *argv[])
struct kvm_irq_routing_xen_evtchn e;
e.port = EVTCHN_TEST2;
- e.vcpu = VCPU_ID;
+ e.vcpu = vcpu->id;
e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
@@ -702,7 +697,7 @@ int main(int argc, char *argv[])
case 14:
memset(&tmr, 0, sizeof(tmr));
tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
"Timer port not returned");
TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
@@ -721,8 +716,8 @@ int main(int argc, char *argv[])
if (verbose)
printf("Testing restored oneshot timer\n");
- tmr.u.timer.expires_ns = rs->state_entry_time + 100000000,
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
evtchn_irq_expected = true;
alarm(1);
break;
@@ -748,8 +743,8 @@ int main(int argc, char *argv[])
if (verbose)
printf("Testing SCHEDOP_poll wake on masked event\n");
- tmr.u.timer.expires_ns = rs->state_entry_time + 100000000,
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
alarm(1);
break;
@@ -760,11 +755,11 @@ int main(int argc, char *argv[])
evtchn_irq_expected = true;
tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
/* Read it back and check the pending time is reported correctly */
tmr.u.timer.expires_ns = 0;
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
"Timer not reported pending");
alarm(1);
@@ -774,7 +769,7 @@ int main(int argc, char *argv[])
TEST_ASSERT(!evtchn_irq_expected,
"Expected event channel IRQ but it didn't happen");
/* Read timer and check it is no longer pending */
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
shinfo->evtchn_pending[0] = 0;
@@ -783,7 +778,7 @@ int main(int argc, char *argv[])
evtchn_irq_expected = true;
tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
alarm(1);
break;
@@ -853,7 +848,7 @@ int main(int argc, char *argv[])
struct kvm_xen_vcpu_attr rst = {
.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
};
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
if (verbose) {
printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
index b30fe9de1d4f..88914d48c65e 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -11,13 +11,9 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 5
-
#define HCALL_REGION_GPA 0xc0000000ULL
#define HCALL_REGION_SLOT 10
-static struct kvm_vm *vm;
-
#define INPUTVALUE 17
#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
#define RETVALUE 0xcafef00dfbfbffffUL
@@ -84,14 +80,15 @@ static void guest_code(void)
int main(int argc, char *argv[])
{
- if (!(kvm_check_cap(KVM_CAP_XEN_HVM) &
- KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) ) {
- print_skip("KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL not available");
- exit(KSFT_SKIP);
- }
+ unsigned int xen_caps;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+ TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
- vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
- vcpu_set_hv_cpuid(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_set_hv_cpuid(vcpu);
struct kvm_xen_hvm_config hvmc = {
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
@@ -105,10 +102,10 @@ int main(int argc, char *argv[])
virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ volatile struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
if (run->exit_reason == KVM_EXIT_XEN) {
ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
@@ -130,9 +127,9 @@ int main(int argc, char *argv[])
run->exit_reason,
exit_reason_str(run->exit_reason));
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
index 3529376747c2..e0ddf47362e7 100644
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
@@ -12,64 +12,44 @@
#include "kvm_util.h"
#include "vmx.h"
-#define VCPU_ID 1
#define MSR_BITS 64
-#define X86_FEATURE_XSAVES (1<<3)
-
-bool is_supported_msr(u32 msr_index)
-{
- struct kvm_msr_list *list;
- bool found = false;
- int i;
-
- list = kvm_get_msr_index_list();
- for (i = 0; i < list->nmsrs; ++i) {
- if (list->indices[i] == msr_index) {
- found = true;
- break;
- }
- }
-
- free(list);
- return found;
-}
-
int main(int argc, char *argv[])
{
- struct kvm_cpuid_entry2 *entry;
- bool xss_supported = false;
+ bool xss_in_msr_list;
struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
uint64_t xss_val;
int i, r;
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, 0);
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
- if (kvm_get_cpuid_max_basic() >= 0xd) {
- entry = kvm_get_supported_cpuid_index(0xd, 1);
- xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES);
- }
- if (!xss_supported) {
- print_skip("IA32_XSS is not supported by the vCPU");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES));
- xss_val = vcpu_get_msr(vm, VCPU_ID, MSR_IA32_XSS);
+ xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
TEST_ASSERT(xss_val == 0,
"MSR_IA32_XSS should be initialized to zero\n");
- vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, xss_val);
+ vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
+
/*
* At present, KVM only supports a guest IA32_XSS value of 0. Verify
* that trying to set the guest IA32_XSS to an unsupported value fails.
* Also, in the future when a non-zero value succeeds check that
- * IA32_XSS is in the KVM_GET_MSR_INDEX_LIST.
+ * IA32_XSS is in the list of MSRs to save/restore.
*/
+ xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS);
for (i = 0; i < MSR_BITS; ++i) {
- r = _vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, 1ull << i);
- TEST_ASSERT(r == 0 || is_supported_msr(MSR_IA32_XSS),
- "IA32_XSS was able to be set, but was not found in KVM_GET_MSR_INDEX_LIST.\n");
+ r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i);
+
+ /*
+ * Setting a list of MSRs returns the entry that "faulted", or
+ * the last entry +1 if all MSRs were successfully written.
+ */
+ TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
+ TEST_ASSERT(r != 1 || xss_in_msr_list,
+ "IA32_XSS was able to be set, but was not in save/restore list");
}
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile
index 0b0049e133bb..a6959df28eb0 100644
--- a/tools/testing/selftests/landlock/Makefile
+++ b/tools/testing/selftests/landlock/Makefile
@@ -8,17 +8,11 @@ TEST_GEN_PROGS := $(src_test:.c=)
TEST_GEN_PROGS_EXTENDED := true
-KSFT_KHDR_INSTALL := 1
OVERRIDE_TARGETS := 1
include ../lib.mk
-khdr_dir = $(top_srcdir)/usr/include
-
-$(khdr_dir)/linux/landlock.h: khdr
- @:
-
$(OUTPUT)/true: true.c
$(LINK.c) $< $(LDLIBS) -o $@ -static
-$(OUTPUT)/%_test: %_test.c $(khdr_dir)/linux/landlock.h ../kselftest_harness.h common.h
- $(LINK.c) $< $(LDLIBS) -o $@ -lcap -I$(khdr_dir)
+$(OUTPUT)/%_test: %_test.c ../kselftest_harness.h common.h
+ $(LINK.c) $< $(LDLIBS) -o $@ -lcap
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 1a5cc3cd97ec..947fc72413e9 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -51,45 +51,7 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
-ifdef KSFT_KHDR_INSTALL
-top_srcdir ?= ../../../..
-include $(top_srcdir)/scripts/subarch.include
-ARCH ?= $(SUBARCH)
-
-# set default goal to all, so make without a target runs all, even when
-# all isn't the first target in the file.
-.DEFAULT_GOAL := all
-
-# Invoke headers install with --no-builtin-rules to avoid circular
-# dependency in "make kselftest" case. In this case, second level
-# make inherits builtin-rules which will use the rule generate
-# Makefile.o and runs into
-# "Circular Makefile.o <- prepare dependency dropped."
-# and headers_install fails and test compile fails.
-# O= KBUILD_OUTPUT cases don't run into this error, since main Makefile
-# invokes them as sub-makes and --no-builtin-rules is not necessary,
-# but doesn't cause any failures. Keep it simple and use the same
-# flags in both cases.
-# Note that the support to install headers from lib.mk is necessary
-# when test Makefile is run directly with "make -C".
-# When local build is done, headers are installed in the default
-# INSTALL_HDR_PATH usr/include.
-.PHONY: khdr
-.NOTPARALLEL:
-khdr:
-ifndef KSFT_KHDR_INSTALL_DONE
-ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
- $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
-else
- $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \
- ARCH=$(ARCH) -C $(top_srcdir) headers_install
-endif
-endif
-
-all: khdr $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
-else
all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
-endif
define RUN_TESTS
BASE_DIR="$(selfdir)"; \
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index a29f79618934..892306bdb47d 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -36,4 +36,6 @@ test_unix_oob
gro
ioam6_parser
toeplitz
+tun
cmsg_sender
+unix_connect \ No newline at end of file
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index ddad703ace34..e2dfef8b78a7 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -11,7 +11,7 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
-TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh
+TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh
TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh
TEST_PROGS += route_localnet.sh
TEST_PROGS += reuseaddr_ports_exhausted.sh
@@ -35,9 +35,12 @@ TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
+TEST_PROGS += srv6_hencap_red_l3vpn_test.sh
+TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
TEST_PROGS += vrf_strict_mode_test.sh
TEST_PROGS += arp_ndisc_evict_nocarrier.sh
TEST_PROGS += ndisc_unsolicited_na_test.sh
+TEST_PROGS += arp_ndisc_untracked_subnets.sh
TEST_PROGS += stress_reuseport_listen.sh
TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
@@ -59,10 +62,10 @@ TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
TEST_PROGS += test_vxlan_vnifiltering.sh
+TEST_GEN_FILES += io_uring_zerocopy_tx
TEST_FILES := settings
-KSFT_KHDR_INSTALL := 1
include ../lib.mk
include bpf/Makefile
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index df341648f818..969620ae9928 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,2 +1,3 @@
-TEST_GEN_PROGS := test_unix_oob
+TEST_GEN_PROGS := test_unix_oob unix_connect
+
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c
new file mode 100644
index 000000000000..d799fd8f5c7c
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/unix_connect.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(unix_connect)
+{
+ int server, client;
+ int family;
+};
+
+FIXTURE_VARIANT(unix_connect)
+{
+ int type;
+ char sun_path[8];
+ int len;
+ int flags;
+ int err;
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_pathname)
+{
+ .type = SOCK_STREAM,
+ .sun_path = "test",
+ .len = 4 + 1,
+ .flags = 0,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_abstract)
+{
+ .type = SOCK_STREAM,
+ .sun_path = "\0test",
+ .len = 5,
+ .flags = 0,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_pathname_netns)
+{
+ .type = SOCK_STREAM,
+ .sun_path = "test",
+ .len = 4 + 1,
+ .flags = CLONE_NEWNET,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_abstract_netns)
+{
+ .type = SOCK_STREAM,
+ .sun_path = "\0test",
+ .len = 5,
+ .flags = CLONE_NEWNET,
+ .err = ECONNREFUSED,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname)
+{
+ .type = SOCK_DGRAM,
+ .sun_path = "test",
+ .len = 4 + 1,
+ .flags = 0,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract)
+{
+ .type = SOCK_DGRAM,
+ .sun_path = "\0test",
+ .len = 5,
+ .flags = 0,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname_netns)
+{
+ .type = SOCK_DGRAM,
+ .sun_path = "test",
+ .len = 4 + 1,
+ .flags = CLONE_NEWNET,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract_netns)
+{
+ .type = SOCK_DGRAM,
+ .sun_path = "\0test",
+ .len = 5,
+ .flags = CLONE_NEWNET,
+ .err = ECONNREFUSED,
+};
+
+FIXTURE_SETUP(unix_connect)
+{
+ self->family = AF_UNIX;
+}
+
+FIXTURE_TEARDOWN(unix_connect)
+{
+ close(self->server);
+ close(self->client);
+
+ if (variant->sun_path[0])
+ remove("test");
+}
+
+TEST_F(unix_connect, test)
+{
+ socklen_t addrlen;
+ struct sockaddr_un addr = {
+ .sun_family = self->family,
+ };
+ int err;
+
+ self->server = socket(self->family, variant->type, 0);
+ ASSERT_NE(-1, self->server);
+
+ addrlen = offsetof(struct sockaddr_un, sun_path) + variant->len;
+ memcpy(&addr.sun_path, variant->sun_path, variant->len);
+
+ err = bind(self->server, (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, err);
+
+ if (variant->type == SOCK_STREAM) {
+ err = listen(self->server, 32);
+ ASSERT_EQ(0, err);
+ }
+
+ err = unshare(variant->flags);
+ ASSERT_EQ(0, err);
+
+ self->client = socket(self->family, variant->type, 0);
+ ASSERT_LT(0, self->client);
+
+ err = connect(self->client, (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(variant->err, err == -1 ? errno : 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh
new file mode 100755
index 000000000000..c899b446acb6
--- /dev/null
+++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh
@@ -0,0 +1,308 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# 2 namespaces: one host and one router. Use arping from the host to send a
+# garp to the router. Router accepts or ignores based on its arp_accept
+# or accept_untracked_na configuration.
+
+TESTS="arp ndisc"
+
+ROUTER_NS="ns-router"
+ROUTER_NS_V6="ns-router-v6"
+ROUTER_INTF="veth-router"
+ROUTER_ADDR="10.0.10.1"
+ROUTER_ADDR_V6="2001:db8:abcd:0012::1"
+
+HOST_NS="ns-host"
+HOST_NS_V6="ns-host-v6"
+HOST_INTF="veth-host"
+HOST_ADDR="10.0.10.2"
+HOST_ADDR_V6="2001:db8:abcd:0012::2"
+
+SUBNET_WIDTH=24
+PREFIX_WIDTH_V6=64
+
+cleanup() {
+ ip netns del ${HOST_NS}
+ ip netns del ${ROUTER_NS}
+}
+
+cleanup_v6() {
+ ip netns del ${HOST_NS_V6}
+ ip netns del ${ROUTER_NS_V6}
+}
+
+setup() {
+ set -e
+ local arp_accept=$1
+
+ # Set up two namespaces
+ ip netns add ${ROUTER_NS}
+ ip netns add ${HOST_NS}
+
+ # Set up interfaces veth0 and veth1, which are pairs in separate
+ # namespaces. veth0 is veth-router, veth1 is veth-host.
+ # first, set up the inteface's link to the namespace
+ # then, set the interface "up"
+ ip netns exec ${ROUTER_NS} ip link add name ${ROUTER_INTF} \
+ type veth peer name ${HOST_INTF}
+
+ ip netns exec ${ROUTER_NS} ip link set dev ${ROUTER_INTF} up
+ ip netns exec ${ROUTER_NS} ip link set dev ${HOST_INTF} netns ${HOST_NS}
+
+ ip netns exec ${HOST_NS} ip link set dev ${HOST_INTF} up
+ ip netns exec ${ROUTER_NS} ip addr add ${ROUTER_ADDR}/${SUBNET_WIDTH} \
+ dev ${ROUTER_INTF}
+
+ ip netns exec ${HOST_NS} ip addr add ${HOST_ADDR}/${SUBNET_WIDTH} \
+ dev ${HOST_INTF}
+ ip netns exec ${HOST_NS} ip route add default via ${HOST_ADDR} \
+ dev ${HOST_INTF}
+ ip netns exec ${ROUTER_NS} ip route add default via ${ROUTER_ADDR} \
+ dev ${ROUTER_INTF}
+
+ ROUTER_CONF=net.ipv4.conf.${ROUTER_INTF}
+ ip netns exec ${ROUTER_NS} sysctl -w \
+ ${ROUTER_CONF}.arp_accept=${arp_accept} >/dev/null 2>&1
+ set +e
+}
+
+setup_v6() {
+ set -e
+ local accept_untracked_na=$1
+
+ # Set up two namespaces
+ ip netns add ${ROUTER_NS_V6}
+ ip netns add ${HOST_NS_V6}
+
+ # Set up interfaces veth0 and veth1, which are pairs in separate
+ # namespaces. veth0 is veth-router, veth1 is veth-host.
+ # first, set up the inteface's link to the namespace
+ # then, set the interface "up"
+ ip -6 -netns ${ROUTER_NS_V6} link add name ${ROUTER_INTF} \
+ type veth peer name ${HOST_INTF}
+
+ ip -6 -netns ${ROUTER_NS_V6} link set dev ${ROUTER_INTF} up
+ ip -6 -netns ${ROUTER_NS_V6} link set dev ${HOST_INTF} netns \
+ ${HOST_NS_V6}
+
+ ip -6 -netns ${HOST_NS_V6} link set dev ${HOST_INTF} up
+ ip -6 -netns ${ROUTER_NS_V6} addr add \
+ ${ROUTER_ADDR_V6}/${PREFIX_WIDTH_V6} dev ${ROUTER_INTF} nodad
+
+ HOST_CONF=net.ipv6.conf.${HOST_INTF}
+ ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.ndisc_notify=1
+ ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.disable_ipv6=0
+ ip -6 -netns ${HOST_NS_V6} addr add ${HOST_ADDR_V6}/${PREFIX_WIDTH_V6} \
+ dev ${HOST_INTF}
+
+ ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF}
+
+ ip netns exec ${ROUTER_NS_V6} sysctl -w \
+ ${ROUTER_CONF}.forwarding=1 >/dev/null 2>&1
+ ip netns exec ${ROUTER_NS_V6} sysctl -w \
+ ${ROUTER_CONF}.drop_unsolicited_na=0 >/dev/null 2>&1
+ ip netns exec ${ROUTER_NS_V6} sysctl -w \
+ ${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na} \
+ >/dev/null 2>&1
+ set +e
+}
+
+verify_arp() {
+ local arp_accept=$1
+ local same_subnet=$2
+
+ neigh_show_output=$(ip netns exec ${ROUTER_NS} ip neigh get \
+ ${HOST_ADDR} dev ${ROUTER_INTF} 2>/dev/null)
+
+ if [ ${arp_accept} -eq 1 ]; then
+ # Neighbor entries expected
+ [[ ${neigh_show_output} ]]
+ elif [ ${arp_accept} -eq 2 ]; then
+ if [ ${same_subnet} -eq 1 ]; then
+ # Neighbor entries expected
+ [[ ${neigh_show_output} ]]
+ else
+ [[ -z "${neigh_show_output}" ]]
+ fi
+ else
+ [[ -z "${neigh_show_output}" ]]
+ fi
+ }
+
+arp_test_gratuitous() {
+ set -e
+ local arp_accept=$1
+ local same_subnet=$2
+
+ if [ ${arp_accept} -eq 2 ]; then
+ test_msg=("test_arp: "
+ "accept_arp=$1 "
+ "same_subnet=$2")
+ if [ ${same_subnet} -eq 0 ]; then
+ HOST_ADDR=10.0.11.3
+ else
+ HOST_ADDR=10.0.10.3
+ fi
+ else
+ test_msg=("test_arp: "
+ "accept_arp=$1")
+ fi
+ # Supply arp_accept option to set up which sets it in sysctl
+ setup ${arp_accept}
+ ip netns exec ${HOST_NS} arping -A -U ${HOST_ADDR} -c1 2>&1 >/dev/null
+
+ if verify_arp $1 $2; then
+ printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}"
+ else
+ printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}"
+ fi
+ cleanup
+ set +e
+}
+
+arp_test_gratuitous_combinations() {
+ arp_test_gratuitous 0
+ arp_test_gratuitous 1
+ arp_test_gratuitous 2 0 # Second entry indicates subnet or not
+ arp_test_gratuitous 2 1
+}
+
+cleanup_tcpdump() {
+ set -e
+ [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout}
+ [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr}
+ tcpdump_stdout=
+ tcpdump_stderr=
+ set +e
+}
+
+start_tcpdump() {
+ set -e
+ tcpdump_stdout=`mktemp`
+ tcpdump_stderr=`mktemp`
+ ip netns exec ${ROUTER_NS_V6} timeout 15s \
+ tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \
+ "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR_V6}" \
+ > ${tcpdump_stdout} 2> /dev/null
+ set +e
+}
+
+verify_ndisc() {
+ local accept_untracked_na=$1
+ local same_subnet=$2
+
+ neigh_show_output=$(ip -6 -netns ${ROUTER_NS_V6} neigh show \
+ to ${HOST_ADDR_V6} dev ${ROUTER_INTF} nud stale)
+
+ if [ ${accept_untracked_na} -eq 1 ]; then
+ # Neighbour entry expected to be present
+ [[ ${neigh_show_output} ]]
+ elif [ ${accept_untracked_na} -eq 2 ]; then
+ if [ ${same_subnet} -eq 1 ]; then
+ [[ ${neigh_show_output} ]]
+ else
+ [[ -z "${neigh_show_output}" ]]
+ fi
+ else
+ # Neighbour entry expected to be absent for all other cases
+ [[ -z "${neigh_show_output}" ]]
+ fi
+}
+
+ndisc_test_untracked_advertisements() {
+ set -e
+ test_msg=("test_ndisc: "
+ "accept_untracked_na=$1")
+
+ local accept_untracked_na=$1
+ local same_subnet=$2
+ if [ ${accept_untracked_na} -eq 2 ]; then
+ test_msg=("test_ndisc: "
+ "accept_untracked_na=$1 "
+ "same_subnet=$2")
+ if [ ${same_subnet} -eq 0 ]; then
+ # Not same subnet
+ HOST_ADDR_V6=2000:db8:abcd:0013::4
+ else
+ HOST_ADDR_V6=2001:db8:abcd:0012::3
+ fi
+ fi
+ setup_v6 $1 $2
+ start_tcpdump
+
+ if verify_ndisc $1 $2; then
+ printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}"
+ else
+ printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}"
+ fi
+
+ cleanup_tcpdump
+ cleanup_v6
+ set +e
+}
+
+ndisc_test_untracked_combinations() {
+ ndisc_test_untracked_advertisements 0
+ ndisc_test_untracked_advertisements 1
+ ndisc_test_untracked_advertisements 2 0
+ ndisc_test_untracked_advertisements 2 1
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+ echo "SKIP: Could not run test without tcpdump tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v arping)" ]; then
+ echo "SKIP: Could not run test without arping tool"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+cleanup_v6 &> /dev/null
+
+for t in $TESTS
+do
+ case $t in
+ arp_test_gratuitous_combinations|arp) arp_test_gratuitous_combinations;;
+ ndisc_test_untracked_combinations|ndisc) \
+ ndisc_test_untracked_combinations;;
+ help) echo "Test names: $TESTS"; exit 0;;
+esac
+done
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index bc2162909a1a..75dd83e39207 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -456,7 +456,7 @@ int main(int argc, char *argv[])
buf[1] = 0;
} else if (opt.sock.type == SOCK_RAW) {
struct udphdr hdr = { 1, 2, htons(opt.size), 0 };
- struct sockaddr_in6 *sin6 = (void *)ai->ai_addr;;
+ struct sockaddr_in6 *sin6 = (void *)ai->ai_addr;
memcpy(buf, &hdr, sizeof(hdr));
sin6->sin6_port = htons(opt.sock.proto);
diff --git a/tools/testing/selftests/net/fib_nexthop_nongw.sh b/tools/testing/selftests/net/fib_nexthop_nongw.sh
new file mode 100755
index 000000000000..b7b928b38ce4
--- /dev/null
+++ b/tools/testing/selftests/net/fib_nexthop_nongw.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# ns: h1 | ns: h2
+# 192.168.0.1/24 |
+# eth0 |
+# | 192.168.1.1/32
+# veth0 <---|---> veth1
+# Validate source address selection for route without gateway
+
+PAUSE_ON_FAIL=no
+VERBOSE=0
+ret=0
+
+################################################################################
+# helpers
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo "$out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# config
+setup()
+{
+ ip netns add h1
+ ip -n h1 link set lo up
+ ip netns add h2
+ ip -n h2 link set lo up
+
+ # Add a fake eth0 to support an ip address
+ ip -n h1 link add name eth0 type dummy
+ ip -n h1 link set eth0 up
+ ip -n h1 address add 192.168.0.1/24 dev eth0
+
+ # Configure veths (same @mac, arp off)
+ ip -n h1 link add name veth0 type veth peer name veth1 netns h2
+ ip -n h1 link set veth0 up
+
+ ip -n h2 link set veth1 up
+
+ # Configure @IP in the peer netns
+ ip -n h2 address add 192.168.1.1/32 dev veth1
+ ip -n h2 route add default dev veth1
+
+ # Add a nexthop without @gw and use it in a route
+ ip -n h1 nexthop add id 1 dev veth0
+ ip -n h1 route add 192.168.1.1 nhid 1
+}
+
+cleanup()
+{
+ ip netns del h1 2>/dev/null
+ ip netns del h2 2>/dev/null
+}
+
+trap cleanup EXIT
+
+################################################################################
+# main
+
+while getopts :pv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ esac
+done
+
+cleanup
+setup
+
+run_cmd ip -netns h1 route get 192.168.1.1
+log_test $? 0 "nexthop: get route with nexthop without gw"
+run_cmd ip netns exec h1 ping -c1 192.168.1.1
+log_test $? 0 "nexthop: ping through nexthop without gw"
+
+exit $ret
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index bbe3b379927a..c245476fa29d 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -303,6 +303,29 @@ run_fibrule_tests()
log_section "IPv6 fib rule"
fib_rule6_test
}
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 8f481218a492..a9c5c1be5088 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -3,6 +3,7 @@
TEST_PROGS = bridge_igmp.sh \
bridge_locked_port.sh \
bridge_mdb.sh \
+ bridge_mdb_port_down.sh \
bridge_mld.sh \
bridge_port_isolation.sh \
bridge_sticky_fdb.sh \
@@ -37,6 +38,7 @@ TEST_PROGS = bridge_igmp.sh \
ipip_hier_gre_key.sh \
ipip_hier_gre_keys.sh \
ipip_hier_gre.sh \
+ local_termination.sh \
loopback.sh \
mirror_gre_bound.sh \
mirror_gre_bridge_1d.sh \
@@ -52,6 +54,7 @@ TEST_PROGS = bridge_igmp.sh \
mirror_gre_vlan_bridge_1q.sh \
mirror_gre_vlan.sh \
mirror_vlan.sh \
+ no_forwarding.sh \
pedit_dsfield.sh \
pedit_ip.sh \
pedit_l4port.sh \
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh
new file mode 100755
index 000000000000..1a0480e71d83
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that permanent mdb entries can be added to and deleted from bridge
+# interfaces that are down, and works correctly when done so.
+
+ALL_TESTS="add_del_to_port_down"
+NUM_NETIFS=4
+
+TEST_GROUP="239.10.10.10"
+TEST_GROUP_MAC="01:00:5e:0a:0a:0a"
+
+source lib.sh
+
+
+add_del_to_port_down() {
+ RET=0
+
+ ip link set dev $swp2 down
+ bridge mdb add dev br0 port "$swp2" grp $TEST_GROUP permanent 2>/dev/null
+ check_err $? "Failed adding mdb entry"
+
+ ip link set dev $swp2 up
+ setup_wait_dev $swp2
+ mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
+ check_fail $? "Traffic to $TEST_GROUP wasn't forwarded"
+
+ ip link set dev $swp2 down
+ bridge mdb show dev br0 | grep -q "$TEST_GROUP permanent" 2>/dev/null
+ check_err $? "MDB entry did not persist after link up/down"
+
+ bridge mdb del dev br0 port "$swp2" grp $TEST_GROUP 2>/dev/null
+ check_err $? "Failed deleting mdb entry"
+
+ ip link set dev $swp2 up
+ setup_wait_dev $swp2
+ mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
+ check_err $? "Traffic to $TEST_GROUP was forwarded after entry removed"
+
+ log_test "MDB add/del entry to port with state down "
+}
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ # Enable multicast filtering
+ ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+
+ bridge link set dev $swp2 mcast_flood off
+ # Bridge currently has a "grace time" at creation time before it
+ # forwards multicast according to the mdb. Since we disable the
+ # mcast_flood setting per port
+ sleep 10
+}
+
+switch_destroy()
+{
+ ip link set dev $swp1 down
+ ip link set dev $swp2 down
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+ h2_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
index 4b42dfd4efd1..072faa77f53b 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
@@ -11,6 +11,8 @@ NUM_NETIFS=2
source lib.sh
source ethtool_lib.sh
+TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
+
setup_prepare()
{
swp1=${NETIFS[p1]}
@@ -18,7 +20,7 @@ setup_prepare()
swp3=$NETIF_NO_CABLE
}
-ethtool_extended_state_check()
+ethtool_ext_state()
{
local dev=$1; shift
local expected_ext_state=$1; shift
@@ -30,21 +32,27 @@ ethtool_extended_state_check()
| sed -e 's/^[[:space:]]*//')
ext_state=$(echo $ext_state | cut -d "," -f1)
- [[ $ext_state == $expected_ext_state ]]
- check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\""
-
- [[ $ext_substate == $expected_ext_substate ]]
- check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+ if [[ $ext_state != $expected_ext_state ]]; then
+ echo "Expected \"$expected_ext_state\", got \"$ext_state\""
+ return 1
+ fi
+ if [[ $ext_substate != $expected_ext_substate ]]; then
+ echo "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+ return 1
+ fi
}
autoneg()
{
+ local msg
+
RET=0
ip link set dev $swp1 up
- sleep 4
- ethtool_extended_state_check $swp1 "Autoneg" "No partner detected"
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
+ "Autoneg" "No partner detected")
+ check_err $? "$msg"
log_test "Autoneg, No partner detected"
@@ -53,6 +61,8 @@ autoneg()
autoneg_force_mode()
{
+ local msg
+
RET=0
ip link set dev $swp1 up
@@ -65,12 +75,13 @@ autoneg_force_mode()
ethtool_set $swp1 speed $speed1 autoneg off
ethtool_set $swp2 speed $speed2 autoneg off
- sleep 4
- ethtool_extended_state_check $swp1 "Autoneg" \
- "No partner detected during force mode"
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
+ "Autoneg" "No partner detected during force mode")
+ check_err $? "$msg"
- ethtool_extended_state_check $swp2 "Autoneg" \
- "No partner detected during force mode"
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \
+ "Autoneg" "No partner detected during force mode")
+ check_err $? "$msg"
log_test "Autoneg, No partner detected during force mode"
@@ -83,12 +94,14 @@ autoneg_force_mode()
no_cable()
{
+ local msg
+
RET=0
ip link set dev $swp3 up
- sleep 1
- ethtool_extended_state_check $swp3 "No cable"
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable")
+ check_err $? "$msg"
log_test "No cable"
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 37ae49d47853..3ffb9d6c0950 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1240,6 +1240,7 @@ learning_test()
# FDB entry was installed.
bridge link set dev $br_port1 flood off
+ ip link set $host1_if promisc on
tc qdisc add dev $host1_if ingress
tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
flower dst_mac $mac action drop
@@ -1250,7 +1251,7 @@ learning_test()
tc -j -s filter show dev $host1_if ingress \
| jq -e ".[] | select(.options.handle == 101) \
| select(.options.actions[0].stats.packets == 1)" &> /dev/null
- check_fail $? "Packet reached second host when should not"
+ check_fail $? "Packet reached first host when should not"
$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
sleep 1
@@ -1289,6 +1290,7 @@ learning_test()
tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
tc qdisc del dev $host1_if ingress
+ ip link set $host1_if promisc off
bridge link set dev $br_port1 flood on
@@ -1306,6 +1308,7 @@ flood_test_do()
# Add an ACL on `host2_if` which will tell us whether the packet
# was flooded to it or not.
+ ip link set $host2_if promisc on
tc qdisc add dev $host2_if ingress
tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
flower dst_mac $mac action drop
@@ -1323,6 +1326,7 @@ flood_test_do()
tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
tc qdisc del dev $host2_if ingress
+ ip link set $host2_if promisc off
return $err
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
index 28d568c48a73..91e431cd919e 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -141,12 +141,13 @@ switch_create()
ip link set dev $swp4 up
ip link add name br1 type bridge vlan_filtering 1
- ip link set dev br1 up
- __addr_add_del br1 add 192.0.2.129/32
- ip -4 route add 192.0.2.130/32 dev br1
team_create lag loadbalance $swp3 $swp4
ip link set dev lag master br1
+
+ ip link set dev br1 up
+ __addr_add_del br1 add 192.0.2.129/32
+ ip -4 route add 192.0.2.130/32 dev br1
}
switch_destroy()
diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
index 0727e2012b68..43469c7de118 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
@@ -525,7 +525,7 @@ arp_suppression()
log_test "neigh_suppress: on / neigh exists: yes"
- # Delete the neighbour from the the SVI. A single ARP request should be
+ # Delete the neighbour from the SVI. A single ARP request should be
# received by the remote VTEP
RET=0
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
new file mode 100644
index 000000000000..9d64c560a2d6
--- /dev/null
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: MIT */
+/* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/io_uring.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+
+#define NOTIF_TAG 0xfffffffULL
+#define NONZC_TAG 0
+#define ZC_TAG 1
+
+enum {
+ MODE_NONZC = 0,
+ MODE_ZC = 1,
+ MODE_ZC_FIXED = 2,
+ MODE_MIXED = 3,
+};
+
+static bool cfg_flush = false;
+static bool cfg_cork = false;
+static int cfg_mode = MODE_ZC_FIXED;
+static int cfg_nr_reqs = 8;
+static int cfg_family = PF_UNSPEC;
+static int cfg_payload_len;
+static int cfg_port = 8000;
+static int cfg_runtime_ms = 4200;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+static char payload[IP_MAXPACKET] __attribute__((aligned(4096)));
+
+struct io_sq_ring {
+ unsigned *head;
+ unsigned *tail;
+ unsigned *ring_mask;
+ unsigned *ring_entries;
+ unsigned *flags;
+ unsigned *array;
+};
+
+struct io_cq_ring {
+ unsigned *head;
+ unsigned *tail;
+ unsigned *ring_mask;
+ unsigned *ring_entries;
+ struct io_uring_cqe *cqes;
+};
+
+struct io_uring_sq {
+ unsigned *khead;
+ unsigned *ktail;
+ unsigned *kring_mask;
+ unsigned *kring_entries;
+ unsigned *kflags;
+ unsigned *kdropped;
+ unsigned *array;
+ struct io_uring_sqe *sqes;
+
+ unsigned sqe_head;
+ unsigned sqe_tail;
+
+ size_t ring_sz;
+};
+
+struct io_uring_cq {
+ unsigned *khead;
+ unsigned *ktail;
+ unsigned *kring_mask;
+ unsigned *kring_entries;
+ unsigned *koverflow;
+ struct io_uring_cqe *cqes;
+
+ size_t ring_sz;
+};
+
+struct io_uring {
+ struct io_uring_sq sq;
+ struct io_uring_cq cq;
+ int ring_fd;
+};
+
+#ifdef __alpha__
+# ifndef __NR_io_uring_setup
+# define __NR_io_uring_setup 535
+# endif
+# ifndef __NR_io_uring_enter
+# define __NR_io_uring_enter 536
+# endif
+# ifndef __NR_io_uring_register
+# define __NR_io_uring_register 537
+# endif
+#else /* !__alpha__ */
+# ifndef __NR_io_uring_setup
+# define __NR_io_uring_setup 425
+# endif
+# ifndef __NR_io_uring_enter
+# define __NR_io_uring_enter 426
+# endif
+# ifndef __NR_io_uring_register
+# define __NR_io_uring_register 427
+# endif
+#endif
+
+#if defined(__x86_64) || defined(__i386__)
+#define read_barrier() __asm__ __volatile__("":::"memory")
+#define write_barrier() __asm__ __volatile__("":::"memory")
+#else
+
+#define read_barrier() __sync_synchronize()
+#define write_barrier() __sync_synchronize()
+#endif
+
+static int io_uring_setup(unsigned int entries, struct io_uring_params *p)
+{
+ return syscall(__NR_io_uring_setup, entries, p);
+}
+
+static int io_uring_enter(int fd, unsigned int to_submit,
+ unsigned int min_complete,
+ unsigned int flags, sigset_t *sig)
+{
+ return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
+ flags, sig, _NSIG / 8);
+}
+
+static int io_uring_register_buffers(struct io_uring *ring,
+ const struct iovec *iovecs,
+ unsigned nr_iovecs)
+{
+ int ret;
+
+ ret = syscall(__NR_io_uring_register, ring->ring_fd,
+ IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
+ return (ret < 0) ? -errno : ret;
+}
+
+static int io_uring_register_notifications(struct io_uring *ring,
+ unsigned nr,
+ struct io_uring_notification_slot *slots)
+{
+ int ret;
+ struct io_uring_notification_register r = {
+ .nr_slots = nr,
+ .data = (unsigned long)slots,
+ };
+
+ ret = syscall(__NR_io_uring_register, ring->ring_fd,
+ IORING_REGISTER_NOTIFIERS, &r, sizeof(r));
+ return (ret < 0) ? -errno : ret;
+}
+
+static int io_uring_mmap(int fd, struct io_uring_params *p,
+ struct io_uring_sq *sq, struct io_uring_cq *cq)
+{
+ size_t size;
+ void *ptr;
+ int ret;
+
+ sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
+ ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
+ if (ptr == MAP_FAILED)
+ return -errno;
+ sq->khead = ptr + p->sq_off.head;
+ sq->ktail = ptr + p->sq_off.tail;
+ sq->kring_mask = ptr + p->sq_off.ring_mask;
+ sq->kring_entries = ptr + p->sq_off.ring_entries;
+ sq->kflags = ptr + p->sq_off.flags;
+ sq->kdropped = ptr + p->sq_off.dropped;
+ sq->array = ptr + p->sq_off.array;
+
+ size = p->sq_entries * sizeof(struct io_uring_sqe);
+ sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
+ if (sq->sqes == MAP_FAILED) {
+ ret = -errno;
+err:
+ munmap(sq->khead, sq->ring_sz);
+ return ret;
+ }
+
+ cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
+ ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
+ if (ptr == MAP_FAILED) {
+ ret = -errno;
+ munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
+ goto err;
+ }
+ cq->khead = ptr + p->cq_off.head;
+ cq->ktail = ptr + p->cq_off.tail;
+ cq->kring_mask = ptr + p->cq_off.ring_mask;
+ cq->kring_entries = ptr + p->cq_off.ring_entries;
+ cq->koverflow = ptr + p->cq_off.overflow;
+ cq->cqes = ptr + p->cq_off.cqes;
+ return 0;
+}
+
+static int io_uring_queue_init(unsigned entries, struct io_uring *ring,
+ unsigned flags)
+{
+ struct io_uring_params p;
+ int fd, ret;
+
+ memset(ring, 0, sizeof(*ring));
+ memset(&p, 0, sizeof(p));
+ p.flags = flags;
+
+ fd = io_uring_setup(entries, &p);
+ if (fd < 0)
+ return fd;
+ ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq);
+ if (!ret)
+ ring->ring_fd = fd;
+ else
+ close(fd);
+ return ret;
+}
+
+static int io_uring_submit(struct io_uring *ring)
+{
+ struct io_uring_sq *sq = &ring->sq;
+ const unsigned mask = *sq->kring_mask;
+ unsigned ktail, submitted, to_submit;
+ int ret;
+
+ read_barrier();
+ if (*sq->khead != *sq->ktail) {
+ submitted = *sq->kring_entries;
+ goto submit;
+ }
+ if (sq->sqe_head == sq->sqe_tail)
+ return 0;
+
+ ktail = *sq->ktail;
+ to_submit = sq->sqe_tail - sq->sqe_head;
+ for (submitted = 0; submitted < to_submit; submitted++) {
+ read_barrier();
+ sq->array[ktail++ & mask] = sq->sqe_head++ & mask;
+ }
+ if (!submitted)
+ return 0;
+
+ if (*sq->ktail != ktail) {
+ write_barrier();
+ *sq->ktail = ktail;
+ write_barrier();
+ }
+submit:
+ ret = io_uring_enter(ring->ring_fd, submitted, 0,
+ IORING_ENTER_GETEVENTS, NULL);
+ return ret < 0 ? -errno : ret;
+}
+
+static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
+ const void *buf, size_t len, int flags)
+{
+ memset(sqe, 0, sizeof(*sqe));
+ sqe->opcode = (__u8) IORING_OP_SEND;
+ sqe->fd = sockfd;
+ sqe->addr = (unsigned long) buf;
+ sqe->len = len;
+ sqe->msg_flags = (__u32) flags;
+}
+
+static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
+ const void *buf, size_t len, int flags,
+ unsigned slot_idx, unsigned zc_flags)
+{
+ io_uring_prep_send(sqe, sockfd, buf, len, flags);
+ sqe->opcode = (__u8) IORING_OP_SENDZC_NOTIF;
+ sqe->notification_idx = slot_idx;
+ sqe->ioprio = zc_flags;
+}
+
+static struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
+{
+ struct io_uring_sq *sq = &ring->sq;
+
+ if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries)
+ return NULL;
+ return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask];
+}
+
+static int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
+{
+ struct io_uring_cq *cq = &ring->cq;
+ const unsigned mask = *cq->kring_mask;
+ unsigned head = *cq->khead;
+ int ret;
+
+ *cqe_ptr = NULL;
+ do {
+ read_barrier();
+ if (head != *cq->ktail) {
+ *cqe_ptr = &cq->cqes[head & mask];
+ break;
+ }
+ ret = io_uring_enter(ring->ring_fd, 0, 1,
+ IORING_ENTER_GETEVENTS, NULL);
+ if (ret < 0)
+ return -errno;
+ } while (1);
+
+ return 0;
+}
+
+static inline void io_uring_cqe_seen(struct io_uring *ring)
+{
+ *(&ring->cq)->khead += 1;
+ write_barrier();
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+ if (setsockopt(fd, level, optname, &val, sizeof(val)))
+ error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
+}
+
+static int do_setup_tx(int domain, int type, int protocol)
+{
+ int fd;
+
+ fd = socket(domain, type, protocol);
+ if (fd == -1)
+ error(1, errno, "socket t");
+
+ do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
+
+ if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
+ error(1, errno, "connect");
+ return fd;
+}
+
+static void do_tx(int domain, int type, int protocol)
+{
+ struct io_uring_notification_slot b[1] = {{.tag = NOTIF_TAG}};
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ unsigned long packets = 0, bytes = 0;
+ struct io_uring ring;
+ struct iovec iov;
+ uint64_t tstop;
+ int i, fd, ret;
+ int compl_cqes = 0;
+
+ fd = do_setup_tx(domain, type, protocol);
+
+ ret = io_uring_queue_init(512, &ring, 0);
+ if (ret)
+ error(1, ret, "io_uring: queue init");
+
+ ret = io_uring_register_notifications(&ring, 1, b);
+ if (ret)
+ error(1, ret, "io_uring: tx ctx registration");
+
+ iov.iov_base = payload;
+ iov.iov_len = cfg_payload_len;
+
+ ret = io_uring_register_buffers(&ring, &iov, 1);
+ if (ret)
+ error(1, ret, "io_uring: buffer registration");
+
+ tstop = gettimeofday_ms() + cfg_runtime_ms;
+ do {
+ if (cfg_cork)
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
+
+ for (i = 0; i < cfg_nr_reqs; i++) {
+ unsigned zc_flags = 0;
+ unsigned buf_idx = 0;
+ unsigned slot_idx = 0;
+ unsigned mode = cfg_mode;
+ unsigned msg_flags = 0;
+
+ if (cfg_mode == MODE_MIXED)
+ mode = rand() % 3;
+
+ sqe = io_uring_get_sqe(&ring);
+
+ if (mode == MODE_NONZC) {
+ io_uring_prep_send(sqe, fd, payload,
+ cfg_payload_len, msg_flags);
+ sqe->user_data = NONZC_TAG;
+ } else {
+ if (cfg_flush) {
+ zc_flags |= IORING_RECVSEND_NOTIF_FLUSH;
+ compl_cqes++;
+ }
+ io_uring_prep_sendzc(sqe, fd, payload,
+ cfg_payload_len,
+ msg_flags, slot_idx, zc_flags);
+ if (mode == MODE_ZC_FIXED) {
+ sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
+ sqe->buf_index = buf_idx;
+ }
+ sqe->user_data = ZC_TAG;
+ }
+ }
+
+ ret = io_uring_submit(&ring);
+ if (ret != cfg_nr_reqs)
+ error(1, ret, "submit");
+
+ for (i = 0; i < cfg_nr_reqs; i++) {
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret)
+ error(1, ret, "wait cqe");
+
+ if (cqe->user_data == NOTIF_TAG) {
+ compl_cqes--;
+ i--;
+ } else if (cqe->user_data != NONZC_TAG &&
+ cqe->user_data != ZC_TAG) {
+ error(1, cqe->res, "invalid user_data");
+ } else if (cqe->res <= 0 && cqe->res != -EAGAIN) {
+ error(1, cqe->res, "send failed");
+ } else {
+ if (cqe->res > 0) {
+ packets++;
+ bytes += cqe->res;
+ }
+ /* failed requests don't flush */
+ if (cfg_flush &&
+ cqe->res <= 0 &&
+ cqe->user_data == ZC_TAG)
+ compl_cqes--;
+ }
+ io_uring_cqe_seen(&ring);
+ }
+ if (cfg_cork)
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
+ } while (gettimeofday_ms() < tstop);
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n",
+ packets, bytes >> 20,
+ packets / (cfg_runtime_ms / 1000),
+ (bytes >> 20) / (cfg_runtime_ms / 1000));
+
+ while (compl_cqes) {
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret)
+ error(1, ret, "wait cqe");
+ io_uring_cqe_seen(&ring);
+ compl_cqes--;
+ }
+}
+
+static void do_test(int domain, int type, int protocol)
+{
+ int i;
+
+ for (i = 0; i < IP_MAXPACKET; i++)
+ payload[i] = 'a' + (i % 26);
+ do_tx(domain, type, protocol);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [-f] [-n<N>] [-z0] [-s<payload size>] "
+ "(-4|-6) [-t<time s>] -D<dst_ip> udp", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ const int max_payload_len = sizeof(payload) -
+ sizeof(struct ipv6hdr) -
+ sizeof(struct tcphdr) -
+ 40 /* max tcp options */;
+ struct sockaddr_in6 *addr6 = (void *) &cfg_dst_addr;
+ struct sockaddr_in *addr4 = (void *) &cfg_dst_addr;
+ char *daddr = NULL;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+ cfg_payload_len = max_payload_len;
+
+ while ((c = getopt(argc, argv, "46D:p:s:t:n:fc:m:")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'D':
+ daddr = optarg;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 's':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 't':
+ cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
+ break;
+ case 'n':
+ cfg_nr_reqs = strtoul(optarg, NULL, 0);
+ break;
+ case 'f':
+ cfg_flush = 1;
+ break;
+ case 'c':
+ cfg_cork = strtol(optarg, NULL, 0);
+ break;
+ case 'm':
+ cfg_mode = strtol(optarg, NULL, 0);
+ break;
+ }
+ }
+
+ switch (cfg_family) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (daddr &&
+ inet_pton(AF_INET, daddr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", daddr);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (daddr &&
+ inet_pton(AF_INET6, daddr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", daddr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+
+ if (cfg_payload_len > max_payload_len)
+ error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
+ if (cfg_mode == MODE_NONZC && cfg_flush)
+ error(1, 0, "-f: only zerocopy modes support notifications");
+ if (optind != argc - 1)
+ usage(argv[0]);
+}
+
+int main(int argc, char **argv)
+{
+ const char *cfg_test = argv[argc - 1];
+
+ parse_opts(argc, argv);
+
+ if (!strcmp(cfg_test, "tcp"))
+ do_test(cfg_family, SOCK_STREAM, 0);
+ else if (!strcmp(cfg_test, "udp"))
+ do_test(cfg_family, SOCK_DGRAM, 0);
+ else
+ error(1, 0, "unknown cfg_test %s", cfg_test);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
new file mode 100755
index 000000000000..6a65e4437640
--- /dev/null
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+#
+# Send data between two processes across namespaces
+# Run twice: once without and once with zerocopy
+
+set -e
+
+readonly DEV="veth0"
+readonly DEV_MTU=65535
+readonly BIN_TX="./io_uring_zerocopy_tx"
+readonly BIN_RX="./msg_zerocopy"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+readonly path_sysctl_mem="net.core.optmem_max"
+
+# No arguments: automated test
+if [[ "$#" -eq "0" ]]; then
+ IPs=( "4" "6" )
+ protocols=( "tcp" "udp" )
+
+ for IP in "${IPs[@]}"; do
+ for proto in "${protocols[@]}"; do
+ for mode in $(seq 1 3); do
+ $0 "$IP" "$proto" -m "$mode" -t 1 -n 32
+ $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -f
+ $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -c -f
+ done
+ done
+ done
+
+ echo "OK. All tests passed"
+ exit 0
+fi
+
+# Argument parsing
+if [[ "$#" -lt "2" ]]; then
+ echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
+ exit 1
+fi
+
+readonly IP="$1"
+shift
+readonly TXMODE="$1"
+shift
+readonly EXTRA_ARGS="$@"
+
+# Argument parsing: configure addresses
+if [[ "${IP}" == "4" ]]; then
+ readonly SADDR="${SADDR4}"
+ readonly DADDR="${DADDR4}"
+elif [[ "${IP}" == "6" ]]; then
+ readonly SADDR="${SADDR6}"
+ readonly DADDR="${DADDR6}"
+else
+ echo "Invalid IP version ${IP}"
+ exit 1
+fi
+
+# Argument parsing: select receive mode
+#
+# This differs from send mode for
+# - packet: use raw recv, because packet receives skb clones
+# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option
+case "${TXMODE}" in
+'packet' | 'packet_dgram' | 'raw_hdrincl')
+ RXMODE='raw'
+ ;;
+*)
+ RXMODE="${TXMODE}"
+ ;;
+esac
+
+# Start of state changes: install cleanup handler
+save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
+
+cleanup() {
+ ip netns del "${NS2}"
+ ip netns del "${NS1}"
+ sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
+}
+
+trap cleanup EXIT
+
+# Configure system settings
+sysctl -w -q "${path_sysctl_mem}=1000000"
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
+ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+
+# Optionally disable sg or csum offload to test edge cases
+# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+
+do_test() {
+ local readonly ARGS="$1"
+
+ echo "ipv${IP} ${TXMODE} ${ARGS}"
+ ip netns exec "${NS2}" "${BIN_RX}" "-${IP}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" -r "${RXMODE}" &
+ sleep 0.2
+ ip netns exec "${NS1}" "${BIN_TX}" "-${IP}" -t 1 -D "${DADDR}" ${ARGS} "${TXMODE}"
+ wait
+}
+
+do_test "${EXTRA_ARGS}"
+echo ok
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
index a2b9fad5a9a6..4ceb401da1bf 100755
--- a/tools/testing/selftests/net/ioam6.sh
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -117,6 +117,8 @@
# | Schema Data | |
# +-----------------------------------------------------------+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
################################################################################
# #
@@ -211,7 +213,7 @@ check_kernel_compatibility()
echo "SKIP: kernel version probably too old, missing ioam support"
ip link del veth0 2>/dev/null || true
ip netns del ioam-tmp-node || true
- exit 1
+ exit $ksft_skip
fi
ip -netns ioam-tmp-node route add db02::/64 encap ioam6 mode inline \
@@ -227,7 +229,7 @@ check_kernel_compatibility()
"without CONFIG_IPV6_IOAM6_LWTUNNEL?"
ip link del veth0 2>/dev/null || true
ip netns del ioam-tmp-node || true
- exit 1
+ exit $ksft_skip
fi
ip link del veth0 2>/dev/null || true
@@ -752,20 +754,20 @@ nfailed=0
if [ "$(id -u)" -ne 0 ]
then
echo "SKIP: Need root privileges"
- exit 1
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]
then
echo "SKIP: Could not run test without ip tool"
- exit 1
+ exit $ksft_skip
fi
ip ioam &>/dev/null
if [ $? = 1 ]
then
echo "SKIP: iproute2 too old, missing ioam command"
- exit 1
+ exit $ksft_skip
fi
check_kernel_compatibility
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c
index a7c41375374f..708a9822259d 100644
--- a/tools/testing/selftests/net/ipv6_flowlabel.c
+++ b/tools/testing/selftests/net/ipv6_flowlabel.c
@@ -9,6 +9,7 @@
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
+#include <linux/icmpv6.h>
#include <linux/in6.h>
#include <stdbool.h>
#include <stdio.h>
@@ -29,26 +30,48 @@
#ifndef IPV6_FLOWLABEL_MGR
#define IPV6_FLOWLABEL_MGR 32
#endif
+#ifndef IPV6_FLOWINFO_SEND
+#define IPV6_FLOWINFO_SEND 33
+#endif
#define FLOWLABEL_WILDCARD ((uint32_t) -1)
static const char cfg_data[] = "a";
static uint32_t cfg_label = 1;
+static bool use_ping;
+static bool use_flowinfo_send;
+
+static struct icmp6hdr icmp6 = {
+ .icmp6_type = ICMPV6_ECHO_REQUEST
+};
+
+static struct sockaddr_in6 addr = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_LOOPBACK_INIT,
+};
static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel)
{
char control[CMSG_SPACE(sizeof(flowlabel))] = {0};
struct msghdr msg = {0};
- struct iovec iov = {0};
+ struct iovec iov = {
+ .iov_base = (char *)cfg_data,
+ .iov_len = sizeof(cfg_data)
+ };
int ret;
- iov.iov_base = (char *)cfg_data;
- iov.iov_len = sizeof(cfg_data);
+ if (use_ping) {
+ iov.iov_base = &icmp6;
+ iov.iov_len = sizeof(icmp6);
+ }
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
- if (with_flowlabel) {
+ if (use_flowinfo_send) {
+ msg.msg_name = &addr;
+ msg.msg_namelen = sizeof(addr);
+ } else if (with_flowlabel) {
struct cmsghdr *cm;
cm = (void *)control;
@@ -94,6 +117,8 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
ret = recvmsg(fd, &msg, 0);
if (ret == -1)
error(1, errno, "recv");
+ if (use_ping)
+ goto parse_cmsg;
if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
error(1, 0, "recv: truncated");
if (ret != sizeof(cfg_data))
@@ -101,6 +126,7 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
if (memcmp(data, cfg_data, sizeof(data)))
error(1, 0, "recv: data mismatch");
+parse_cmsg:
cm = CMSG_FIRSTHDR(&msg);
if (with_flowlabel) {
if (!cm)
@@ -114,9 +140,11 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm));
fprintf(stderr, "recv with label %u\n", flowlabel);
- if (expect != FLOWLABEL_WILDCARD && expect != flowlabel)
+ if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) {
fprintf(stderr, "recv: incorrect flowlabel %u != %u\n",
flowlabel, expect);
+ error(1, 0, "recv: flowlabel is wrong");
+ }
} else {
fprintf(stderr, "recv without label\n");
@@ -165,11 +193,17 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "l:")) != -1) {
+ while ((c = getopt(argc, argv, "l:ps")) != -1) {
switch (c) {
case 'l':
cfg_label = strtoul(optarg, NULL, 0);
break;
+ case 'p':
+ use_ping = true;
+ break;
+ case 's':
+ use_flowinfo_send = true;
+ break;
default:
error(1, 0, "%s: parse error", argv[0]);
}
@@ -178,27 +212,30 @@ static void parse_opts(int argc, char **argv)
int main(int argc, char **argv)
{
- struct sockaddr_in6 addr = {
- .sin6_family = AF_INET6,
- .sin6_port = htons(8000),
- .sin6_addr = IN6ADDR_LOOPBACK_INIT,
- };
const int one = 1;
int fdt, fdr;
+ int prot = 0;
+
+ addr.sin6_port = htons(8000);
parse_opts(argc, argv);
- fdt = socket(PF_INET6, SOCK_DGRAM, 0);
+ if (use_ping) {
+ fprintf(stderr, "attempting to use ping sockets\n");
+ prot = IPPROTO_ICMPV6;
+ }
+
+ fdt = socket(PF_INET6, SOCK_DGRAM, prot);
if (fdt == -1)
error(1, errno, "socket t");
- fdr = socket(PF_INET6, SOCK_DGRAM, 0);
+ fdr = use_ping ? fdt : socket(PF_INET6, SOCK_DGRAM, 0);
if (fdr == -1)
error(1, errno, "socket r");
if (connect(fdt, (void *)&addr, sizeof(addr)))
error(1, errno, "connect");
- if (bind(fdr, (void *)&addr, sizeof(addr)))
+ if (!use_ping && bind(fdr, (void *)&addr, sizeof(addr)))
error(1, errno, "bind");
flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE);
@@ -216,13 +253,21 @@ int main(int argc, char **argv)
do_recv(fdr, false, 0);
}
+ if (use_flowinfo_send) {
+ fprintf(stderr, "using IPV6_FLOWINFO_SEND to send label\n");
+ addr.sin6_flowinfo = htonl(cfg_label);
+ if (setsockopt(fdt, SOL_IPV6, IPV6_FLOWINFO_SEND, &one,
+ sizeof(one)) == -1)
+ error(1, errno, "setsockopt flowinfo_send");
+ }
+
fprintf(stderr, "send label\n");
do_send(fdt, true, cfg_label);
do_recv(fdr, true, cfg_label);
if (close(fdr))
error(1, errno, "close r");
- if (close(fdt))
+ if (!use_ping && close(fdt))
error(1, errno, "close t");
return 0;
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh
index d3bc6442704e..cee95e252bee 100755
--- a/tools/testing/selftests/net/ipv6_flowlabel.sh
+++ b/tools/testing/selftests/net/ipv6_flowlabel.sh
@@ -18,4 +18,20 @@ echo "TEST datapath (with auto-flowlabels)"
./in_netns.sh \
sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1'
+echo "TEST datapath (with ping-sockets)"
+./in_netns.sh \
+ sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \
+ sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \
+ ./ipv6_flowlabel -l 1 -p'
+
+echo "TEST datapath (with flowinfo-send)"
+./in_netns.sh \
+ sh -c './ipv6_flowlabel -l 1 -s'
+
+echo "TEST datapath (with ping-sockets flowinfo-send)"
+./in_netns.sh \
+ sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \
+ sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \
+ ./ipv6_flowlabel -l 1 -p -s'
+
echo OK. All tests passed
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index f905d5358e68..43a723626126 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -1,12 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
top_srcdir = ../../../../..
-KSFT_KHDR_INSTALL := 1
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
- simult_flows.sh mptcp_sockopt.sh
+ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index a4406b7a8064..ff83ef426df5 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -455,6 +455,12 @@ wait_mpj()
done
}
+kill_wait()
+{
+ kill $1 > /dev/null 2>&1
+ wait $1 2>/dev/null
+}
+
pm_nl_set_limits()
{
local ns=$1
@@ -654,6 +660,11 @@ do_transfer()
local port=$((10000 + TEST_COUNT - 1))
local cappid
+ local userspace_pm=0
+ local evts_ns1
+ local evts_ns1_pid
+ local evts_ns2
+ local evts_ns2_pid
:> "$cout"
:> "$sout"
@@ -690,10 +701,29 @@ do_transfer()
extra_args="-r ${speed:6}"
fi
+ if [[ "${addr_nr_ns1}" = "userspace_"* ]]; then
+ userspace_pm=1
+ addr_nr_ns1=${addr_nr_ns1:10}
+ fi
+
if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then
# disconnect
extra_args="$extra_args -I ${addr_nr_ns2:10}"
addr_nr_ns2=0
+ elif [[ "${addr_nr_ns2}" = "userspace_"* ]]; then
+ userspace_pm=1
+ addr_nr_ns2=${addr_nr_ns2:10}
+ fi
+
+ if [ $userspace_pm -eq 1 ]; then
+ evts_ns1=$(mktemp)
+ evts_ns2=$(mktemp)
+ :> "$evts_ns1"
+ :> "$evts_ns2"
+ ip netns exec ${listener_ns} ./pm_nl_ctl events >> "$evts_ns1" 2>&1 &
+ evts_ns1_pid=$!
+ ip netns exec ${connector_ns} ./pm_nl_ctl events >> "$evts_ns2" 2>&1 &
+ evts_ns2_pid=$!
fi
local local_addr
@@ -748,6 +778,8 @@ do_transfer()
if [ $addr_nr_ns1 -gt 0 ]; then
local counter=2
local add_nr_ns1=${addr_nr_ns1}
+ local id=10
+ local tk
while [ $add_nr_ns1 -gt 0 ]; do
local addr
if is_v6 "${connect_addr}"; then
@@ -755,9 +787,18 @@ do_transfer()
else
addr="10.0.$counter.1"
fi
- pm_nl_add_endpoint $ns1 $addr flags signal
+ if [ $userspace_pm -eq 0 ]; then
+ pm_nl_add_endpoint $ns1 $addr flags signal
+ else
+ tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns1")
+ ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id
+ sleep 1
+ ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id
+ fi
+
counter=$((counter + 1))
add_nr_ns1=$((add_nr_ns1 - 1))
+ id=$((id + 1))
done
elif [ $addr_nr_ns1 -lt 0 ]; then
local rm_nr_ns1=$((-addr_nr_ns1))
@@ -804,6 +845,8 @@ do_transfer()
if [ $addr_nr_ns2 -gt 0 ]; then
local add_nr_ns2=${addr_nr_ns2}
local counter=3
+ local id=20
+ local tk da dp sp
while [ $add_nr_ns2 -gt 0 ]; do
local addr
if is_v6 "${connect_addr}"; then
@@ -811,9 +854,23 @@ do_transfer()
else
addr="10.0.$counter.2"
fi
- pm_nl_add_endpoint $ns2 $addr flags $flags
+ if [ $userspace_pm -eq 0 ]; then
+ pm_nl_add_endpoint $ns2 $addr flags $flags
+ else
+ tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
+ da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2")
+ dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
+ ip netns exec ${connector_ns} ./pm_nl_ctl csf lip $addr lid $id \
+ rip $da rport $dp token $tk
+ sleep 1
+ sp=$(grep "type:10" "$evts_ns2" |
+ sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
+ ip netns exec ${connector_ns} ./pm_nl_ctl dsf lip $addr lport $sp \
+ rip $da rport $dp token $tk
+ fi
counter=$((counter + 1))
add_nr_ns2=$((add_nr_ns2 - 1))
+ id=$((id + 1))
done
elif [ $addr_nr_ns2 -lt 0 ]; then
local rm_nr_ns2=$((-addr_nr_ns2))
@@ -890,6 +947,12 @@ do_transfer()
kill $cappid
fi
+ if [ $userspace_pm -eq 1 ]; then
+ kill_wait $evts_ns1_pid
+ kill_wait $evts_ns2_pid
+ rm -rf $evts_ns1 $evts_ns2
+ fi
+
NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
nstat | grep Tcp > /tmp/${listener_ns}.out
NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
@@ -2365,6 +2428,36 @@ backup_tests()
chk_add_nr 1 1
chk_prio_nr 1 1
fi
+
+ if reset "mpc backup"; then
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ chk_join_nr 0 0 0
+ chk_prio_nr 0 1
+ fi
+
+ if reset "mpc backup both sides"; then
+ pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ chk_join_nr 0 0 0
+ chk_prio_nr 1 1
+ fi
+
+ if reset "mpc switch to backup"; then
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+ chk_join_nr 0 0 0
+ chk_prio_nr 0 1
+ fi
+
+ if reset "mpc switch to backup both sides"; then
+ pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+ chk_join_nr 0 0 0
+ chk_prio_nr 1 1
+ fi
}
add_addr_ports_tests()
@@ -2810,6 +2903,25 @@ userspace_tests()
chk_join_nr 0 0 0
chk_rm_nr 0 0
fi
+
+ # userspace pm add & remove address
+ if reset "userspace pm add & remove address"; then
+ set_userspace_pm $ns1
+ pm_nl_set_limits $ns2 1 1
+ run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_rm_nr 1 1 invert
+ fi
+
+ # userspace pm create destroy subflow
+ if reset "userspace pm create destroy subflow"; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 1
+ run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow
+ chk_join_nr 1 1 1
+ chk_rm_nr 0 1
+ fi
}
endpoint_tests()
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 6a2f4b981e1d..abddf4c63e79 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -31,7 +31,7 @@
static void syntax(char *argv[])
{
- fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
+ fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]);
fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n");
fprintf(stderr, "\trem id <local-id> token <token>\n");
@@ -39,7 +39,7 @@ static void syntax(char *argv[])
fprintf(stderr, "\tdsf lip <local-ip> lport <local-port> rip <remote-ip> rport <remote-port> token <token>\n");
fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
- fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
+ fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>] [token <token>] [rip <ip>] [rport <port>]\n");
fprintf(stderr, "\tflush\n");
fprintf(stderr, "\tdump\n");
fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
@@ -1279,7 +1279,10 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
struct rtattr *rta, *nest;
struct nlmsghdr *nh;
u_int32_t flags = 0;
+ u_int32_t token = 0;
+ u_int16_t rport = 0;
u_int16_t family;
+ void *rip = NULL;
int nest_start;
int use_id = 0;
u_int8_t id;
@@ -1339,7 +1342,13 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
error(1, 0, " missing flags keyword");
for (; arg < argc; arg++) {
- if (!strcmp(argv[arg], "flags")) {
+ if (!strcmp(argv[arg], "token")) {
+ if (++arg >= argc)
+ error(1, 0, " missing token value");
+
+ /* token */
+ token = atoi(argv[arg]);
+ } else if (!strcmp(argv[arg], "flags")) {
char *tok, *str;
/* flags */
@@ -1378,12 +1387,72 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
rta->rta_len = RTA_LENGTH(2);
memcpy(RTA_DATA(rta), &port, 2);
off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "rport")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote port");
+
+ rport = atoi(argv[arg]);
+ } else if (!strcmp(argv[arg], "rip")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote ip");
+
+ rip = argv[arg];
} else {
error(1, 0, "unknown keyword %s", argv[arg]);
}
}
nest->rta_len = off - nest_start;
+ /* token */
+ if (token) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
+ /* remote addr/port */
+ if (rip) {
+ nest_start = off;
+ nest = (void *)(data + off);
+ nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR_REMOTE;
+ nest->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(nest->rta_len);
+
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, rip, RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, rip, RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else {
+ error(1, errno, "can't parse ip %s", (char *)rip);
+ }
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ if (rport) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &rport, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
+ nest->rta_len = off - nest_start;
+ }
+
do_nl_req(fd, nh, off, 0);
return 0;
}
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index f441ff7904fc..ffa13a957a36 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -12,6 +12,7 @@ timeout_test=$((timeout_poll * 2 + 1))
test_cnt=1
ret=0
bail=0
+slack=50
usage() {
echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
@@ -52,6 +53,7 @@ setup()
cout=$(mktemp)
capout=$(mktemp)
size=$((2 * 2048 * 4096))
+
dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1
dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1
@@ -104,6 +106,16 @@ setup()
ip -net "$ns3" route add default via dead:beef:3::2
ip netns exec "$ns3" ./pm_nl_ctl limits 1 1
+
+ # debug build can slow down measurably the test program
+ # we use quite tight time limit on the run-time, to ensure
+ # maximum B/W usage.
+ # Use kmemleak/lockdep/kasan/prove_locking presence as a rough
+ # estimate for this being a debug kernel and increase the
+ # maximum run-time accordingly. Observed run times for CI builds
+ # running selftests, including kbuild, were used to determine the
+ # amount of time to add.
+ grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550))
}
# $1: ns, $2: port
@@ -241,7 +253,7 @@ run_test()
# mptcp_connect will do some sleeps to allow the mp_join handshake
# completion (see mptcp_connect): 200ms on each side, add some slack
- time=$((time + 450))
+ time=$((time + 400 + slack))
printf "%-60s" "$msg"
do_transfer $small $large $time
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 78d0bb640b11..3229725b64b0 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -37,6 +37,12 @@ rndh=$(stdbuf -o0 -e0 printf %x "$sec")-$(mktemp -u XXXXXX)
ns1="ns1-$rndh"
ns2="ns2-$rndh"
+kill_wait()
+{
+ kill $1 > /dev/null 2>&1
+ wait $1 2>/dev/null
+}
+
cleanup()
{
echo "cleanup"
@@ -48,16 +54,16 @@ cleanup()
kill -SIGUSR1 $client4_pid > /dev/null 2>&1
fi
if [ $server4_pid -ne 0 ]; then
- kill $server4_pid > /dev/null 2>&1
+ kill_wait $server4_pid
fi
if [ $client6_pid -ne 0 ]; then
kill -SIGUSR1 $client6_pid > /dev/null 2>&1
fi
if [ $server6_pid -ne 0 ]; then
- kill $server6_pid > /dev/null 2>&1
+ kill_wait $server6_pid
fi
if [ $evts_pid -ne 0 ]; then
- kill $evts_pid > /dev/null 2>&1
+ kill_wait $evts_pid
fi
local netns
for netns in "$ns1" "$ns2" ;do
@@ -153,7 +159,7 @@ make_connection()
sleep 1
# Capture client/server attributes from MPTCP connection netlink events
- kill $client_evts_pid
+ kill_wait $client_evts_pid
local client_token
local client_port
@@ -165,7 +171,7 @@ make_connection()
client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\
"$client_evts")
- kill $server_evts_pid
+ kill_wait $server_evts_pid
server_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
server_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\
"$server_evts")
@@ -286,7 +292,7 @@ test_announce()
verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\
"$client_addr_id" "$new4_port"
- kill $evts_pid
+ kill_wait $evts_pid
# Capture events on the network namespace running the client
:>"$evts"
@@ -321,7 +327,7 @@ test_announce()
verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
"$server_addr_id" "$new4_port"
- kill $evts_pid
+ kill_wait $evts_pid
rm -f "$evts"
}
@@ -416,7 +422,7 @@ test_remove()
sleep 0.5
verify_remove_event "$evts" "$REMOVED" "$server6_token" "$client_addr_id"
- kill $evts_pid
+ kill_wait $evts_pid
# Capture events on the network namespace running the client
:>"$evts"
@@ -449,7 +455,7 @@ test_remove()
sleep 0.5
verify_remove_event "$evts" "$REMOVED" "$client6_token" "$server_addr_id"
- kill $evts_pid
+ kill_wait $evts_pid
rm -f "$evts"
}
@@ -553,7 +559,7 @@ test_subflows()
"10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
# Delete the listener from the client ns, if one was created
- kill $listener_pid > /dev/null 2>&1
+ kill_wait $listener_pid
local sport
sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
@@ -592,7 +598,7 @@ test_subflows()
"$client_addr_id" "ns1" "ns2"
# Delete the listener from the client ns, if one was created
- kill $listener_pid > /dev/null 2>&1
+ kill_wait $listener_pid
sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
@@ -631,7 +637,7 @@ test_subflows()
"$client_addr_id" "ns1" "ns2"
# Delete the listener from the client ns, if one was created
- kill $listener_pid > /dev/null 2>&1
+ kill_wait $listener_pid
sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
@@ -647,7 +653,7 @@ test_subflows()
ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
"$client4_token" > /dev/null 2>&1
- kill $evts_pid
+ kill_wait $evts_pid
# Capture events on the network namespace running the client
:>"$evts"
@@ -674,7 +680,7 @@ test_subflows()
"10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
# Delete the listener from the server ns, if one was created
- kill $listener_pid> /dev/null 2>&1
+ kill_wait $listener_pid
sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
@@ -713,7 +719,7 @@ test_subflows()
"$server_addr_id" "ns2" "ns1"
# Delete the listener from the server ns, if one was created
- kill $listener_pid > /dev/null 2>&1
+ kill_wait $listener_pid
sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
@@ -750,7 +756,7 @@ test_subflows()
"10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
# Delete the listener from the server ns, if one was created
- kill $listener_pid > /dev/null 2>&1
+ kill_wait $listener_pid
sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
@@ -766,14 +772,46 @@ test_subflows()
ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
"$server4_token" > /dev/null 2>&1
- kill $evts_pid
+ kill_wait $evts_pid
rm -f "$evts"
}
+test_prio()
+{
+ local count
+
+ # Send MP_PRIO signal from client to server machine
+ ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$server4_port"
+ sleep 0.5
+
+ # Check TX
+ stdbuf -o0 -e0 printf "MP_PRIO TX \t"
+ count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
+ [ -z "$count" ] && count=0
+ if [ $count != 1 ]; then
+ stdbuf -o0 -e0 printf "[FAIL]\n"
+ exit 1
+ else
+ stdbuf -o0 -e0 printf "[OK]\n"
+ fi
+
+ # Check RX
+ stdbuf -o0 -e0 printf "MP_PRIO RX \t"
+ count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}')
+ [ -z "$count" ] && count=0
+ if [ $count != 1 ]; then
+ stdbuf -o0 -e0 printf "[FAIL]\n"
+ exit 1
+ else
+ stdbuf -o0 -e0 printf "[OK]\n"
+ fi
+}
+
make_connection
make_connection "v6"
test_announce
test_remove
test_subflows
+test_prio
exit 0
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
new file mode 100755
index 000000000000..28a775654b92
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -0,0 +1,879 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+#
+# This script is designed for testing the SRv6 H.Encaps.Red behavior.
+#
+# Below is depicted the IPv6 network of an operator which offers advanced
+# IPv4/IPv6 VPN services to hosts, enabling them to communicate with each
+# other.
+# In this example, hosts hs-1 and hs-2 are connected through an IPv4/IPv6 VPN
+# service, while hs-3 and hs-4 are connected using an IPv6 only VPN.
+#
+# Routers rt-1,rt-2,rt-3 and rt-4 implement IPv4/IPv6 L3 VPN services
+# leveraging the SRv6 architecture. The key components for such VPNs are:
+#
+# i) The SRv6 H.Encaps.Red behavior applies SRv6 Policies on traffic received
+# by connected hosts, initiating the VPN tunnel. Such a behavior is an
+# optimization of the SRv6 H.Encap aiming to reduce the length of the SID
+# List carried in the pushed SRH. Specifically, the H.Encaps.Red removes
+# the first SID contained in the SID List (i.e. SRv6 Policy) by storing it
+# into the IPv6 Destination Address. When a SRv6 Policy is made of only one
+# SID, the SRv6 H.Encaps.Red behavior omits the SRH at all and pushes that
+# SID directly into the IPv6 DA;
+#
+# ii) The SRv6 End behavior advances the active SID in the SID List carried by
+# the SRH;
+#
+# iii) The SRv6 End.DT46 behavior is used for removing the SRv6 Policy and,
+# thus, it terminates the VPN tunnel. Such a behavior is capable of
+# handling, at the same time, both tunneled IPv4 and IPv6 traffic.
+#
+#
+# cafe::1 cafe::2
+# 10.0.0.1 10.0.0.2
+# +--------+ +--------+
+# | | | |
+# | hs-1 | | hs-2 |
+# | | | |
+# +---+----+ +--- +---+
+# cafe::/64 | | cafe::/64
+# 10.0.0.0/24 | | 10.0.0.0/24
+# +---+----+ +----+---+
+# | | fcf0:0:1:2::/64 | |
+# | rt-1 +-------------------+ rt-2 |
+# | | | |
+# +---+----+ +----+---+
+# | . . |
+# | fcf0:0:1:3::/64 . |
+# | . . |
+# | . . |
+# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64
+# | . . |
+# | . . |
+# | fcf0:0:2:4::/64 . |
+# | . . |
+# +---+----+ +----+---+
+# | | | |
+# | rt-4 +-------------------+ rt-3 |
+# | | fcf0:0:3:4::/64 | |
+# +---+----+ +----+---+
+# cafe::/64 | | cafe::/64
+# 10.0.0.0/24 | | 10.0.0.0/24
+# +---+----+ +--- +---+
+# | | | |
+# | hs-4 | | hs-3 |
+# | | | |
+# +--------+ +--------+
+# cafe::4 cafe::3
+# 10.0.0.4 10.0.0.3
+#
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y
+# in the IPv6 operator network.
+#
+# Local SID table
+# ===============
+#
+# Each SRv6 router is configured with a Local SID table in which SIDs are
+# stored. Considering the given SRv6 router rt-x, at least two SIDs are
+# configured in the Local SID table:
+#
+# Local SID table for SRv6 router rt-x
+# +----------------------------------------------------------+
+# |fcff:x::e is associated with the SRv6 End behavior |
+# |fcff:x::d46 is associated with the SRv6 End.DT46 behavior |
+# +----------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN
+# services. Reachability of SIDs is ensured by proper configuration of the IPv6
+# operator's network and SRv6 routers.
+#
+# # SRv6 Policies
+# ===============
+#
+# An SRv6 ingress router applies SRv6 policies to the traffic received from a
+# connected host. SRv6 policy enforcement consists of encapsulating the
+# received traffic into a new IPv6 packet with a given SID List contained in
+# the SRH.
+#
+# IPv4/IPv6 VPN between hs-1 and hs-2
+# -----------------------------------
+#
+# Hosts hs-1 and hs-2 are connected using dedicated IPv4/IPv6 VPNs.
+# Specifically, packets generated from hs-1 and directed towards hs-2 are
+# handled by rt-1 which applies the following SRv6 Policies:
+#
+# i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::e,fcff:2::d46
+# ii.a) IPv4 traffic, SID List=fcff:2::d46
+#
+# Policy (i.a) steers tunneled IPv6 traffic through SRv6 routers
+# rt-3,rt-4,rt-2. Instead, Policy (ii.a) steers tunneled IPv4 traffic through
+# rt-2.
+# The H.Encaps.Red reduces the SID List (i.a) carried in SRH by removing the
+# first SID (fcff:3::e) and pushing it into the IPv6 DA. In case of IPv4
+# traffic, the H.Encaps.Red omits the presence of SRH at all, since the SID
+# List (ii.a) consists of only one SID that can be stored directly in the IPv6
+# DA.
+#
+# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following
+# policies:
+#
+# i.b) IPv6 traffic, SID List=fcff:1::d46
+# ii.b) IPv4 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d46
+#
+# Policy (i.b) steers tunneled IPv6 traffic through the SRv6 router rt-1.
+# Conversely, Policy (ii.b) steers tunneled IPv4 traffic through SRv6 routers
+# rt-4,rt-3,rt-1.
+# The H.Encaps.Red omits the SRH at all in case of (i.b) by pushing the single
+# SID (fcff::1::d46) inside the IPv6 DA.
+# The H.Encaps.Red reduces the SID List (ii.b) in the SRH by removing the first
+# SID (fcff:4::e) and pushing it into the IPv6 DA.
+#
+# In summary:
+# hs-1->hs-2 |IPv6 DA=fcff:3::e|SRH SIDs=fcff:4::e,fcff:2::d46|IPv6|...| (i.a)
+# hs-1->hs-2 |IPv6 DA=fcff:2::d46|IPv4|...| (ii.a)
+#
+# hs-2->hs-1 |IPv6 DA=fcff:1::d46|IPv6|...| (i.b)
+# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d46|IPv4|...| (ii.b)
+#
+#
+# IPv6 VPN between hs-3 and hs-4
+# ------------------------------
+#
+# Hosts hs-3 and hs-4 are connected using a dedicated IPv6 only VPN.
+# Specifically, packets generated from hs-3 and directed towards hs-4 are
+# handled by rt-3 which applies the following SRv6 Policy:
+#
+# i.c) IPv6 traffic, SID List=fcff:2::e,fcff:4::d46
+#
+# Policy (i.c) steers tunneled IPv6 traffic through SRv6 routers rt-2,rt-4.
+# The H.Encaps.Red reduces the SID List (i.c) carried in SRH by pushing the
+# first SID (fcff:2::e) in the IPv6 DA.
+#
+# On the reverse path (i.e. from hs-4 to hs-3) the router rt-4 applies the
+# following SRv6 Policy:
+#
+# i.d) IPv6 traffic, SID List=fcff:1::e,fcff:3::d46.
+#
+# Policy (i.d) steers tunneled IPv6 traffic through SRv6 routers rt-1,rt-3.
+# The H.Encaps.Red reduces the SID List (i.d) carried in SRH by pushing the
+# first SID (fcff:1::e) in the IPv6 DA.
+#
+# In summary:
+# hs-3->hs-4 |IPv6 DA=fcff:2::e|SRH SIDs=fcff:4::d46|IPv6|...| (i.c)
+# hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d)
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly VRF_TID=100
+readonly VRF_DEVNAME="vrf-${VRF_TID}"
+readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fcff
+readonly END_FUNC=000e
+readonly DT46_FUNC=0d46
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+ local rc="$1"
+ local expected="$2"
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ printf "\nTests passed: %3d\n" "${nsuccess}"
+ printf "Tests failed: %3d\n" "${nfail}"
+
+ # when a test fails, the value of 'ret' is set to 1 (error code).
+ # Conversely, when all tests are passed successfully, the 'ret' value
+ # is set to 0 (success code).
+ if [ "${ret}" -ne 1 ]; then
+ ret=0
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+ local cmd="$1"
+
+ if [ ! -x "$(command -v "${cmd}")" ]; then
+ echo "SKIP: Could not run test without \"${cmd}\" tool";
+ exit "${ksft_skip}"
+ fi
+}
+
+get_nodename()
+{
+ local name="$1"
+
+ echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+ local rtid="$1"
+
+ get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+ local hsid="$1"
+
+ get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+ local name="$1"
+
+ ip netns add "${name}"
+}
+
+create_router()
+{
+ local rtid="$1"
+ local nsname
+
+ nsname="$(get_rtname "${rtid}")"
+
+ __create_namespace "${nsname}"
+}
+
+create_host()
+{
+ local hsid="$1"
+ local nsname
+
+ nsname="$(get_hsname "${hsid}")"
+
+ __create_namespace "${nsname}"
+}
+
+cleanup()
+{
+ local nsname
+ local i
+
+ # destroy routers
+ for i in ${ROUTERS}; do
+ nsname="$(get_rtname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # destroy hosts
+ for i in ${HOSTS}; do
+ nsname="$(get_hsname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # check whether the setup phase was completed successfully or not. In
+ # case of an error during the setup phase of the testing environment,
+ # the selftest is considered as "skipped".
+ if [ "${SETUP_ERR}" -ne 0 ]; then
+ echo "SKIP: Setting up the testing environment failed"
+ exit "${ksft_skip}"
+ fi
+
+ exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local neigh
+ local nsname
+ local neigh_nsname
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ neigh_nsname="$(get_rtname "${neigh}")"
+
+ ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+ type veth peer name "veth-rt-${neigh}-${rt}" \
+ netns "${neigh_nsname}"
+ done
+}
+
+get_network_prefix()
+{
+ local rt="$1"
+ local neigh="$2"
+ local p="${rt}"
+ local q="${neigh}"
+
+ if [ "${p}" -gt "${q}" ]; then
+ p="${q}"; q="${rt}"
+ fi
+
+ echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local nsname
+ local net_prefix
+ local devname
+ local neigh
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ ip -netns "${nsname}" addr \
+ add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+ ip -netns "${nsname}" link set "${devname}" up
+ done
+
+ ip -netns "${nsname}" link set lo up
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local net_prefix
+ local devname
+ local nsname
+ local neigh
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ # set underlay network routes for SIDs reachability
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+ table "${LOCALSID_TABLE_ID}" \
+ via "${net_prefix}::${neigh}" dev "${devname}"
+ done
+
+ # Local End behavior (note that "dev" is dummy and the VRF is chosen
+ # for the sake of simplicity).
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End dev "${VRF_DEVNAME}"
+
+ # Local End.DT46 behavior
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${rt}::${DT46_FUNC}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.DT46 vrftable "${VRF_TID}" \
+ dev "${VRF_DEVNAME}"
+
+ # all SIDs for VPNs start with a common locator. Routes and SRv6
+ # Endpoint behavior instaces are grouped together in the 'localsid'
+ # table.
+ ip -netns "${nsname}" -6 rule \
+ add to "${VPN_LOCATOR_SERVICE}::/16" \
+ lookup "${LOCALSID_TABLE_ID}" prio 999
+
+ # set default routes to unreachable for both ipv4 and ipv6
+ ip -netns "${nsname}" -6 route \
+ add unreachable default metric 4278198272 \
+ vrf "${VRF_DEVNAME}"
+
+ ip -netns "${nsname}" -4 route \
+ add unreachable default metric 4278198272 \
+ vrf "${VRF_DEVNAME}"
+}
+
+# build and install the SRv6 policy into the ingress SRv6 router.
+# args:
+# $1 - destination host (i.e. cafe::x host)
+# $2 - SRv6 router configured for enforcing the SRv6 Policy
+# $3 - SRv6 routers configured for steering traffic (End behaviors)
+# $4 - SRv6 router configured for removing the SRv6 Policy (router connected
+# to the destination host)
+# $5 - encap mode (full or red)
+# $6 - traffic type (IPv6 or IPv4)
+__setup_rt_policy()
+{
+ local dst="$1"
+ local encap_rt="$2"
+ local end_rts="$3"
+ local dec_rt="$4"
+ local mode="$5"
+ local traffic="$6"
+ local nsname
+ local policy=''
+ local n
+
+ nsname="$(get_rtname "${encap_rt}")"
+
+ for n in ${end_rts}; do
+ policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
+ done
+
+ policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}"
+
+ # add SRv6 policy to incoming traffic sent by connected hosts
+ if [ "${traffic}" -eq 6 ]; then
+ ip -netns "${nsname}" -6 route \
+ add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \
+ encap seg6 mode "${mode}" segs "${policy}" \
+ dev "${VRF_DEVNAME}"
+
+ ip -netns "${nsname}" -6 neigh \
+ add proxy "${IPv6_HS_NETWORK}::${dst}" \
+ dev "${RT2HS_DEVNAME}"
+ else
+ # "dev" must be different from the one where the packet is
+ # received, otherwise the proxy arp does not work.
+ ip -netns "${nsname}" -4 route \
+ add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \
+ encap seg6 mode "${mode}" segs "${policy}" \
+ dev "${VRF_DEVNAME}"
+ fi
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv6()
+{
+ __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6
+}
+
+#see __setup_rt_policy
+setup_rt_policy_ipv4()
+{
+ __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4
+}
+
+setup_hs()
+{
+ local hs="$1"
+ local rt="$2"
+ local hsname
+ local rtname
+
+ hsname="$(get_hsname "${hs}")"
+ rtname="$(get_rtname "${rt}")"
+
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns "${hsname}" link add veth0 type veth \
+ peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+ ip -netns "${hsname}" addr \
+ add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad
+ ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0
+
+ ip -netns "${hsname}" link set veth0 up
+ ip -netns "${hsname}" link set lo up
+
+ # configure the VRF on the router which is directly connected to the
+ # source host.
+ ip -netns "${rtname}" link \
+ add "${VRF_DEVNAME}" type vrf table "${VRF_TID}"
+ ip -netns "${rtname}" link set "${VRF_DEVNAME}" up
+
+ # enslave the veth interface connecting the router with the host to the
+ # VRF in the access router
+ ip -netns "${rtname}" link \
+ set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}"
+
+ ip -netns "${rtname}" addr \
+ add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad
+ ip -netns "${rtname}" addr \
+ add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+ ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
+
+ ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup()
+{
+ local i
+
+ # create routers
+ ROUTERS="1 2 3 4"; readonly ROUTERS
+ for i in ${ROUTERS}; do
+ create_router "${i}"
+ done
+
+ # create hosts
+ HOSTS="1 2 3 4"; readonly HOSTS
+ for i in ${HOSTS}; do
+ create_host "${i}"
+ done
+
+ # set up the links for connecting routers
+ add_link_rt_pairs 1 "2 3 4"
+ add_link_rt_pairs 2 "3 4"
+ add_link_rt_pairs 3 "4"
+
+ # set up the basic connectivity of routers and routes required for
+ # reachability of SIDs.
+ setup_rt_networking 1 "2 3 4"
+ setup_rt_networking 2 "1 3 4"
+ setup_rt_networking 3 "1 2 4"
+ setup_rt_networking 4 "1 2 3"
+
+ # set up the hosts connected to routers
+ setup_hs 1 1
+ setup_hs 2 2
+ setup_hs 3 3
+ setup_hs 4 4
+
+ # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46)
+ setup_rt_local_sids 1 "2 3 4"
+ setup_rt_local_sids 2 "1 3 4"
+ setup_rt_local_sids 3 "1 2 4"
+ setup_rt_local_sids 4 "1 2 3"
+
+ # set up SRv6 policies
+
+ # create an IPv6 VPN between hosts hs-1 and hs-2.
+ # the network path between hs-1 and hs-2 traverses several routers
+ # depending on the direction of traffic.
+ #
+ # Direction hs-1 -> hs-2 (H.Encaps.Red)
+ # - rt-3,rt-4 (SRv6 End behaviors)
+ # - rt-2 (SRv6 End.DT46 behavior)
+ #
+ # Direction hs-2 -> hs-1 (H.Encaps.Red)
+ # - rt-1 (SRv6 End.DT46 behavior)
+ setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red
+ setup_rt_policy_ipv6 1 2 "" 1 encap.red
+
+ # create an IPv4 VPN between hosts hs-1 and hs-2
+ # the network path between hs-1 and hs-2 traverses several routers
+ # depending on the direction of traffic.
+ #
+ # Direction hs-1 -> hs-2 (H.Encaps.Red)
+ # - rt-2 (SRv6 End.DT46 behavior)
+ #
+ # Direction hs-2 -> hs-1 (H.Encaps.Red)
+ # - rt-4,rt-3 (SRv6 End behaviors)
+ # - rt-1 (SRv6 End.DT46 behavior)
+ setup_rt_policy_ipv4 2 1 "" 2 encap.red
+ setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red
+
+ # create an IPv6 VPN between hosts hs-3 and hs-4
+ # the network path between hs-3 and hs-4 traverses several routers
+ # depending on the direction of traffic.
+ #
+ # Direction hs-3 -> hs-4 (H.Encaps.Red)
+ # - rt-2 (SRv6 End Behavior)
+ # - rt-4 (SRv6 End.DT46 behavior)
+ #
+ # Direction hs-4 -> hs-3 (H.Encaps.Red)
+ # - rt-1 (SRv6 End behavior)
+ # - rt-3 (SRv6 End.DT46 behavior)
+ setup_rt_policy_ipv6 4 3 "2" 4 encap.red
+ setup_rt_policy_ipv6 3 4 "1" 3 encap.red
+
+ # testing environment was set up successfully
+ SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+ local prefix
+ local rtsrc_nsname
+
+ rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+ prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+ ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+
+ check_rt_connectivity "${rtsrc}" "${rtdst}"
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc="$1"
+
+ check_hs_ipv6_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+
+ check_hs_ipv4_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+ check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+}
+
+check_and_log_hs_ipv6_isolation()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ # in this case, the connectivity test must fail
+ check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 1 "IPv6 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_isolation()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ # in this case, the connectivity test must fail
+ check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 1 "IPv4 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}"
+}
+
+check_and_log_hs_isolation()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_and_log_hs_ipv6_isolation "${hssrc}" "${hsdst}"
+ check_and_log_hs_ipv4_isolation "${hssrc}" "${hsdst}"
+}
+
+router_tests()
+{
+ local i
+ local j
+
+ log_section "IPv6 routers connectivity test"
+
+ for i in ${ROUTERS}; do
+ for j in ${ROUTERS}; do
+ if [ "${i}" -eq "${j}" ]; then
+ continue
+ fi
+
+ check_and_log_rt_connectivity "${i}" "${j}"
+ done
+ done
+}
+
+host2gateway_tests()
+{
+ local hs
+
+ log_section "IPv4/IPv6 connectivity test among hosts and gateways"
+
+ for hs in ${HOSTS}; do
+ check_and_log_hs2gw_connectivity "${hs}"
+ done
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4/IPv6)"
+
+ check_and_log_hs_connectivity 1 2
+ check_and_log_hs_connectivity 2 1
+
+ log_section "SRv6 VPN connectivity test hosts (h3 <-> h4, IPv6 only)"
+
+ check_and_log_hs_ipv6_connectivity 3 4
+ check_and_log_hs_ipv6_connectivity 4 3
+}
+
+host_vpn_isolation_tests()
+{
+ local l1="1 2"
+ local l2="3 4"
+ local tmp
+ local i
+ local j
+ local k
+
+ log_section "SRv6 VPN isolation test among hosts"
+
+ for k in 0 1; do
+ for i in ${l1}; do
+ for j in ${l2}; do
+ check_and_log_hs_isolation "${i}" "${j}"
+ done
+ done
+
+ # let us test the reverse path
+ tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+ done
+
+ log_section "SRv6 VPN isolation test among hosts (h2 <-> h4, IPv4 only)"
+
+ check_and_log_hs_ipv4_isolation 2 4
+ check_and_log_hs_ipv4_isolation 4 2
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+ if ! ip route help 2>&1 | grep -qo "encap.red"; then
+ echo "SKIP: Missing SRv6 encap.red support in iproute2"
+ exit "${ksft_skip}"
+ fi
+}
+
+test_vrf_or_ksft_skip()
+{
+ modprobe vrf &>/dev/null || true
+ if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit "${ksft_skip}"
+ fi
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges"
+ exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+
+test_iproute2_supp_or_ksft_skip
+test_vrf_or_ksft_skip
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
new file mode 100755
index 000000000000..cb4177d41b21
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -0,0 +1,821 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+#
+# This script is designed for testing the SRv6 H.L2Encaps.Red behavior.
+#
+# Below is depicted the IPv6 network of an operator which offers L2 VPN
+# services to hosts, enabling them to communicate with each other.
+# In this example, hosts hs-1 and hs-2 are connected through an L2 VPN service.
+# Currently, the SRv6 subsystem in Linux allows hosts hs-1 and hs-2 to exchange
+# full L2 frames as long as they carry IPv4/IPv6.
+#
+# Routers rt-1,rt-2,rt-3 and rt-4 implement L2 VPN services
+# leveraging the SRv6 architecture. The key components for such VPNs are:
+#
+# i) The SRv6 H.L2Encaps.Red behavior applies SRv6 Policies on traffic
+# received by connected hosts, initiating the VPN tunnel. Such a behavior
+# is an optimization of the SRv6 H.L2Encap aiming to reduce the
+# length of the SID List carried in the pushed SRH. Specifically, the
+# H.L2Encaps.Red removes the first SID contained in the SID List (i.e. SRv6
+# Policy) by storing it into the IPv6 Destination Address. When a SRv6
+# Policy is made of only one SID, the SRv6 H.L2Encaps.Red behavior omits
+# the SRH at all and pushes that SID directly into the IPv6 DA;
+#
+# ii) The SRv6 End behavior advances the active SID in the SID List
+# carried by the SRH;
+#
+# iii) The SRv6 End.DX2 behavior is used for removing the SRv6 Policy
+# and, thus, it terminates the VPN tunnel. The decapsulated L2 frame is
+# sent over the interface connected with the destination host.
+#
+# cafe::1 cafe::2
+# 10.0.0.1 10.0.0.2
+# +--------+ +--------+
+# | | | |
+# | hs-1 | | hs-2 |
+# | | | |
+# +---+----+ +--- +---+
+# cafe::/64 | | cafe::/64
+# 10.0.0.0/24 | | 10.0.0.0/24
+# +---+----+ +----+---+
+# | | fcf0:0:1:2::/64 | |
+# | rt-1 +-------------------+ rt-2 |
+# | | | |
+# +---+----+ +----+---+
+# | . . |
+# | fcf0:0:1:3::/64 . |
+# | . . |
+# | . . |
+# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64
+# | . . |
+# | . . |
+# | fcf0:0:2:4::/64 . |
+# | . . |
+# +---+----+ +----+---+
+# | | | |
+# | rt-4 +-------------------+ rt-3 |
+# | | fcf0:0:3:4::/64 | |
+# +---+----+ +----+---+
+#
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y
+# in the IPv6 operator network.
+#
+# Local SID table
+# ===============
+#
+# Each SRv6 router is configured with a Local SID table in which SIDs are
+# stored. Considering the given SRv6 router rt-x, at least two SIDs are
+# configured in the Local SID table:
+#
+# Local SID table for SRv6 router rt-x
+# +----------------------------------------------------------+
+# |fcff:x::e is associated with the SRv6 End behavior |
+# |fcff:x::d2 is associated with the SRv6 End.DX2 behavior |
+# +----------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN
+# services. Reachability of SIDs is ensured by proper configuration of the IPv6
+# operator's network and SRv6 routers.
+#
+# SRv6 Policies
+# =============
+#
+# An SRv6 ingress router applies SRv6 policies to the traffic received from a
+# connected host. SRv6 policy enforcement consists of encapsulating the
+# received traffic into a new IPv6 packet with a given SID List contained in
+# the SRH.
+#
+# L2 VPN between hs-1 and hs-2
+# ----------------------------
+#
+# Hosts hs-1 and hs-2 are connected using a dedicated L2 VPN.
+# Specifically, packets generated from hs-1 and directed towards hs-2 are
+# handled by rt-1 which applies the following SRv6 Policies:
+#
+# i.a) L2 traffic, SID List=fcff:2::d2
+#
+# Policy (i.a) steers tunneled L2 traffic through SRv6 router rt-2.
+# The H.L2Encaps.Red omits the presence of SRH at all, since the SID List
+# consists of only one SID (fcff:2::d2) that can be stored directly in the IPv6
+# DA.
+#
+# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following
+# policies:
+#
+# i.b) L2 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d2
+#
+# Policy (i.b) steers tunneled L2 traffic through the SRv6 routers
+# rt-4,rt-3,rt2. The H.L2Encaps.Red reduces the SID List in the SRH by removing
+# the first SID (fcff:4::e) and pushing it into the IPv6 DA.
+#
+# In summary:
+# hs-1->hs-2 |IPv6 DA=fcff:2::d2|eth|...| (i.a)
+# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b)
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly DUMMY_DEVNAME="dum0"
+readonly RT2HS_DEVNAME="veth-hs"
+readonly HS_VETH_NAME="veth0"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fcff
+readonly MAC_PREFIX=00:00:00:c0:01
+readonly END_FUNC=000e
+readonly DX2_FUNC=00d2
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+ local rc="$1"
+ local expected="$2"
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ printf "\nTests passed: %3d\n" "${nsuccess}"
+ printf "Tests failed: %3d\n" "${nfail}"
+
+ # when a test fails, the value of 'ret' is set to 1 (error code).
+ # Conversely, when all tests are passed successfully, the 'ret' value
+ # is set to 0 (success code).
+ if [ "${ret}" -ne 1 ]; then
+ ret=0
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+ local cmd="$1"
+
+ if [ ! -x "$(command -v "${cmd}")" ]; then
+ echo "SKIP: Could not run test without \"${cmd}\" tool";
+ exit "${ksft_skip}"
+ fi
+}
+
+get_nodename()
+{
+ local name="$1"
+
+ echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+ local rtid="$1"
+
+ get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+ local hsid="$1"
+
+ get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+ local name="$1"
+
+ ip netns add "${name}"
+}
+
+create_router()
+{
+ local rtid="$1"
+ local nsname
+
+ nsname="$(get_rtname "${rtid}")"
+
+ __create_namespace "${nsname}"
+}
+
+create_host()
+{
+ local hsid="$1"
+ local nsname
+
+ nsname="$(get_hsname "${hsid}")"
+
+ __create_namespace "${nsname}"
+}
+
+cleanup()
+{
+ local nsname
+ local i
+
+ # destroy routers
+ for i in ${ROUTERS}; do
+ nsname="$(get_rtname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # destroy hosts
+ for i in ${HOSTS}; do
+ nsname="$(get_hsname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # check whether the setup phase was completed successfully or not. In
+ # case of an error during the setup phase of the testing environment,
+ # the selftest is considered as "skipped".
+ if [ "${SETUP_ERR}" -ne 0 ]; then
+ echo "SKIP: Setting up the testing environment failed"
+ exit "${ksft_skip}"
+ fi
+
+ exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local neigh
+ local nsname
+ local neigh_nsname
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ neigh_nsname="$(get_rtname "${neigh}")"
+
+ ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+ type veth peer name "veth-rt-${neigh}-${rt}" \
+ netns "${neigh_nsname}"
+ done
+}
+
+get_network_prefix()
+{
+ local rt="$1"
+ local neigh="$2"
+ local p="${rt}"
+ local q="${neigh}"
+
+ if [ "${p}" -gt "${q}" ]; then
+ p="${q}"; q="${rt}"
+ fi
+
+ echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local nsname
+ local net_prefix
+ local devname
+ local neigh
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ ip -netns "${nsname}" addr \
+ add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+ ip -netns "${nsname}" link set "${devname}" up
+ done
+
+ ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy
+
+ ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up
+ ip -netns "${nsname}" link set lo up
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local net_prefix
+ local devname
+ local nsname
+ local neigh
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ # set underlay network routes for SIDs reachability
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+ table "${LOCALSID_TABLE_ID}" \
+ via "${net_prefix}::${neigh}" dev "${devname}"
+ done
+
+ # Local End behavior (note that dev "${DUMMY_DEVNAME}" is a dummy
+ # interface)
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End dev "${DUMMY_DEVNAME}"
+
+ # all SIDs for VPNs start with a common locator. Routes and SRv6
+ # Endpoint behaviors instaces are grouped together in the 'localsid'
+ # table.
+ ip -netns "${nsname}" -6 rule add \
+ to "${VPN_LOCATOR_SERVICE}::/16" \
+ lookup "${LOCALSID_TABLE_ID}" prio 999
+}
+
+# build and install the SRv6 policy into the ingress SRv6 router.
+# args:
+# $1 - destination host (i.e. cafe::x host)
+# $2 - SRv6 router configured for enforcing the SRv6 Policy
+# $3 - SRv6 routers configured for steering traffic (End behaviors)
+# $4 - SRv6 router configured for removing the SRv6 Policy (router connected
+# to the destination host)
+# $5 - encap mode (full or red)
+# $6 - traffic type (IPv6 or IPv4)
+__setup_rt_policy()
+{
+ local dst="$1"
+ local encap_rt="$2"
+ local end_rts="$3"
+ local dec_rt="$4"
+ local mode="$5"
+ local traffic="$6"
+ local nsname
+ local policy=''
+ local n
+
+ nsname="$(get_rtname "${encap_rt}")"
+
+ for n in ${end_rts}; do
+ policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
+ done
+
+ policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DX2_FUNC}"
+
+ # add SRv6 policy to incoming traffic sent by connected hosts
+ if [ "${traffic}" -eq 6 ]; then
+ ip -netns "${nsname}" -6 route \
+ add "${IPv6_HS_NETWORK}::${dst}" \
+ encap seg6 mode "${mode}" segs "${policy}" \
+ dev dum0
+ else
+ ip -netns "${nsname}" -4 route \
+ add "${IPv4_HS_NETWORK}.${dst}" \
+ encap seg6 mode "${mode}" segs "${policy}" \
+ dev dum0
+ fi
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv6()
+{
+ __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6
+}
+
+#see __setup_rt_policy
+setup_rt_policy_ipv4()
+{
+ __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4
+}
+
+setup_decap()
+{
+ local rt="$1"
+ local nsname
+
+ nsname="$(get_rtname "${rt}")"
+
+ # Local End.DX2 behavior
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${rt}::${DX2_FUNC}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.DX2 oif "${RT2HS_DEVNAME}" \
+ dev "${RT2HS_DEVNAME}"
+}
+
+setup_hs()
+{
+ local hs="$1"
+ local rt="$2"
+ local hsname
+ local rtname
+
+ hsname="$(get_hsname "${hs}")"
+ rtname="$(get_rtname "${rt}")"
+
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns "${hsname}" link add "${HS_VETH_NAME}" type veth \
+ peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+ ip -netns "${hsname}" addr add "${IPv6_HS_NETWORK}::${hs}/64" \
+ dev "${HS_VETH_NAME}" nodad
+ ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" \
+ dev "${HS_VETH_NAME}"
+
+ ip -netns "${hsname}" link set "${HS_VETH_NAME}" up
+ ip -netns "${hsname}" link set lo up
+
+ ip -netns "${rtname}" addr add "${IPv6_HS_NETWORK}::254/64" \
+ dev "${RT2HS_DEVNAME}" nodad
+ ip -netns "${rtname}" addr \
+ add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+ ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
+}
+
+# set an auto-generated mac address
+# args:
+# $1 - name of the node (e.g.: hs-1, rt-3, etc)
+# $2 - id of the node (e.g.: 1 for hs-1, 3 for rt-3, etc)
+# $3 - host part of the IPv6 network address
+# $4 - name of the network interface to which the generated mac address must
+# be set.
+set_mac_address()
+{
+ local nodename="$1"
+ local nodeid="$2"
+ local host="$3"
+ local ifname="$4"
+ local nsname
+
+ nsname=$(get_nodename "${nodename}")
+
+ ip -netns "${nsname}" link set dev "${ifname}" down
+
+ ip -netns "${nsname}" link set address "${MAC_PREFIX}:${nodeid}" \
+ dev "${ifname}"
+
+ # the IPv6 address must be set once again after the MAC address has
+ # been changed.
+ ip -netns "${nsname}" addr add "${IPv6_HS_NETWORK}::${host}/64" \
+ dev "${ifname}" nodad
+
+ ip -netns "${nsname}" link set dev "${ifname}" up
+}
+
+set_host_l2peer()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local ipprefix="$3"
+ local proto="$4"
+ local hssrc_name
+ local ipaddr
+
+ hssrc_name="$(get_hsname "${hssrc}")"
+
+ if [ "${proto}" -eq 6 ]; then
+ ipaddr="${ipprefix}::${hsdst}"
+ else
+ ipaddr="${ipprefix}.${hsdst}"
+ fi
+
+ ip -netns "${hssrc_name}" route add "${ipaddr}" dev "${HS_VETH_NAME}"
+
+ ip -netns "${hssrc_name}" neigh \
+ add "${ipaddr}" lladdr "${MAC_PREFIX}:${hsdst}" \
+ dev "${HS_VETH_NAME}"
+}
+
+# setup an SRv6 L2 VPN between host hs-x and hs-y (currently, the SRv6
+# subsystem only supports L2 frames whose layer-3 is IPv4/IPv6).
+# args:
+# $1 - source host
+# $2 - SRv6 routers configured for steering tunneled traffic
+# $3 - destination host
+setup_l2vpn()
+{
+ local hssrc="$1"
+ local end_rts="$2"
+ local hsdst="$3"
+ local rtsrc="${hssrc}"
+ local rtdst="${hsdst}"
+
+ # set fixed mac for source node and the neigh MAC address
+ set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
+ set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6
+ set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4
+
+ # we have to set the mac address of the veth-host (on ingress router)
+ # to the mac address of the remote peer (L2 VPN destination host).
+ # Otherwise, traffic coming from the source host is dropped at the
+ # ingress router.
+ set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
+
+ # set the SRv6 Policies at the ingress router
+ setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \
+ l2encap.red 6
+ setup_rt_policy_ipv4 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \
+ l2encap.red 4
+
+ # set the decap behavior
+ setup_decap "${rtsrc}"
+}
+
+setup()
+{
+ local i
+
+ # create routers
+ ROUTERS="1 2 3 4"; readonly ROUTERS
+ for i in ${ROUTERS}; do
+ create_router "${i}"
+ done
+
+ # create hosts
+ HOSTS="1 2"; readonly HOSTS
+ for i in ${HOSTS}; do
+ create_host "${i}"
+ done
+
+ # set up the links for connecting routers
+ add_link_rt_pairs 1 "2 3 4"
+ add_link_rt_pairs 2 "3 4"
+ add_link_rt_pairs 3 "4"
+
+ # set up the basic connectivity of routers and routes required for
+ # reachability of SIDs.
+ setup_rt_networking 1 "2 3 4"
+ setup_rt_networking 2 "1 3 4"
+ setup_rt_networking 3 "1 2 4"
+ setup_rt_networking 4 "1 2 3"
+
+ # set up the hosts connected to routers
+ setup_hs 1 1
+ setup_hs 2 2
+
+ # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DX2)
+ setup_rt_local_sids 1 "2 3 4"
+ setup_rt_local_sids 2 "1 3 4"
+ setup_rt_local_sids 3 "1 2 4"
+ setup_rt_local_sids 4 "1 2 3"
+
+ # create a L2 VPN between hs-1 and hs-2.
+ # NB: currently, H.L2Encap* enables tunneling of L2 frames whose
+ # layer-3 is IPv4/IPv6.
+ #
+ # the network path between hs-1 and hs-2 traverses several routers
+ # depending on the direction of traffic.
+ #
+ # Direction hs-1 -> hs-2 (H.L2Encaps.Red)
+ # - rt-2 (SRv6 End.DX2 behavior)
+ #
+ # Direction hs-2 -> hs-1 (H.L2Encaps.Red)
+ # - rt-4,rt-3 (SRv6 End behaviors)
+ # - rt-1 (SRv6 End.DX2 behavior)
+ setup_l2vpn 1 "" 2
+ setup_l2vpn 2 "4 3" 1
+
+ # testing environment was set up successfully
+ SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+ local prefix
+ local rtsrc_nsname
+
+ rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+ prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+ ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+
+ check_rt_connectivity "${rtsrc}" "${rtdst}"
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc="$1"
+
+ check_hs_ipv6_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+
+ check_hs_ipv4_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+ check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+}
+
+router_tests()
+{
+ local i
+ local j
+
+ log_section "IPv6 routers connectivity test"
+
+ for i in ${ROUTERS}; do
+ for j in ${ROUTERS}; do
+ if [ "${i}" -eq "${j}" ]; then
+ continue
+ fi
+
+ check_and_log_rt_connectivity "${i}" "${j}"
+ done
+ done
+}
+
+host2gateway_tests()
+{
+ local hs
+
+ log_section "IPv4/IPv6 connectivity test among hosts and gateways"
+
+ for hs in ${HOSTS}; do
+ check_and_log_hs2gw_connectivity "${hs}"
+ done
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 L2 VPN connectivity test hosts (h1 <-> h2)"
+
+ check_and_log_hs_connectivity 1 2
+ check_and_log_hs_connectivity 2 1
+}
+
+test_dummy_dev_or_ksft_skip()
+{
+ local test_netns
+
+ test_netns="dummy-$(mktemp -u XXXXXXXX)"
+
+ if ! ip netns add "${test_netns}"; then
+ echo "SKIP: Cannot set up netns for testing dummy dev support"
+ exit "${ksft_skip}"
+ fi
+
+ modprobe dummy &>/dev/null || true
+ if ! ip -netns "${test_netns}" link \
+ add "${DUMMY_DEVNAME}" type dummy; then
+ echo "SKIP: dummy dev not supported"
+
+ ip netns del "${test_netns}"
+ exit "${ksft_skip}"
+ fi
+
+ ip netns del "${test_netns}"
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+ if ! ip route help 2>&1 | grep -qo "l2encap.red"; then
+ echo "SKIP: Missing SRv6 l2encap.red support in iproute2"
+ exit "${ksft_skip}"
+ fi
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges"
+ exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+
+test_iproute2_supp_or_ksft_skip
+test_dummy_dev_or_ksft_skip
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 5d70b04c482c..2cbb12736596 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -235,6 +235,7 @@ FIXTURE_VARIANT(tls)
{
uint16_t tls_version;
uint16_t cipher_type;
+ bool nopad;
};
FIXTURE_VARIANT_ADD(tls, 12_aes_gcm)
@@ -297,9 +298,17 @@ FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256)
.cipher_type = TLS_CIPHER_AES_GCM_256,
};
+FIXTURE_VARIANT_ADD(tls, 13_nopad)
+{
+ .tls_version = TLS_1_3_VERSION,
+ .cipher_type = TLS_CIPHER_AES_GCM_128,
+ .nopad = true,
+};
+
FIXTURE_SETUP(tls)
{
struct tls_crypto_info_keys tls12;
+ int one = 1;
int ret;
tls_crypto_info_init(variant->tls_version, variant->cipher_type,
@@ -315,6 +324,12 @@ FIXTURE_SETUP(tls)
ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len);
ASSERT_EQ(ret, 0);
+
+ if (variant->nopad) {
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+ (void *)&one, sizeof(one));
+ ASSERT_EQ(ret, 0);
+ }
}
FIXTURE_TEARDOWN(tls)
@@ -629,12 +644,14 @@ TEST_F(tls, splice_from_pipe2)
int p2[2];
int p[2];
+ memrnd(mem_send, sizeof(mem_send));
+
ASSERT_GE(pipe(p), 0);
ASSERT_GE(pipe(p2), 0);
- EXPECT_GE(write(p[1], mem_send, 8000), 0);
- EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0);
- EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0);
- EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0);
+ EXPECT_EQ(write(p[1], mem_send, 8000), 8000);
+ EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, 8000, 0), 8000);
+ EXPECT_EQ(write(p2[1], mem_send + 8000, 8000), 8000);
+ EXPECT_EQ(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 8000);
EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
@@ -668,10 +685,12 @@ TEST_F(tls, splice_to_pipe)
char mem_recv[TLS_PAYLOAD_MAX_LEN];
int p[2];
+ memrnd(mem_send, sizeof(mem_send));
+
ASSERT_GE(pipe(p), 0);
- EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0);
- EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0);
- EXPECT_GE(read(p[0], mem_recv, send_len), 0);
+ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
@@ -860,6 +879,8 @@ TEST_F(tls, multiple_send_single_recv)
char recv_mem[2 * 10];
char send_mem[10];
+ memrnd(send_mem, sizeof(send_mem));
+
EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
memset(recv_mem, 0, total_len);
@@ -876,6 +897,8 @@ TEST_F(tls, single_send_multiple_recv_non_align)
char recv_mem[recv_len * 2];
char send_mem[total_len];
+ memrnd(send_mem, sizeof(send_mem));
+
EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
memset(recv_mem, 0, total_len);
@@ -921,10 +944,10 @@ TEST_F(tls, recv_peek)
char buf[15];
EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
- EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), send_len);
EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
memset(buf, 0, sizeof(buf));
- EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
}
@@ -1582,6 +1605,38 @@ TEST_F(tls_err, bad_cmsg)
EXPECT_EQ(errno, EBADMSG);
}
+TEST_F(tls_err, timeo)
+{
+ struct timeval tv = { .tv_usec = 10000, };
+ char buf[128];
+ int ret;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ ret = setsockopt(self->cfd2, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+ ASSERT_EQ(ret, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret) {
+ usleep(1000); /* Give child a head start */
+
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EAGAIN);
+
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EAGAIN);
+
+ wait(&ret);
+ } else {
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EAGAIN);
+ exit(0);
+ }
+}
+
TEST(non_established) {
struct tls12_crypto_info_aes_gcm_256 tls12;
struct sockaddr_in addr;
@@ -1659,6 +1714,57 @@ TEST(keysizes) {
close(cfd);
}
+TEST(no_pad) {
+ struct tls12_crypto_info_aes_gcm_256 tls12;
+ int ret, fd, cfd, val;
+ socklen_t len;
+ bool notls;
+
+ memset(&tls12, 0, sizeof(tls12));
+ tls12.info.version = TLS_1_3_VERSION;
+ tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256;
+
+ ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+
+ if (notls)
+ exit(KSFT_SKIP);
+
+ ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, sizeof(tls12));
+ EXPECT_EQ(ret, 0);
+
+ ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, sizeof(tls12));
+ EXPECT_EQ(ret, 0);
+
+ val = 1;
+ ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+ (void *)&val, sizeof(val));
+ EXPECT_EQ(ret, 0);
+
+ len = sizeof(val);
+ val = 2;
+ ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+ (void *)&val, &len);
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(val, 1);
+ EXPECT_EQ(len, 4);
+
+ val = 0;
+ ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+ (void *)&val, sizeof(val));
+ EXPECT_EQ(ret, 0);
+
+ len = sizeof(val);
+ val = 2;
+ ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+ (void *)&val, &len);
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(val, 0);
+ EXPECT_EQ(len, 4);
+
+ close(fd);
+ close(cfd);
+}
+
TEST(tls_v6ops) {
struct tls_crypto_info_keys tls12;
struct sockaddr_in6 addr, addr2;
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index f8a19f548ae9..ebbd0b282432 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -34,7 +34,7 @@ cfg_veth() {
ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
ip -netns "${PEER_NS}" link set dev veth1 up
- ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+ ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp
}
run_one() {
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 820bc50f6b68..fad2d1a71cac 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -34,7 +34,7 @@ run_one() {
ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
ip -netns "${PEER_NS}" link set dev veth1 up
- ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+ ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp
ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 807b74c8fd80..832c738cc3c2 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -36,7 +36,7 @@ run_one() {
ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on
- ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+ ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp
tc -n "${PEER_NS}" qdisc add dev veth1 clsact
tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action
tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 6f05e06f6761..1bcd82e1f662 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -46,7 +46,7 @@ create_ns() {
ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
done
- ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
+ ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null
}
create_vxlan_endpoint() {
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 19eac3e44c06..430895d1a2b6 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -289,14 +289,14 @@ if [ $CPUS -gt 1 ]; then
ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
printf "%-60s" "bad setting: XDP with RX nr less than TX"
ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
- section xdp_dummy 2>/dev/null &&\
+ section xdp 2>/dev/null &&\
echo "fail - set operation successful ?!?" || echo " ok "
# the following tests will run with multiple channels active
ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
ip netns exec $NS_DST ethtool -L veth$DST rx 2
ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
- section xdp_dummy 2>/dev/null
+ section xdp 2>/dev/null
printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
echo "fail - set operation successful ?!?" || echo " ok "
@@ -311,7 +311,7 @@ if [ $CPUS -gt 2 ]; then
chk_channels "setting invalid channels nr" $DST 2 2
fi
-ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
+ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null
chk_gro_flag "with xdp attached - gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
chk_tso_flag " - tso flag" $SRC off
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
index f17000a2ccf1..ed0ec7f0927e 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
@@ -35,7 +35,7 @@ then
exit 1
fi
-# Remember where we started so that we can get back and the end.
+# Remember where we started so that we can get back at the end.
curcommit="`git status | head -1 | awk '{ print $NF }'`"
nfail=0
@@ -73,15 +73,10 @@ do
# Test the specified commit.
git checkout $i > $resdir/$ds/$idir/git-checkout.out 2>&1
echo git checkout return code: $? "(Commit $ntry: $i)"
- kvm.sh --allcpus --duration 3 --trust-make > $resdir/$ds/$idir/kvm.sh.out 2>&1
+ kvm.sh --allcpus --duration 3 --trust-make --datestamp "$ds/$idir" > $resdir/$ds/$idir/kvm.sh.out 2>&1
ret=$?
echo kvm.sh return code $ret for commit $i from branch $gitbr
-
- # Move the build products to their resting place.
- runresdir="`grep -m 1 '^Results directory:' < $resdir/$ds/$idir/kvm.sh.out | sed -e 's/^Results directory://'`"
- mv $runresdir $resdir/$ds/$idir
- rrd="`echo $runresdir | sed -e 's,^.*/,,'`"
- echo Run results: $resdir/$ds/$idir/$rrd
+ echo Run results: $resdir/$ds/$idir
if test "$ret" -ne 0
then
# Failure, so leave all evidence intact.
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 0ff59bd8b640..9f0a5d5ff2dd 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -262,6 +262,7 @@ echo All batches started. `date` | tee -a "$oldrun/remote-log"
# Wait for all remaining scenarios to complete and collect results.
for i in $systems
do
+ echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log"
while checkremotefile "$i" "$resdir/$ds/remote.run"
do
sleep 30
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 263e16aeca0e..6c734818a875 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -164,7 +164,7 @@ do
shift
;;
--gdb)
- TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO=y"; export TORTURE_KCONFIG_GDB_ARG
+ TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y"; export TORTURE_KCONFIG_GDB_ARG
TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG
TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG
;;
@@ -180,7 +180,7 @@ do
shift
;;
--kasan)
- TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+ TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
if test -n "$torture_qemu_mem_default"
then
TORTURE_QEMU_MEM=2G
@@ -192,7 +192,7 @@ do
shift
;;
--kcsan)
- TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
+ TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
;;
--kmake-arg|--kmake-args)
checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h
index b86642f90d7f..3a391c9bf468 100644
--- a/tools/testing/selftests/rseq/rseq-riscv.h
+++ b/tools/testing/selftests/rseq/rseq-riscv.h
@@ -86,7 +86,7 @@ do { \
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
- "la "RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \
+ "la " RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \
REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n" \
__rseq_str(label) ":\n"
@@ -103,17 +103,17 @@ do { \
#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
- "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
__rseq_str(label) "\n"
#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \
- "lw "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
- "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ "lw " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
__rseq_str(label) "\n"
#define RSEQ_ASM_OP_CMPNE(var, expect, label) \
REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
- "beq "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ "beq " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
__rseq_str(label) "\n"
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
@@ -127,12 +127,12 @@ do { \
REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \
- "add "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \
+ "add " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \
RSEQ_ASM_TMP_REG_1 "\n" \
REG_L RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n"
#define RSEQ_ASM_OP_R_ADD(count) \
- "add "RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \
+ "add " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \
", %[" __rseq_str(count) "]\n"
#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
@@ -194,8 +194,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -251,8 +251,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expectnot] "r" (expectnot),
[load] "m" (*load),
@@ -301,8 +301,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[count] "r" (count)
RSEQ_INJECT_INPUT
@@ -352,8 +352,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
@@ -411,8 +411,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
@@ -472,8 +472,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expect] "r" (expect),
[v2] "m" (*v2),
@@ -532,8 +532,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
@@ -593,8 +593,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
@@ -651,8 +651,8 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[ptr] "r" (ptr),
[off] "er" (off),
[inc] "er" (inc)
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 986b9458efb2..4177f9507bbe 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -111,7 +111,8 @@ void rseq_init(void)
libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
- if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) {
+ if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
+ *libc_rseq_size_p != 0) {
/* rseq registration owned by glibc */
rseq_offset = *libc_rseq_offset_p;
rseq_size = *libc_rseq_size_p;
diff --git a/tools/testing/selftests/safesetid/Makefile b/tools/testing/selftests/safesetid/Makefile
index fa02c4d5ec13..e815bbf2d0f4 100644
--- a/tools/testing/selftests/safesetid/Makefile
+++ b/tools/testing/selftests/safesetid/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Makefile for mount selftests.
+# Makefile for SafeSetID selftest.
CFLAGS = -Wall -O2
LDLIBS = -lcap
diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c
index 4b809c93ba36..eb9bf0aee951 100644
--- a/tools/testing/selftests/safesetid/safesetid-test.c
+++ b/tools/testing/selftests/safesetid/safesetid-test.c
@@ -3,6 +3,7 @@
#include <stdio.h>
#include <errno.h>
#include <pwd.h>
+#include <grp.h>
#include <string.h>
#include <syscall.h>
#include <sys/capability.h>
@@ -16,17 +17,28 @@
#include <stdbool.h>
#include <stdarg.h>
+/*
+ * NOTES about this test:
+ * - requries libcap-dev to be installed on test system
+ * - requires securityfs to me mounted at /sys/kernel/security, e.g.:
+ * mount -n -t securityfs -o nodev,noexec,nosuid securityfs /sys/kernel/security
+ * - needs CONFIG_SECURITYFS and CONFIG_SAFESETID to be enabled
+ */
+
#ifndef CLONE_NEWUSER
# define CLONE_NEWUSER 0x10000000
#endif
-#define ROOT_USER 0
-#define RESTRICTED_PARENT 1
-#define ALLOWED_CHILD1 2
-#define ALLOWED_CHILD2 3
-#define NO_POLICY_USER 4
+#define ROOT_UGID 0
+#define RESTRICTED_PARENT_UGID 1
+#define ALLOWED_CHILD1_UGID 2
+#define ALLOWED_CHILD2_UGID 3
+#define NO_POLICY_UGID 4
+
+#define UGID_POLICY_STRING "1:2\n1:3\n2:2\n3:3\n"
-char* add_whitelist_policy_file = "/sys/kernel/security/safesetid/add_whitelist_policy";
+char* add_uid_whitelist_policy_file = "/sys/kernel/security/safesetid/uid_allowlist_policy";
+char* add_gid_whitelist_policy_file = "/sys/kernel/security/safesetid/gid_allowlist_policy";
static void die(char *fmt, ...)
{
@@ -106,9 +118,10 @@ static void ensure_user_exists(uid_t uid)
die("couldn't open file\n");
if (fseek(fd, 0, SEEK_END))
die("couldn't fseek\n");
- snprintf(name_str, 10, "%d", uid);
+ snprintf(name_str, 10, "user %d", uid);
p.pw_name=name_str;
p.pw_uid=uid;
+ p.pw_gid=uid;
p.pw_gecos="Test account";
p.pw_dir="/dev/null";
p.pw_shell="/bin/false";
@@ -120,9 +133,36 @@ static void ensure_user_exists(uid_t uid)
}
}
+static void ensure_group_exists(gid_t gid)
+{
+ struct group g;
+
+ FILE *fd;
+ char name_str[10];
+
+ if (getgrgid(gid) == NULL) {
+ memset(&g,0x00,sizeof(g));
+ fd=fopen("/etc/group","a");
+ if (fd == NULL)
+ die("couldn't open group file\n");
+ if (fseek(fd, 0, SEEK_END))
+ die("couldn't fseek group file\n");
+ snprintf(name_str, 10, "group %d", gid);
+ g.gr_name=name_str;
+ g.gr_gid=gid;
+ g.gr_passwd=NULL;
+ g.gr_mem=NULL;
+ int value = putgrent(&g,fd);
+ if (value != 0)
+ die("putgrent failed\n");
+ if (fclose(fd))
+ die("fclose failed\n");
+ }
+}
+
static void ensure_securityfs_mounted(void)
{
- int fd = open(add_whitelist_policy_file, O_WRONLY);
+ int fd = open(add_uid_whitelist_policy_file, O_WRONLY);
if (fd < 0) {
if (errno == ENOENT) {
// Need to mount securityfs
@@ -135,39 +175,60 @@ static void ensure_securityfs_mounted(void)
} else {
if (close(fd) != 0) {
die("close of %s failed: %s\n",
- add_whitelist_policy_file, strerror(errno));
+ add_uid_whitelist_policy_file, strerror(errno));
+ }
+ }
+}
+
+static void write_uid_policies()
+{
+ static char *policy_str = UGID_POLICY_STRING;
+ ssize_t written;
+ int fd;
+
+ fd = open(add_uid_whitelist_policy_file, O_WRONLY);
+ if (fd < 0)
+ die("can't open add_uid_whitelist_policy file\n");
+ written = write(fd, policy_str, strlen(policy_str));
+ if (written != strlen(policy_str)) {
+ if (written >= 0) {
+ die("short write to %s\n", add_uid_whitelist_policy_file);
+ } else {
+ die("write to %s failed: %s\n",
+ add_uid_whitelist_policy_file, strerror(errno));
}
}
+ if (close(fd) != 0) {
+ die("close of %s failed: %s\n",
+ add_uid_whitelist_policy_file, strerror(errno));
+ }
}
-static void write_policies(void)
+static void write_gid_policies()
{
- static char *policy_str =
- "1:2\n"
- "1:3\n"
- "2:2\n"
- "3:3\n";
+ static char *policy_str = UGID_POLICY_STRING;
ssize_t written;
int fd;
- fd = open(add_whitelist_policy_file, O_WRONLY);
+ fd = open(add_gid_whitelist_policy_file, O_WRONLY);
if (fd < 0)
- die("can't open add_whitelist_policy file\n");
+ die("can't open add_gid_whitelist_policy file\n");
written = write(fd, policy_str, strlen(policy_str));
if (written != strlen(policy_str)) {
if (written >= 0) {
- die("short write to %s\n", add_whitelist_policy_file);
+ die("short write to %s\n", add_gid_whitelist_policy_file);
} else {
die("write to %s failed: %s\n",
- add_whitelist_policy_file, strerror(errno));
+ add_gid_whitelist_policy_file, strerror(errno));
}
}
if (close(fd) != 0) {
die("close of %s failed: %s\n",
- add_whitelist_policy_file, strerror(errno));
+ add_gid_whitelist_policy_file, strerror(errno));
}
}
+
static bool test_userns(bool expect_success)
{
uid_t uid;
@@ -194,7 +255,7 @@ static bool test_userns(bool expect_success)
printf("preparing file name string failed");
return false;
}
- success = write_file(map_file_name, "0 0 1", uid);
+ success = write_file(map_file_name, "0 %d 1", uid);
return success == expect_success;
}
@@ -258,13 +319,144 @@ static void test_setuid(uid_t child_uid, bool expect_success)
die("should not reach here\n");
}
+static void test_setgid(gid_t child_gid, bool expect_success)
+{
+ pid_t cpid, w;
+ int wstatus;
+
+ cpid = fork();
+ if (cpid == -1) {
+ die("fork\n");
+ }
+
+ if (cpid == 0) { /* Code executed by child */
+ if (setgid(child_gid) < 0)
+ exit(EXIT_FAILURE);
+ if (getgid() == child_gid)
+ exit(EXIT_SUCCESS);
+ else
+ exit(EXIT_FAILURE);
+ } else { /* Code executed by parent */
+ do {
+ w = waitpid(cpid, &wstatus, WUNTRACED | WCONTINUED);
+ if (w == -1) {
+ die("waitpid\n");
+ }
+
+ if (WIFEXITED(wstatus)) {
+ if (WEXITSTATUS(wstatus) == EXIT_SUCCESS) {
+ if (expect_success) {
+ return;
+ } else {
+ die("unexpected success\n");
+ }
+ } else {
+ if (expect_success) {
+ die("unexpected failure\n");
+ } else {
+ return;
+ }
+ }
+ } else if (WIFSIGNALED(wstatus)) {
+ if (WTERMSIG(wstatus) == 9) {
+ if (expect_success)
+ die("killed unexpectedly\n");
+ else
+ return;
+ } else {
+ die("unexpected signal: %d\n", wstatus);
+ }
+ } else {
+ die("unexpected status: %d\n", wstatus);
+ }
+ } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus));
+ }
+
+ die("should not reach here\n");
+}
+
+static void test_setgroups(gid_t* child_groups, size_t len, bool expect_success)
+{
+ pid_t cpid, w;
+ int wstatus;
+ gid_t groupset[len];
+ int i, j;
+
+ cpid = fork();
+ if (cpid == -1) {
+ die("fork\n");
+ }
+
+ if (cpid == 0) { /* Code executed by child */
+ if (setgroups(len, child_groups) != 0)
+ exit(EXIT_FAILURE);
+ if (getgroups(len, groupset) != len)
+ exit(EXIT_FAILURE);
+ for (i = 0; i < len; i++) {
+ for (j = 0; j < len; j++) {
+ if (child_groups[i] == groupset[j])
+ break;
+ if (j == len - 1)
+ exit(EXIT_FAILURE);
+ }
+ }
+ exit(EXIT_SUCCESS);
+ } else { /* Code executed by parent */
+ do {
+ w = waitpid(cpid, &wstatus, WUNTRACED | WCONTINUED);
+ if (w == -1) {
+ die("waitpid\n");
+ }
+
+ if (WIFEXITED(wstatus)) {
+ if (WEXITSTATUS(wstatus) == EXIT_SUCCESS) {
+ if (expect_success) {
+ return;
+ } else {
+ die("unexpected success\n");
+ }
+ } else {
+ if (expect_success) {
+ die("unexpected failure\n");
+ } else {
+ return;
+ }
+ }
+ } else if (WIFSIGNALED(wstatus)) {
+ if (WTERMSIG(wstatus) == 9) {
+ if (expect_success)
+ die("killed unexpectedly\n");
+ else
+ return;
+ } else {
+ die("unexpected signal: %d\n", wstatus);
+ }
+ } else {
+ die("unexpected status: %d\n", wstatus);
+ }
+ } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus));
+ }
+
+ die("should not reach here\n");
+}
+
+
static void ensure_users_exist(void)
{
- ensure_user_exists(ROOT_USER);
- ensure_user_exists(RESTRICTED_PARENT);
- ensure_user_exists(ALLOWED_CHILD1);
- ensure_user_exists(ALLOWED_CHILD2);
- ensure_user_exists(NO_POLICY_USER);
+ ensure_user_exists(ROOT_UGID);
+ ensure_user_exists(RESTRICTED_PARENT_UGID);
+ ensure_user_exists(ALLOWED_CHILD1_UGID);
+ ensure_user_exists(ALLOWED_CHILD2_UGID);
+ ensure_user_exists(NO_POLICY_UGID);
+}
+
+static void ensure_groups_exist(void)
+{
+ ensure_group_exists(ROOT_UGID);
+ ensure_group_exists(RESTRICTED_PARENT_UGID);
+ ensure_group_exists(ALLOWED_CHILD1_UGID);
+ ensure_group_exists(ALLOWED_CHILD2_UGID);
+ ensure_group_exists(NO_POLICY_UGID);
}
static void drop_caps(bool setid_retained)
@@ -283,41 +475,52 @@ static void drop_caps(bool setid_retained)
int main(int argc, char **argv)
{
+ ensure_groups_exist();
ensure_users_exist();
ensure_securityfs_mounted();
- write_policies();
+ write_uid_policies();
+ write_gid_policies();
if (prctl(PR_SET_KEEPCAPS, 1L))
die("Error with set keepcaps\n");
- // First test to make sure we can write userns mappings from a user
- // that doesn't have any restrictions (as long as it has CAP_SETUID);
- if (setuid(NO_POLICY_USER) < 0)
- die("Error with set uid(%d)\n", NO_POLICY_USER);
- if (setgid(NO_POLICY_USER) < 0)
- die("Error with set gid(%d)\n", NO_POLICY_USER);
-
+ // First test to make sure we can write userns mappings from a non-root
+ // user that doesn't have any restrictions (as long as it has
+ // CAP_SETUID);
+ if (setgid(NO_POLICY_UGID) < 0)
+ die("Error with set gid(%d)\n", NO_POLICY_UGID);
+ if (setuid(NO_POLICY_UGID) < 0)
+ die("Error with set uid(%d)\n", NO_POLICY_UGID);
// Take away all but setid caps
drop_caps(true);
-
// Need PR_SET_DUMPABLE flag set so we can write /proc/[pid]/uid_map
// from non-root parent process.
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0))
die("Error with set dumpable\n");
-
if (!test_userns(true)) {
die("test_userns failed when it should work\n");
}
- if (setuid(RESTRICTED_PARENT) < 0)
- die("Error with set uid(%d)\n", RESTRICTED_PARENT);
- if (setgid(RESTRICTED_PARENT) < 0)
- die("Error with set gid(%d)\n", RESTRICTED_PARENT);
+ // Now switch to a user/group with restrictions
+ if (setgid(RESTRICTED_PARENT_UGID) < 0)
+ die("Error with set gid(%d)\n", RESTRICTED_PARENT_UGID);
+ if (setuid(RESTRICTED_PARENT_UGID) < 0)
+ die("Error with set uid(%d)\n", RESTRICTED_PARENT_UGID);
+
+ test_setuid(ROOT_UGID, false);
+ test_setuid(ALLOWED_CHILD1_UGID, true);
+ test_setuid(ALLOWED_CHILD2_UGID, true);
+ test_setuid(NO_POLICY_UGID, false);
+
+ test_setgid(ROOT_UGID, false);
+ test_setgid(ALLOWED_CHILD1_UGID, true);
+ test_setgid(ALLOWED_CHILD2_UGID, true);
+ test_setgid(NO_POLICY_UGID, false);
- test_setuid(ROOT_USER, false);
- test_setuid(ALLOWED_CHILD1, true);
- test_setuid(ALLOWED_CHILD2, true);
- test_setuid(NO_POLICY_USER, false);
+ gid_t allowed_supp_groups[2] = {ALLOWED_CHILD1_UGID, ALLOWED_CHILD2_UGID};
+ gid_t disallowed_supp_groups[2] = {ROOT_UGID, NO_POLICY_UGID};
+ test_setgroups(allowed_supp_groups, 2, true);
+ test_setgroups(disallowed_supp_groups, 2, false);
if (!test_userns(false)) {
die("test_userns worked when it should fail\n");
@@ -328,8 +531,12 @@ int main(int argc, char **argv)
test_setuid(2, false);
test_setuid(3, false);
test_setuid(4, false);
+ test_setgid(2, false);
+ test_setgid(3, false);
+ test_setgid(4, false);
// NOTE: this test doesn't clean up users that were created in
// /etc/passwd or flush policies that were added to the LSM.
+ printf("test successful!\n");
return EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 136df5b76319..4ae6c8991307 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -809,7 +809,7 @@ void kill_thread_or_group(struct __test_metadata *_metadata,
.len = (unsigned short)ARRAY_SIZE(filter_thread),
.filter = filter_thread,
};
- int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA;
+ int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA;
struct sock_filter filter_process[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
index 47ff5afc3727..64c60f38b446 100644
--- a/tools/testing/selftests/sync/config
+++ b/tools/testing/selftests/sync/config
@@ -1,3 +1,2 @@
CONFIG_STAGING=y
-CONFIG_ANDROID=y
CONFIG_SW_SYNC=y
diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore
index d52f65de23b4..9fe1cef72728 100644
--- a/tools/testing/selftests/tc-testing/.gitignore
+++ b/tools/testing/selftests/tc-testing/.gitignore
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
__pycache__/
*.pyc
-plugins/
*.xml
*.tap
tdc_config_local.py
diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile
index 4d639279f41e..cb553eac9f41 100644
--- a/tools/testing/selftests/tc-testing/Makefile
+++ b/tools/testing/selftests/tc-testing/Makefile
@@ -5,7 +5,6 @@ top_srcdir = $(abspath ../../../..)
APIDIR := $(top_scrdir)/include/uapi
TEST_GEN_FILES = action.o
-KSFT_KHDR_INSTALL := 1
include ../lib.mk
PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile
index 3a5936cc10ab..f0d51d4d2c87 100644
--- a/tools/testing/selftests/timens/Makefile
+++ b/tools/testing/selftests/timens/Makefile
@@ -1,4 +1,4 @@
-TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex
+TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex vfork_exec
TEST_GEN_PROGS_EXTENDED := gettime_perf
CFLAGS := -Wall -Werror -pthread
diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c
new file mode 100644
index 000000000000..e6ccd900f30a
--- /dev/null
+++ b/tools/testing/selftests/timens/vfork_exec.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "log.h"
+#include "timens.h"
+
+#define OFFSET (36000)
+
+int main(int argc, char *argv[])
+{
+ struct timespec now, tst;
+ int status, i;
+ pid_t pid;
+
+ if (argc > 1) {
+ if (sscanf(argv[1], "%ld", &now.tv_sec) != 1)
+ return pr_perror("sscanf");
+
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now.tv_sec) > 5)
+ return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec);
+ }
+ return 0;
+ }
+
+ nscheck();
+
+ ksft_set_plan(1);
+
+ clock_gettime(CLOCK_MONOTONIC, &now);
+
+ if (unshare_timens())
+ return 1;
+
+ if (_settime(CLOCK_MONOTONIC, OFFSET))
+ return 1;
+
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now.tv_sec) > 5)
+ return pr_fail("%ld %ld\n",
+ now.tv_sec, tst.tv_sec);
+ }
+
+ pid = vfork();
+ if (pid < 0)
+ return pr_perror("fork");
+
+ if (pid == 0) {
+ char now_str[64];
+ char *cargv[] = {"exec", now_str, NULL};
+ char *cenv[] = {NULL};
+
+ // Check that we are still in the source timens.
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now.tv_sec) > 5)
+ return pr_fail("%ld %ld\n",
+ now.tv_sec, tst.tv_sec);
+ }
+
+ /* Check for proper vvar offsets after execve. */
+ snprintf(now_str, sizeof(now_str), "%ld", now.tv_sec + OFFSET);
+ execve("/proc/self/exe", cargv, cenv);
+ return pr_perror("execve");
+ }
+
+ if (waitpid(pid, &status, 0) != pid)
+ return pr_perror("waitpid");
+
+ if (status)
+ ksft_exit_fail();
+
+ ksft_test_result_pass("exec\n");
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c
index 54d8d87f36b3..47e05fdc32c5 100644
--- a/tools/testing/selftests/timers/adjtick.c
+++ b/tools/testing/selftests/timers/adjtick.c
@@ -165,7 +165,7 @@ int check_tick_adj(long tickval)
return 0;
}
-int main(int argv, char **argc)
+int main(int argc, char **argv)
{
struct timespec raw;
long tick, max, interval, err;
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
index 54da4b088f4c..4332b494103d 100644
--- a/tools/testing/selftests/timers/alarmtimer-suspend.c
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -92,7 +92,7 @@ long long timespec_sub(struct timespec a, struct timespec b)
return ret;
}
-int final_ret = 0;
+int final_ret;
void sigalarm(int signo)
{
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
index c4eab7124990..992a77f2a74c 100644
--- a/tools/testing/selftests/timers/change_skew.c
+++ b/tools/testing/selftests/timers/change_skew.c
@@ -55,7 +55,7 @@ int change_skew_test(int ppm)
}
-int main(int argv, char **argc)
+int main(int argc, char **argv)
{
struct timex tx;
int i, ret;
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
index ef8eb3604595..c5264594064c 100644
--- a/tools/testing/selftests/timers/clocksource-switch.c
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -23,17 +23,17 @@
*/
+#include <fcntl.h>
#include <stdio.h>
-#include <unistd.h>
#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
#include <sys/time.h>
#include <sys/timex.h>
-#include <time.h>
#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <string.h>
#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
#include "../kselftest.h"
@@ -110,21 +110,40 @@ int run_tests(int secs)
sprintf(buf, "./inconsistency-check -t %i", secs);
ret = system(buf);
- if (ret)
- return ret;
+ if (WIFEXITED(ret) && WEXITSTATUS(ret))
+ return WEXITSTATUS(ret);
ret = system("./nanosleep");
- return ret;
+ return WIFEXITED(ret) ? WEXITSTATUS(ret) : 0;
}
char clocksource_list[10][30];
-int main(int argv, char **argc)
+int main(int argc, char **argv)
{
char orig_clk[512];
- int count, i, status;
+ int count, i, status, opt;
+ int do_sanity_check = 1;
+ int runtime = 60;
pid_t pid;
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "st:")) != -1) {
+ switch (opt) {
+ case 's':
+ do_sanity_check = 0;
+ break;
+ case 't':
+ runtime = atoi(optarg);
+ break;
+ default:
+ printf("Usage: %s [-s] [-t <secs>]\n", argv[0]);
+ printf(" -s: skip sanity checks\n");
+ printf(" -t: Number of seconds to run\n");
+ exit(-1);
+ }
+ }
+
get_cur_clocksource(orig_clk, 512);
count = get_clocksources(clocksource_list);
@@ -135,23 +154,25 @@ int main(int argv, char **argc)
}
/* Check everything is sane before we start switching asynchronously */
- for (i = 0; i < count; i++) {
- printf("Validating clocksource %s\n", clocksource_list[i]);
- if (change_clocksource(clocksource_list[i])) {
- status = -1;
- goto out;
- }
- if (run_tests(5)) {
- status = -1;
- goto out;
+ if (do_sanity_check) {
+ for (i = 0; i < count; i++) {
+ printf("Validating clocksource %s\n",
+ clocksource_list[i]);
+ if (change_clocksource(clocksource_list[i])) {
+ status = -1;
+ goto out;
+ }
+ if (run_tests(5)) {
+ status = -1;
+ goto out;
+ }
}
}
-
printf("Running Asynchronous Switching Tests...\n");
pid = fork();
if (!pid)
- return run_tests(60);
+ return run_tests(runtime);
while (pid != waitpid(pid, &status, WNOHANG))
for (i = 0; i < count; i++)
@@ -162,7 +183,9 @@ int main(int argv, char **argc)
out:
change_clocksource(orig_clk);
- if (status)
- return ksft_exit_fail();
- return ksft_exit_pass();
+ /* Print at the end to not mix output with child process */
+ ksft_print_header();
+ ksft_set_plan(1);
+ ksft_test_result(!status, "clocksource-switch\n");
+ ksft_exit(!status);
}
diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c
index e6756d9c60a7..36a49fba6c9b 100644
--- a/tools/testing/selftests/timers/inconsistency-check.c
+++ b/tools/testing/selftests/timers/inconsistency-check.c
@@ -122,30 +122,28 @@ int consistency_test(int clock_type, unsigned long seconds)
if (inconsistent >= 0) {
unsigned long long delta;
- printf("\%s\n", start_str);
+ ksft_print_msg("\%s\n", start_str);
for (i = 0; i < CALLS_PER_LOOP; i++) {
if (i == inconsistent)
- printf("--------------------\n");
- printf("%lu:%lu\n", list[i].tv_sec,
+ ksft_print_msg("--------------------\n");
+ ksft_print_msg("%lu:%lu\n", list[i].tv_sec,
list[i].tv_nsec);
if (i == inconsistent + 1)
- printf("--------------------\n");
+ ksft_print_msg("--------------------\n");
}
delta = list[inconsistent].tv_sec * NSEC_PER_SEC;
delta += list[inconsistent].tv_nsec;
delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC;
delta -= list[inconsistent+1].tv_nsec;
- printf("Delta: %llu ns\n", delta);
+ ksft_print_msg("Delta: %llu ns\n", delta);
fflush(0);
/* timestamp inconsistency*/
t = time(0);
- printf("%s\n", ctime(&t));
- printf("[FAILED]\n");
+ ksft_print_msg("%s\n", ctime(&t));
return -1;
}
now = list[0].tv_sec;
}
- printf("[OK]\n");
return 0;
}
@@ -178,16 +176,22 @@ int main(int argc, char *argv[])
setbuf(stdout, NULL);
+ ksft_print_header();
+ ksft_set_plan(maxclocks - userclock);
+
for (clockid = userclock; clockid < maxclocks; clockid++) {
- if (clockid == CLOCK_HWSPECIFIC)
+ if (clockid == CLOCK_HWSPECIFIC || clock_gettime(clockid, &ts)) {
+ ksft_test_result_skip("%-31s\n", clockstring(clockid));
continue;
+ }
- if (!clock_gettime(clockid, &ts)) {
- printf("Consistent %-30s ", clockstring(clockid));
- if (consistency_test(clockid, runtime))
- return ksft_exit_fail();
+ if (consistency_test(clockid, runtime)) {
+ ksft_test_result_fail("%-31s\n", clockstring(clockid));
+ ksft_exit_fail();
+ } else {
+ ksft_test_result_pass("%-31s\n", clockstring(clockid));
}
}
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
index 71b5441c2fd9..df1d03516e7b 100644
--- a/tools/testing/selftests/timers/nanosleep.c
+++ b/tools/testing/selftests/timers/nanosleep.c
@@ -133,33 +133,37 @@ int main(int argc, char **argv)
long long length;
int clockid, ret;
+ ksft_print_header();
+ ksft_set_plan(NR_CLOCKIDS);
+
for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
/* Skip cputime clockids since nanosleep won't increment cputime */
if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
clockid == CLOCK_THREAD_CPUTIME_ID ||
- clockid == CLOCK_HWSPECIFIC)
+ clockid == CLOCK_HWSPECIFIC) {
+ ksft_test_result_skip("%-31s\n", clockstring(clockid));
continue;
+ }
- printf("Nanosleep %-31s ", clockstring(clockid));
fflush(stdout);
length = 10;
while (length <= (NSEC_PER_SEC * 10)) {
ret = nanosleep_test(clockid, length);
if (ret == UNSUPPORTED) {
- printf("[UNSUPPORTED]\n");
+ ksft_test_result_skip("%-31s\n", clockstring(clockid));
goto next;
}
if (ret < 0) {
- printf("[FAILED]\n");
- return ksft_exit_fail();
+ ksft_test_result_fail("%-31s\n", clockstring(clockid));
+ ksft_exit_fail();
}
length *= 100;
}
- printf("[OK]\n");
+ ksft_test_result_pass("%-31s\n", clockstring(clockid));
next:
ret = 0;
}
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
index b41d8dd0c40c..5beceeed0d11 100644
--- a/tools/testing/selftests/timers/raw_skew.c
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -89,7 +89,7 @@ void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
}
}
-int main(int argv, char **argc)
+int main(int argc, char **argv)
{
struct timespec mon, raw, start, end;
long long delta1, delta2, interval, eppm, ppm;
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
index 8066be9aff11..63913f75b384 100644
--- a/tools/testing/selftests/timers/skew_consistency.c
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -38,7 +38,7 @@
#define NSEC_PER_SEC 1000000000LL
-int main(int argv, char **argc)
+int main(int argc, char **argv)
{
struct timex tx;
int ret, ppm;
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index 5397de708d3c..48b9a803235a 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -40,7 +40,7 @@
#define ADJ_SETOFFSET 0x0100
#include <sys/syscall.h>
-static int clock_adjtime(clockid_t id, struct timex *tx)
+int clock_adjtime(clockid_t id, struct timex *tx)
{
return syscall(__NR_clock_adjtime, id, tx);
}
diff --git a/tools/testing/selftests/tpm2/settings b/tools/testing/selftests/tpm2/settings
new file mode 100644
index 000000000000..a62d2fa1275c
--- /dev/null
+++ b/tools/testing/selftests/tpm2/settings
@@ -0,0 +1 @@
+timeout=600
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 44f25acfbeca..108587cb327a 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -94,7 +94,6 @@ TEST_PROGS := run_vmtests.sh
TEST_FILES := test_vmalloc.sh
TEST_FILES += test_hmm.sh
-KSFT_KHDR_INSTALL := 1
include ../lib.mk
$(OUTPUT)/madv_populate: vm_util.c
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 0bdfc1955229..4bc24581760d 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -860,7 +860,7 @@ static int stress(struct uffd_stats *uffd_stats)
/*
* Be strict and immediately zap area_src, the whole area has
* been transferred already by the background treads. The
- * area_src could then be faulted in in a racy way by still
+ * area_src could then be faulted in a racy way by still
* running uffdio_threads reading zeropages after we zapped
* area_src (but they're guaranteed to get -EEXIST from
* UFFDIO_COPY without writing zero pages into area_dst
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 7d1b80988d8a..fda76282d34b 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -19,8 +19,6 @@ endif
MIRROR := https://download.wireguard.com/qemu-test/distfiles/
KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
-rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
-WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
default: qemu
@@ -109,20 +107,22 @@ CHOST := x86_64-linux-musl
QEMU_ARCH := x86_64
KERNEL_ARCH := x86_64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host -machine q35,accel=kvm
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
else
-QEMU_MACHINE := -cpu max -machine q35
+QEMU_MACHINE := -cpu max -machine microvm -no-acpi
endif
else ifeq ($(ARCH),i686)
CHOST := i686-linux-musl
QEMU_ARCH := i386
KERNEL_ARCH := x86
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
-QEMU_MACHINE := -cpu host -machine q35,accel=kvm
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
else
-QEMU_MACHINE := -cpu max -machine q35
+QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi
endif
else ifeq ($(ARCH),mips64)
CHOST := mips64-linux-musl
@@ -208,10 +208,11 @@ QEMU_ARCH := m68k
KERNEL_ARCH := m68k
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt -append $(KERNEL_CMDLINE)
else
-QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -machine virt -smp 1 -append $(KERNEL_CMDLINE)
endif
else ifeq ($(ARCH),riscv64)
CHOST := riscv64-linux-musl
@@ -247,8 +248,13 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CM
else
QEMU_MACHINE := -cpu max -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
endif
+else ifeq ($(ARCH),um)
+CHOST := $(HOST_ARCH)-linux-musl
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+KERNEL_ARCH := um
+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/um.config)
else
-$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x)
+$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x, um)
endif
TOOLCHAIN_FILENAME := $(CHOST)-cross.tgz
@@ -261,7 +267,9 @@ $(eval $(call file_download,$(TOOLCHAIN_FILENAME),$(TOOLCHAIN_DIR),,$(DISTFILES_
STRIP := $(CHOST)-strip
CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
$(info Building for $(CHOST) using $(CBUILD))
+ifneq ($(ARCH),um)
export CROSS_COMPILE := $(CHOST)-
+endif
export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH)
export CC := $(CHOST)-gcc
CCACHE_PATH := $(shell which ccache 2>/dev/null)
@@ -278,6 +286,7 @@ comma := ,
build: $(KERNEL_BZIMAGE)
qemu: $(KERNEL_BZIMAGE)
rm -f $(BUILD_PATH)/result
+ifneq ($(ARCH),um)
timeout --foreground 20m qemu-system-$(QEMU_ARCH) \
-nodefaults \
-nographic \
@@ -290,6 +299,13 @@ qemu: $(KERNEL_BZIMAGE)
-no-reboot \
-monitor none \
-kernel $<
+else
+ timeout --foreground 20m $< \
+ $(KERNEL_CMDLINE) \
+ mem=$$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \
+ noreboot \
+ con1=fd:51 51>$(BUILD_PATH)/result </dev/null 2>&1 | cat
+endif
grep -Fq success $(BUILD_PATH)/result
$(BUILD_PATH)/init-cpio-spec.txt: $(TOOLCHAIN_PATH)/.installed $(BUILD_PATH)/init
@@ -322,8 +338,9 @@ $(KERNEL_BUILD_PATH)/.config: $(TOOLCHAIN_PATH)/.installed kernel.config arch/$(
cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
$(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
-$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
+$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init
$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
+.PHONY: $(KERNEL_BZIMAGE)
$(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config $(TOOLCHAIN_PATH)/.installed
rm -rf $(TOOLCHAIN_PATH)/$(CHOST)/include/linux
diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config
index fc7959bef9c2..0579c66be83e 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/arm.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config
@@ -7,6 +7,7 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
CONFIG_VIRTIO_MENU=y
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
index f3066be81c19..2a3307bbe534 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/armeb.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
@@ -7,6 +7,7 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
CONFIG_VIRTIO_MENU=y
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_CPU_BIG_ENDIAN=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config
index 6d90892a85a2..35b06502606f 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/i686.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config
@@ -1,6 +1,10 @@
-CONFIG_ACPI=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1 reboot=t"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
index 82c925e49beb..39c48cba56b7 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
@@ -1,9 +1,7 @@
CONFIG_MMU=y
+CONFIG_VIRT=y
CONFIG_M68KCLASSIC=y
-CONFIG_M68040=y
-CONFIG_MAC=y
-CONFIG_SERIAL_PMACZILOG=y
-CONFIG_SERIAL_PMACZILOG_TTYS=y
-CONFIG_SERIAL_PMACZILOG_CONSOLE=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
+CONFIG_CMDLINE="console=ttyGF0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config
index d7ec63c17b30..2a84402353ab 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config
@@ -6,6 +6,7 @@ CONFIG_POWER_RESET=y
CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
index 18a498293737..56146a101e7e 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
@@ -7,6 +7,7 @@ CONFIG_POWER_RESET=y
CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
index 5e04882e8e35..174a9ffe2a36 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
@@ -4,6 +4,7 @@ CONFIG_PPC_85xx=y
CONFIG_PHYS_64BIT=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_MATH_EMULATION=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
diff --git a/tools/testing/selftests/wireguard/qemu/arch/um.config b/tools/testing/selftests/wireguard/qemu/arch/um.config
new file mode 100644
index 000000000000..c8b229e0810e
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/um.config
@@ -0,0 +1,3 @@
+CONFIG_64BIT=y
+CONFIG_CMDLINE="wg.success=tty1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
index efa00693e08b..cf2d1376d121 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
@@ -1,6 +1,9 @@
-CONFIG_ACPI=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1 reboot=t"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index 2b321b8a96cf..9d172210e2c6 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -18,15 +18,12 @@ CONFIG_DEBUG_VM=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_HAVE_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_HAVE_ARCH_KMEMCHECK=y
CONFIG_HAVE_ARCH_KASAN=y
CONFIG_KASAN=y
CONFIG_KASAN_INLINE=y
CONFIG_UBSAN=y
CONFIG_UBSAN_SANITIZE_ALL=y
-CONFIG_UBSAN_NULL=y
CONFIG_DEBUG_KMEMLEAK=y
-CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_WQ_WATCHDOG=y
@@ -35,7 +32,6 @@ CONFIG_SCHED_INFO=y
CONFIG_SCHEDSTATS=y
CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_DEBUG_TIMEKEEPING=y
-CONFIG_TIMER_STATS=y
CONFIG_DEBUG_PREEMPT=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_SPINLOCK=y
@@ -49,7 +45,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_PLIST=y
CONFIG_PROVE_RCU=y
-CONFIG_SPARSE_RCU_POINTER=y
CONFIG_RCU_CPU_STALL_TIMEOUT=21
CONFIG_RCU_TRACE=y
CONFIG_RCU_EQS_DEBUG=y
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index c9e128436546..3e49924dd77e 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -11,6 +11,7 @@
#include <stdlib.h>
#include <stdbool.h>
#include <fcntl.h>
+#include <time.h>
#include <sys/wait.h>
#include <sys/mount.h>
#include <sys/stat.h>
@@ -70,6 +71,15 @@ static void seed_rng(void)
close(fd);
}
+static void set_time(void)
+{
+ if (time(NULL))
+ return;
+ pretty_message("[+] Setting fake time...");
+ if (stime(&(time_t){1433512680}) < 0)
+ panic("settimeofday()");
+}
+
static void mount_filesystems(void)
{
pretty_message("[+] Mounting filesystems...");
@@ -259,6 +269,7 @@ int main(int argc, char *argv[])
print_banner();
mount_filesystems();
seed_rng();
+ set_time();
kmod_selftests();
enable_logging();
clear_leaks();
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index bad88f4b0a03..ce2a04717300 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_NAT=y
CONFIG_NETFILTER_XT_MATCH_LENGTH=y
CONFIG_NETFILTER_XT_MARK=y
-CONFIG_NF_NAT_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_MANGLE=y
@@ -58,7 +57,6 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_HZ_PERIODIC=n
CONFIG_HIGH_RES_TIMERS=y
-CONFIG_ARCH_RANDOM=y
CONFIG_FILE_LOCKING=y
CONFIG_POSIX_TIMERS=y
CONFIG_DEVTMPFS=y
diff --git a/tools/thermal/tmon/pid.c b/tools/thermal/tmon/pid.c
index 296f69c00c57..da20088285bd 100644
--- a/tools/thermal/tmon/pid.c
+++ b/tools/thermal/tmon/pid.c
@@ -27,7 +27,7 @@
/**************************************************************************
* PID (Proportional-Integral-Derivative) controller is commonly used in
- * linear control system, consider the the process.
+ * linear control system, consider the process.
* G(s) = U(s)/E(s)
* kp = proportional gain
* ki = integral gain
diff --git a/tools/thermal/tmon/tmon.h b/tools/thermal/tmon/tmon.h
index c9066ec104dd..44d16d778f04 100644
--- a/tools/thermal/tmon/tmon.h
+++ b/tools/thermal/tmon/tmon.h
@@ -27,6 +27,9 @@
#define NR_LINES_TZDATA 1
#define TMON_LOG_FILE "/var/tmp/tmon.log"
+#include <sys/time.h>
+#include <pthread.h>
+
extern unsigned long ticktime;
extern double time_elapsed;
extern unsigned long target_temp_user;
diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile
index 3822f4ea5f49..1bea2d16d4c1 100644
--- a/tools/tracing/rtla/Makefile
+++ b/tools/tracing/rtla/Makefile
@@ -1,6 +1,6 @@
NAME := rtla
# Follow the kernel version
-VERSION := $(shell cat VERSION 2> /dev/null || make -sC ../../.. kernelversion)
+VERSION := $(shell cat VERSION 2> /dev/null || make -sC ../../.. kernelversion | grep -v make)
# From libtracefs:
# Makefiles suck: This macro sets a default value of $(2) for the
diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c
index 5784c9f9e570..e1ba6d9f4265 100644
--- a/tools/tracing/rtla/src/trace.c
+++ b/tools/tracing/rtla/src/trace.c
@@ -134,13 +134,18 @@ void trace_instance_destroy(struct trace_instance *trace)
if (trace->inst) {
disable_tracer(trace->inst);
destroy_instance(trace->inst);
+ trace->inst = NULL;
}
- if (trace->seq)
+ if (trace->seq) {
free(trace->seq);
+ trace->seq = NULL;
+ }
- if (trace->tep)
+ if (trace->tep) {
tep_free(trace->tep);
+ trace->tep = NULL;
+ }
}
/*
diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c
index 5352167a1e75..663a047f794d 100644
--- a/tools/tracing/rtla/src/utils.c
+++ b/tools/tracing/rtla/src/utils.c
@@ -106,8 +106,9 @@ int parse_cpu_list(char *cpu_list, char **monitored_cpus)
nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
- mon_cpus = malloc(nr_cpus * sizeof(char));
- memset(mon_cpus, 0, (nr_cpus * sizeof(char)));
+ mon_cpus = calloc(nr_cpus, sizeof(char));
+ if (!mon_cpus)
+ goto err;
for (p = cpu_list; *p; ) {
cpu = atoi(p);
@@ -224,7 +225,7 @@ long parse_ns_duration(char *val)
#elif __arm__
# define __NR_sched_setattr 380
# define __NR_sched_getattr 381
-#elif __aarch64__
+#elif __aarch64__ || __riscv
# define __NR_sched_setattr 274
# define __NR_sched_getattr 275
#elif __powerpc__
diff --git a/tools/usb/testusb.c b/tools/usb/testusb.c
index 474bae868b35..cbaa1b9fdeac 100644
--- a/tools/usb/testusb.c
+++ b/tools/usb/testusb.c
@@ -96,7 +96,10 @@ struct usb_interface_descriptor {
enum usb_device_speed {
USB_SPEED_UNKNOWN = 0, /* enumerating */
USB_SPEED_LOW, USB_SPEED_FULL, /* usb 1.1 */
- USB_SPEED_HIGH /* usb 2.0 */
+ USB_SPEED_HIGH, /* usb 2.0 */
+ USB_SPEED_WIRELESS, /* wireless (usb 2.5) */
+ USB_SPEED_SUPER, /* usb 3.0 */
+ USB_SPEED_SUPER_PLUS, /* usb 3.1 */
};
/*-------------------------------------------------------------------------*/
@@ -104,11 +107,14 @@ enum usb_device_speed {
static char *speed (enum usb_device_speed s)
{
switch (s) {
- case USB_SPEED_UNKNOWN: return "unknown";
- case USB_SPEED_LOW: return "low";
- case USB_SPEED_FULL: return "full";
- case USB_SPEED_HIGH: return "high";
- default: return "??";
+ case USB_SPEED_UNKNOWN: return "unknown";
+ case USB_SPEED_LOW: return "low";
+ case USB_SPEED_FULL: return "full";
+ case USB_SPEED_HIGH: return "high";
+ case USB_SPEED_WIRELESS: return "wireless";
+ case USB_SPEED_SUPER: return "super";
+ case USB_SPEED_SUPER_PLUS: return "super-plus";
+ default: return "??";
}
}
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
index 9b68658b6bb8..5b98f3ee58a5 100644
--- a/tools/vm/slabinfo.c
+++ b/tools/vm/slabinfo.c
@@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
return l;
}
+static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name)
+{
+ char x[128];
+ FILE *f;
+ size_t l;
+
+ snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name);
+ f = fopen(x, "r");
+ if (!f) {
+ buffer[0] = 0;
+ l = 0;
+ } else {
+ l = fread(buffer, 1, sizeof(buffer), f);
+ buffer[l] = 0;
+ fclose(f);
+ }
+ return l;
+}
/*
* Put a size string together
@@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s)
{
printf("\n%s: Kernel object allocation\n", s->name);
printf("-----------------------------------------------------------------------\n");
- if (read_slab_obj(s, "alloc_calls"))
+ if (read_debug_slab_obj(s, "alloc_traces"))
+ printf("%s", buffer);
+ else if (read_slab_obj(s, "alloc_calls"))
printf("%s", buffer);
else
printf("No Data\n");
printf("\n%s: Kernel object freeing\n", s->name);
printf("------------------------------------------------------------------------\n");
- if (read_slab_obj(s, "free_calls"))
+ if (read_debug_slab_obj(s, "free_traces"))
+ printf("%s", buffer);
+ else if (read_slab_obj(s, "free_calls"))
printf("%s", buffer);
else
printf("No Data\n");