Diffstat (limited to 'tools')
-rw-r--r--  tools/accounting/getdelays.c | 8
-rw-r--r--  tools/arch/x86/include/asm/cpufeatures.h | 1
-rw-r--r--  tools/arch/x86/include/asm/msr-index.h | 17
-rw-r--r--  tools/arch/x86/include/asm/required-features.h | 4
-rw-r--r--  tools/arch/x86/include/uapi/asm/kvm.h | 19
-rw-r--r--  tools/arch/x86/include/uapi/asm/prctl.h | 26
-rw-r--r--  tools/arch/x86/lib/memcpy_64.S | 12
-rw-r--r--  tools/arch/x86/lib/memset_64.S | 6
-rw-r--r--  tools/bpf/bpftool/.gitignore | 2
-rw-r--r--  tools/bpf/bpftool/Documentation/Makefile | 5
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-btf.rst | 7
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 17
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-feature.rst | 6
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-gen.rst | 7
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-iter.rst | 6
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-link.rst | 7
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-map.rst | 13
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-net.rst | 72
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-perf.rst | 6
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-prog.rst | 14
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst | 6
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool.rst | 11
-rw-r--r--  tools/bpf/bpftool/Documentation/common_options.rst | 11
-rw-r--r--  tools/bpf/bpftool/Documentation/substitutions.rst | 3
-rw-r--r--  tools/bpf/bpftool/Makefile | 26
-rw-r--r--  tools/bpf/bpftool/bash-completion/bpftool | 5
-rw-r--r--  tools/bpf/bpftool/btf.c | 13
-rw-r--r--  tools/bpf/bpftool/btf_dumper.c | 42
-rw-r--r--  tools/bpf/bpftool/common.c | 1
-rw-r--r--  tools/bpf/bpftool/feature.c | 111
-rw-r--r--  tools/bpf/bpftool/gen.c | 23
-rw-r--r--  tools/bpf/bpftool/iter.c | 7
-rw-r--r--  tools/bpf/bpftool/main.c | 27
-rw-r--r--  tools/bpf/bpftool/main.h | 3
-rw-r--r--  tools/bpf/bpftool/map.c | 36
-rw-r--r--  tools/bpf/bpftool/map_perf_ring.c | 9
-rw-r--r--  tools/bpf/bpftool/prog.c | 260
-rw-r--r--  tools/bpf/bpftool/skeleton/pid_iter.bpf.c | 4
-rw-r--r--  tools/bpf/bpftool/struct_ops.c | 31
-rw-r--r--  tools/bpf/resolve_btfids/Makefile | 1
-rw-r--r--  tools/bpf/resolve_btfids/main.c | 13
-rw-r--r--  tools/bpf/runqslower/Makefile | 4
-rw-r--r--  tools/bpf/runqslower/runqslower.bpf.c | 2
-rw-r--r--  tools/bpf/runqslower/runqslower.c | 8
-rw-r--r--  tools/bpf/runqslower/runqslower.h | 2
-rw-r--r--  tools/build/Build.include | 2
-rw-r--r--  tools/build/Makefile.feature | 1
-rw-r--r--  tools/build/feature/Makefile | 4
-rw-r--r--  tools/build/feature/test-all.c | 5
-rw-r--r--  tools/build/feature/test-bpf.c | 6
-rw-r--r--  tools/build/feature/test-libpython-version.c | 11
-rw-r--r--  tools/iio/iio_event_monitor.c | 1
-rw-r--r--  tools/include/asm-generic/bitops.h | 1
-rw-r--r--  tools/include/linux/bitmap.h | 7
-rw-r--r--  tools/include/linux/debug_locks.h | 14
-rw-r--r--  tools/include/linux/find.h (renamed from tools/include/asm-generic/bitops/find.h) | 81
-rw-r--r--  tools/include/linux/hardirq.h | 12
-rw-r--r--  tools/include/linux/hash.h | 5
-rw-r--r--  tools/include/linux/irqflags.h | 39
-rw-r--r--  tools/include/linux/kernel.h | 24
-rw-r--r--  tools/include/linux/lockdep.h | 72
-rw-r--r--  tools/include/linux/math.h | 25
-rw-r--r--  tools/include/linux/proc_fs.h | 4
-rw-r--r--  tools/include/linux/spinlock.h | 2
-rw-r--r--  tools/include/linux/stacktrace.h | 33
-rw-r--r--  tools/include/nolibc/nolibc.h | 86
-rw-r--r--  tools/include/uapi/asm-generic/unistd.h | 5
-rw-r--r--  tools/include/uapi/drm/drm.h | 18
-rw-r--r--  tools/include/uapi/linux/bpf.h | 165
-rw-r--r--  tools/include/uapi/linux/btf.h | 3
-rw-r--r--  tools/include/uapi/linux/if_link.h | 295
-rw-r--r--  tools/include/uapi/linux/kvm.h | 17
-rw-r--r--  tools/include/uapi/linux/perf_event.h | 5
-rw-r--r--  tools/lib/bpf/Makefile | 4
-rw-r--r--  tools/lib/bpf/bpf.c | 471
-rw-r--r--  tools/lib/bpf/bpf.h | 246
-rw-r--r--  tools/lib/bpf/bpf_gen_internal.h | 17
-rw-r--r--  tools/lib/bpf/bpf_tracing.h | 431
-rw-r--r--  tools/lib/bpf/btf.c | 206
-rw-r--r--  tools/lib/bpf/btf.h | 88
-rw-r--r--  tools/lib/bpf/btf_dump.c | 46
-rw-r--r--  tools/lib/bpf/gen_loader.c | 192
-rw-r--r--  tools/lib/bpf/libbpf.c | 1111
-rw-r--r--  tools/lib/bpf/libbpf.h | 297
-rw-r--r--  tools/lib/bpf/libbpf.map | 30
-rw-r--r--  tools/lib/bpf/libbpf_common.h | 19
-rw-r--r--  tools/lib/bpf/libbpf_internal.h | 117
-rw-r--r--  tools/lib/bpf/libbpf_legacy.h | 13
-rw-r--r--  tools/lib/bpf/libbpf_probes.c | 271
-rw-r--r--  tools/lib/bpf/libbpf_version.h | 2
-rw-r--r--  tools/lib/bpf/linker.c | 10
-rw-r--r--  tools/lib/bpf/relo_core.c | 251
-rw-r--r--  tools/lib/bpf/relo_core.h | 103
-rw-r--r--  tools/lib/bpf/skel_internal.h | 13
-rw-r--r--  tools/lib/bpf/xsk.c | 61
-rw-r--r--  tools/lib/find_bit.c | 20
-rw-r--r--  tools/lib/perf/Documentation/libperf.txt | 11
-rw-r--r--  tools/lib/perf/cpumap.c | 113
-rw-r--r--  tools/lib/perf/evlist.c | 19
-rw-r--r--  tools/lib/perf/evsel.c | 115
-rw-r--r--  tools/lib/perf/include/internal/cpumap.h | 18
-rw-r--r--  tools/lib/perf/include/internal/evlist.h | 5
-rw-r--r--  tools/lib/perf/include/internal/evsel.h | 4
-rw-r--r--  tools/lib/perf/include/internal/mmap.h | 5
-rw-r--r--  tools/lib/perf/include/perf/cpumap.h | 8
-rw-r--r--  tools/lib/perf/include/perf/evsel.h | 14
-rw-r--r--  tools/lib/perf/libperf.map | 2
-rw-r--r--  tools/lib/perf/mmap.c | 4
-rw-r--r--  tools/lib/perf/tests/test-evlist.c | 162
-rw-r--r--  tools/lib/traceevent/event-parse.c | 59
-rw-r--r--  tools/lib/traceevent/event-parse.h | 5
-rw-r--r--  tools/lib/traceevent/parse-filter.c | 5
-rw-r--r--  tools/memory-model/Documentation/explanation.txt | 44
-rw-r--r--  tools/memory-model/README | 12
-rw-r--r--  tools/memory-model/linux-kernel.cat | 6
-rw-r--r--  tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus | 35
-rw-r--r--  tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus | 33
-rw-r--r--  tools/memory-model/litmus-tests/README | 8
-rw-r--r--  tools/objtool/arch/x86/decode.c | 13
-rw-r--r--  tools/objtool/builtin-check.c | 3
-rw-r--r--  tools/objtool/check.c | 71
-rw-r--r--  tools/objtool/elf.c | 1
-rw-r--r--  tools/objtool/include/objtool/arch.h | 1
-rw-r--r--  tools/objtool/include/objtool/builtin.h | 2
-rw-r--r--  tools/objtool/include/objtool/elf.h | 2
-rw-r--r--  tools/objtool/objtool.c | 4
-rw-r--r--  tools/perf/Documentation/perf-buildid-cache.txt | 5
-rw-r--r--  tools/perf/Documentation/perf-config.txt | 9
-rw-r--r--  tools/perf/Documentation/perf-list.txt | 48
-rw-r--r--  tools/perf/Documentation/perf-record.txt | 15
-rw-r--r--  tools/perf/Documentation/perf-stat.txt | 10
-rw-r--r--  tools/perf/Documentation/perf-top.txt | 7
-rw-r--r--  tools/perf/Makefile.config | 12
-rw-r--r--  tools/perf/Makefile.perf | 4
-rw-r--r--  tools/perf/arch/arm/include/perf_regs.h | 42
-rw-r--r--  tools/perf/arch/arm/util/cs-etm.c | 54
-rw-r--r--  tools/perf/arch/arm64/include/perf_regs.h | 78
-rw-r--r--  tools/perf/arch/arm64/util/machine.c | 7
-rw-r--r--  tools/perf/arch/arm64/util/pmu.c | 2
-rw-r--r--  tools/perf/arch/csky/include/perf_regs.h | 82
-rw-r--r--  tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 1
-rw-r--r--  tools/perf/arch/mips/include/perf_regs.h | 69
-rw-r--r--  tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 2
-rw-r--r--  tools/perf/arch/powerpc/include/perf_regs.h | 66
-rw-r--r--  tools/perf/arch/powerpc/util/event.c | 8
-rw-r--r--  tools/perf/arch/riscv/include/perf_regs.h | 74
-rw-r--r--  tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2
-rw-r--r--  tools/perf/arch/s390/include/perf_regs.h | 78
-rw-r--r--  tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1
-rw-r--r--  tools/perf/arch/x86/include/perf_regs.h | 82
-rw-r--r--  tools/perf/arch/x86/util/evlist.c | 17
-rw-r--r--  tools/perf/bench/epoll-ctl.c | 4
-rw-r--r--  tools/perf/bench/epoll-wait.c | 4
-rw-r--r--  tools/perf/bench/evlist-open-close.c | 4
-rw-r--r--  tools/perf/bench/futex-hash.c | 4
-rw-r--r--  tools/perf/bench/futex-lock-pi.c | 4
-rw-r--r--  tools/perf/bench/futex-requeue.c | 4
-rw-r--r--  tools/perf/bench/futex-wake-parallel.c | 4
-rw-r--r--  tools/perf/bench/futex-wake.c | 4
-rw-r--r--  tools/perf/bench/sched-messaging.c | 4
-rw-r--r--  tools/perf/builtin-bench.c | 5
-rw-r--r--  tools/perf/builtin-buildid-cache.c | 25
-rw-r--r--  tools/perf/builtin-c2c.c | 15
-rw-r--r--  tools/perf/builtin-ftrace.c | 447
-rw-r--r--  tools/perf/builtin-inject.c | 20
-rw-r--r--  tools/perf/builtin-kmem.c | 2
-rw-r--r--  tools/perf/builtin-record.c | 23
-rw-r--r--  tools/perf/builtin-report.c | 4
-rw-r--r--  tools/perf/builtin-sched.c | 71
-rw-r--r--  tools/perf/builtin-script.c | 45
-rw-r--r--  tools/perf/builtin-stat.c | 557
-rw-r--r--  tools/perf/builtin-trace.c | 20
-rw-r--r--  tools/perf/dlfilters/dlfilter-test-api-v0.c | 2
-rw-r--r--  tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json | 8
-rw-r--r--  tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json | 20
-rw-r--r--  tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json | 155
-rw-r--r--  tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json | 47
-rw-r--r--  tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json | 143
-rw-r--r--  tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json | 38
-rw-r--r--  tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json | 5
-rw-r--r--  tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json | 23
-rw-r--r--  tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json | 14
-rw-r--r--  tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json | 29
-rw-r--r--  tools/perf/pmu-events/arch/arm64/common-and-microarch.json (renamed from tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json) | 198
-rw-r--r--  tools/perf/pmu-events/arch/arm64/mapfile.csv | 1
-rw-r--r--  tools/perf/pmu-events/arch/arm64/recommended.json (renamed from tools/perf/pmu-events/arch/arm64/armv8-recommended.json) | 202
-rw-r--r--  tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json | 16
-rw-r--r--  tools/perf/pmu-events/jevents.c | 2
-rw-r--r--  tools/perf/scripts/python/intel-pt-events.py | 23
-rw-r--r--  tools/perf/tests/Build | 1
-rw-r--r--  tools/perf/tests/attr.c | 6
-rw-r--r--  tools/perf/tests/bitmap.c | 4
-rw-r--r--  tools/perf/tests/bpf.c | 4
-rw-r--r--  tools/perf/tests/builtin-test.c | 16
-rw-r--r--  tools/perf/tests/cpumap.c | 6
-rw-r--r--  tools/perf/tests/event_update.c | 8
-rw-r--r--  tools/perf/tests/expr.c | 4
-rw-r--r--  tools/perf/tests/mem2node.c | 9
-rw-r--r--  tools/perf/tests/mmap-basic.c | 5
-rw-r--r--  tools/perf/tests/openat-syscall-all-cpus.c | 39
-rw-r--r--  tools/perf/tests/parse-events.c | 49
-rw-r--r--  tools/perf/tests/parse-metric.c | 1
-rw-r--r--  tools/perf/tests/pmu-events.c | 32
-rwxr-xr-x  tools/perf/tests/shell/stat_all_metricgroups.sh | 2
-rw-r--r--  tools/perf/tests/sigtrap.c | 177
-rw-r--r--  tools/perf/tests/stat.c | 3
-rw-r--r--  tools/perf/tests/tests.h | 1
-rw-r--r--  tools/perf/tests/topology.c | 58
-rw-r--r--  tools/perf/ui/browsers/annotate.c | 23
-rw-r--r--  tools/perf/ui/tui/setup.c | 8
-rw-r--r--  tools/perf/util/Build | 2
-rw-r--r--  tools/perf/util/affinity.c | 10
-rw-r--r--  tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 2
-rw-r--r--  tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 1
-rw-r--r--  tools/perf/util/arm-spe.c | 67
-rw-r--r--  tools/perf/util/arm64-frame-pointer-unwind-support.c | 63
-rw-r--r--  tools/perf/util/arm64-frame-pointer-unwind-support.h | 10
-rw-r--r--  tools/perf/util/auxtrace.c | 14
-rw-r--r--  tools/perf/util/auxtrace.h | 5
-rw-r--r--  tools/perf/util/bpf-loader.c | 18
-rw-r--r--  tools/perf/util/bpf_counter.c | 47
-rw-r--r--  tools/perf/util/bpf_counter.h | 4
-rw-r--r--  tools/perf/util/bpf_counter_cgroup.c | 10
-rw-r--r--  tools/perf/util/bpf_ftrace.c | 152
-rw-r--r--  tools/perf/util/bpf_skel/bperf.h | 14
-rw-r--r--  tools/perf/util/bpf_skel/bperf_follower.bpf.c | 19
-rw-r--r--  tools/perf/util/bpf_skel/bperf_leader.bpf.c | 19
-rw-r--r--  tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c | 2
-rw-r--r--  tools/perf/util/bpf_skel/func_latency.bpf.c | 114
-rw-r--r--  tools/perf/util/callchain.c | 14
-rw-r--r--  tools/perf/util/callchain.h | 4
-rw-r--r--  tools/perf/util/counts.c | 8
-rw-r--r--  tools/perf/util/counts.h | 14
-rw-r--r--  tools/perf/util/cpumap.c | 253
-rw-r--r--  tools/perf/util/cpumap.h | 125
-rw-r--r--  tools/perf/util/cputopo.c | 11
-rw-r--r--  tools/perf/util/data-convert-bt.c | 2
-rw-r--r--  tools/perf/util/debug.c | 21
-rw-r--r--  tools/perf/util/env.c | 29
-rw-r--r--  tools/perf/util/env.h | 3
-rw-r--r--  tools/perf/util/event.h | 5
-rw-r--r--  tools/perf/util/evlist-hybrid.c | 11
-rw-r--r--  tools/perf/util/evlist.c | 174
-rw-r--r--  tools/perf/util/evlist.h | 52
-rw-r--r--  tools/perf/util/evsel.c | 205
-rw-r--r--  tools/perf/util/evsel.h | 33
-rw-r--r--  tools/perf/util/expr.c | 49
-rw-r--r--  tools/perf/util/ftrace.h | 81
-rw-r--r--  tools/perf/util/header.c | 21
-rw-r--r--  tools/perf/util/hist.c | 4
-rw-r--r--  tools/perf/util/hist.h | 3
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 85
-rw-r--r--  tools/perf/util/intel-pt.c | 2
-rw-r--r--  tools/perf/util/libunwind/arm64.c | 2
-rw-r--r--  tools/perf/util/machine.c | 53
-rw-r--r--  tools/perf/util/machine.h | 1
-rw-r--r--  tools/perf/util/mem-events.c | 29
-rw-r--r--  tools/perf/util/metricgroup.c | 46
-rw-r--r--  tools/perf/util/mmap.c | 19
-rw-r--r--  tools/perf/util/mmap.h | 3
-rw-r--r--  tools/perf/util/namespaces.c | 76
-rw-r--r--  tools/perf/util/namespaces.h | 2
-rw-r--r--  tools/perf/util/parse-events-hybrid.c | 9
-rw-r--r--  tools/perf/util/parse-events.c | 77
-rw-r--r--  tools/perf/util/parse-events.h | 1
-rw-r--r--  tools/perf/util/parse-events.l | 2
-rw-r--r--  tools/perf/util/parse-events.y | 17
-rw-r--r--  tools/perf/util/perf_api_probe.c | 19
-rw-r--r--  tools/perf/util/perf_regs.c | 669
-rw-r--r--  tools/perf/util/perf_regs.h | 17
-rw-r--r--  tools/perf/util/pmu.c | 23
-rw-r--r--  tools/perf/util/probe-event.c | 3
-rw-r--r--  tools/perf/util/python.c | 14
-rw-r--r--  tools/perf/util/record.c | 13
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-perl.c | 2
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-python.c | 22
-rw-r--r--  tools/perf/util/session.c | 37
-rw-r--r--  tools/perf/util/smt.c | 73
-rw-r--r--  tools/perf/util/sort.c | 36
-rw-r--r--  tools/perf/util/sort.h | 3
-rw-r--r--  tools/perf/util/stat-display.c | 138
-rw-r--r--  tools/perf/util/stat-shadow.c | 308
-rw-r--r--  tools/perf/util/stat.c | 47
-rw-r--r--  tools/perf/util/stat.h | 9
-rw-r--r--  tools/perf/util/svghelper.c | 10
-rw-r--r--  tools/perf/util/synthetic-events.c | 24
-rw-r--r--  tools/perf/util/synthetic-events.h | 3
-rw-r--r--  tools/perf/util/top.c | 6
-rw-r--r--  tools/perf/util/util.c | 15
-rw-r--r--  tools/perf/util/util.h | 11
-rw-r--r--  tools/power/acpi/.gitignore | 1
-rw-r--r--  tools/power/acpi/Makefile | 16
-rw-r--r--  tools/power/acpi/Makefile.config | 1
-rw-r--r--  tools/power/acpi/Makefile.rules | 3
-rw-r--r--  tools/power/acpi/man/pfrut.8 | 137
-rw-r--r--  tools/power/acpi/tools/pfrut/Makefile | 23
-rw-r--r--  tools/power/acpi/tools/pfrut/pfrut.c | 424
-rw-r--r--  tools/power/x86/intel-speed-select/isst-config.c | 4
-rw-r--r--  tools/scripts/Makefile.include | 13
-rw-r--r--  tools/testing/cxl/Kbuild | 3
-rw-r--r--  tools/testing/cxl/test/cxl.c | 68
-rw-r--r--  tools/testing/cxl/test/mem.c | 99
-rw-r--r--  tools/testing/cxl/test/mock.c | 30
-rw-r--r--  tools/testing/cxl/test/mock.h | 6
-rwxr-xr-x  tools/testing/kunit/kunit.py | 182
-rw-r--r--  tools/testing/kunit/kunit_config.py | 61
-rw-r--r--  tools/testing/kunit/kunit_json.py | 8
-rw-r--r--  tools/testing/kunit/kunit_kernel.py | 76
-rw-r--r--  tools/testing/kunit/kunit_parser.py | 57
-rwxr-xr-x  tools/testing/kunit/kunit_tool_test.py | 171
-rwxr-xr-x  tools/testing/kunit/run_checks.py | 81
-rw-r--r--  tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log | 7
-rw-r--r--  tools/testing/nvdimm/Kbuild | 8
-rw-r--r--  tools/testing/nvdimm/dax_pmem_compat_test.c | 8
-rw-r--r--  tools/testing/nvdimm/dax_pmem_core_test.c | 8
-rw-r--r--  tools/testing/nvdimm/test/iomap.c | 43
-rw-r--r--  tools/testing/nvdimm/test/ndtest.c | 4
-rw-r--r--  tools/testing/nvdimm/test/nfit.c | 4
-rw-r--r--  tools/testing/radix-tree/linux/lockdep.h | 3
-rw-r--r--  tools/testing/scatterlist/linux/mm.h | 3
-rw-r--r--  tools/testing/selftests/Makefile | 3
-rw-r--r--  tools/testing/selftests/alsa/.gitignore | 1
-rw-r--r--  tools/testing/selftests/alsa/Makefile | 9
-rw-r--r--  tools/testing/selftests/alsa/mixer-test.c | 705
-rw-r--r--  tools/testing/selftests/arm64/Makefile | 2
-rw-r--r--  tools/testing/selftests/arm64/abi/.gitignore | 1
-rw-r--r--  tools/testing/selftests/arm64/abi/Makefile | 8
-rw-r--r--  tools/testing/selftests/arm64/abi/syscall-abi-asm.S | 240
-rw-r--r--  tools/testing/selftests/arm64/abi/syscall-abi.c | 318
-rw-r--r--  tools/testing/selftests/arm64/fp/.gitignore | 1
-rw-r--r--  tools/testing/selftests/arm64/fp/Makefile | 4
-rw-r--r--  tools/testing/selftests/arm64/fp/fp-pidbench.S | 71
-rw-r--r--  tools/testing/selftests/arm64/fp/sve-ptrace.c | 224
-rw-r--r--  tools/testing/selftests/arm64/fp/vec-syscfg.c | 2
-rw-r--r--  tools/testing/selftests/arm64/signal/test_signals_utils.c | 12
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 94
-rw-r--r--  tools/testing/selftests/bpf/README.rst | 9
-rw-r--r--  tools/testing/selftests/bpf/bench.c | 71
-rw-r--r--  tools/testing/selftests/bpf/bench.h | 11
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c | 17
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_bpf_loop.c | 105
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_count.c | 2
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_rename.c | 16
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_ringbufs.c | 22
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_strncmp.c | 161
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_trigger.c | 162
-rwxr-xr-x  tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh | 15
-rwxr-xr-x  tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh | 12
-rw-r--r--  tools/testing/selftests/bpf/benchs/run_common.sh | 15
-rw-r--r--  tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 20
-rw-r--r--  tools/testing/selftests/bpf/btf_helpers.c | 17
-rw-r--r--  tools/testing/selftests/bpf/config | 2
-rw-r--r--  tools/testing/selftests/bpf/flow_dissector_load.h | 3
-rw-r--r--  tools/testing/selftests/bpf/get_cgroup_id_user.c | 5
-rw-r--r--  tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c | 13
-rw-r--r--  tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c | 13
-rw-r--r--  tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c | 15
-rw-r--r--  tools/testing/selftests/bpf/map_tests/sk_storage_map.c | 52
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/align.c | 202
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/atomics.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c | 36
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 21
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_loop.c | 145
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 7
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c | 42
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf.c | 335
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c | 119
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_dump.c | 45
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c | 16
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_split.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_tag.c | 44
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_write.c | 67
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c | 14
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/connect_force_port.c | 35
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/core_kern.c | 14
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/core_reloc.c | 84
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/d_path.c | 36
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/exhandler.c | 43
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c | 25
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/fexit_stress.c | 33
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/find_vma.c | 117
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/get_func_args_test.c | 44
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c | 27
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/global_data.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/global_func_args.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/kfree_skb.c | 64
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/kfunc_call.c | 24
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/ksyms_btf.c | 14
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/l4lb_all.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/legacy_printk.c | 65
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/libbpf_probes.c | 124
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/log_buf.c | 276
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/map_lock.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/map_ptr.c | 16
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/perf_buffer.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/pinning.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/pkt_access.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/pkt_md_access.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/prog_array_init.c | 32
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/queue_stack_map.c | 14
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c | 14
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c | 29
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/select_reuseport.c | 22
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/signal_pending.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 32
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/skb_ctx.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/skb_helpers.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sock_fields.c | 1
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockmap_basic.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockmap_listen.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockopt.c | 19
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c | 27
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockopt_multi.c | 12
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/spinlock.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/stacktrace_map.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tailcalls.c | 18
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 7
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tcp_estats.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tcp_rtt.c | 21
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_bpffs.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_global_funcs.c | 28
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_local_storage.c | 20
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_overhead.c | 20
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_strncmp.c | 167
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tp_attach_query.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/trampoline_count.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp.c | 13
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_attach.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_bonding.c | 36
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c | 13
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_info.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_link.c | 61
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_perf.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_iter_unix.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_loop.c | 112
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_loop_bench.c | 26
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_tracing_net.h | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/btf_decl_tag.c (renamed from tools/testing/selftests/bpf/progs/tag.c) | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/btf_type_tag.c | 25
-rw-r--r--  tools/testing/selftests/bpf/progs/core_kern.c | 104
-rw-r--r--  tools/testing/selftests/bpf/progs/exhandler_kern.c | 43
-rw-r--r--  tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/find_vma.c | 69
-rw-r--r--  tools/testing/selftests/bpf/progs/find_vma_fail1.c | 29
-rw-r--r--  tools/testing/selftests/bpf/progs/find_vma_fail2.c | 29
-rw-r--r--  tools/testing/selftests/bpf/progs/get_func_args_test.c | 123
-rw-r--r--  tools/testing/selftests/bpf/progs/local_storage.c | 24
-rw-r--r--  tools/testing/selftests/bpf/progs/loop3.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/map_ptr_kern.c | 16
-rw-r--r--  tools/testing/selftests/bpf/progs/pyperf.h | 71
-rw-r--r--  tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c | 6
-rw-r--r--  tools/testing/selftests/bpf/progs/strncmp_bench.c | 50
-rw-r--r--  tools/testing/selftests/bpf/progs/strncmp_test.c | 54
-rw-r--r--  tools/testing/selftests/bpf/progs/strobemeta.h | 75
-rw-r--r--  tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c | 9
-rw-r--r--  tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c | 28
-rw-r--r--  tools/testing/selftests/bpf/progs/test_d_path_check_types.c | 32
-rw-r--r--  tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c | 29
-rw-r--r--  tools/testing/selftests/bpf/progs/test_ksyms_weak.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_l4lb.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_l4lb_noinline.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_legacy_printk.c | 73
-rw-r--r--  tools/testing/selftests/bpf/progs/test_log_buf.c | 24
-rw-r--r--  tools/testing/selftests/bpf/progs/test_map_lock.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_module_attach.c | 12
-rw-r--r--  tools/testing/selftests/bpf/progs/test_prog_array_init.c | 39
-rw-r--r--  tools/testing/selftests/bpf/progs/test_queue_stack_map.h | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sk_lookup.c | 8
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_skb_ctx.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_spin_lock.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_stacktrace_map.c | 6
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tcp_estats.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tracepoint.c | 6
-rw-r--r--  tools/testing/selftests/bpf/progs/test_verif_scale2.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/test_xdp_link.c | 6
-rw-r--r--  tools/testing/selftests/bpf/progs/trigger_bench.c | 7
-rwxr-xr-x  tools/testing/selftests/bpf/test_bpftool_synctypes.py | 94
-rw-r--r--  tools/testing/selftests/bpf/test_btf.h | 3
-rw-r--r--  tools/testing/selftests/bpf/test_cgroup_storage.c | 11
-rw-r--r--  tools/testing/selftests/bpf/test_cpp.cpp | 9
-rw-r--r--  tools/testing/selftests/bpf/test_dev_cgroup.c | 3
-rw-r--r--  tools/testing/selftests/bpf/test_lirc_mode2_user.c | 6
-rw-r--r--  tools/testing/selftests/bpf/test_lpm_map.c | 27
-rw-r--r--  tools/testing/selftests/bpf/test_lru_map.c | 25
-rw-r--r--  tools/testing/selftests/bpf/test_maps.c | 118
-rw-r--r--  tools/testing/selftests/bpf/test_progs.c | 30
-rw-r--r--  tools/testing/selftests/bpf/test_sock.c | 393
-rw-r--r--  tools/testing/selftests/bpf/test_sock_addr.c | 46
-rw-r--r--  tools/testing/selftests/bpf/test_stub.c | 44
-rw-r--r--  tools/testing/selftests/bpf/test_sysctl.c | 23
-rw-r--r--  tools/testing/selftests/bpf/test_tag.c | 8
-rw-r--r--  tools/testing/selftests/bpf/test_tcpnotify_user.c | 7
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c | 112
-rw-r--r--  tools/testing/selftests/bpf/testing_helpers.c | 62
-rw-r--r--  tools/testing/selftests/bpf/testing_helpers.h | 6
-rw-r--r--  tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c | 86
-rw-r--r--  tools/testing/selftests/bpf/verifier/atomic_fetch.c | 94
-rw-r--r--  tools/testing/selftests/bpf/verifier/btf_ctx_access.c | 12
-rw-r--r--  tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c | 32
-rw-r--r--  tools/testing/selftests/bpf/verifier/ringbuf.c | 95
-rw-r--r--  tools/testing/selftests/bpf/verifier/search_pruning.c | 71
-rw-r--r--  tools/testing/selftests/bpf/verifier/spill_fill.c | 60
-rw-r--r--  tools/testing/selftests/bpf/verifier/value_ptr_arith.c | 23
-rw-r--r--  tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c | 632
-rwxr-xr-x  tools/testing/selftests/bpf/vmtest.sh | 48
-rw-r--r--  tools/testing/selftests/bpf/xdp_redirect_multi.c | 15
-rw-r--r--  tools/testing/selftests/bpf/xdping.c | 3
-rw-r--r--  tools/testing/selftests/bpf/xdpxceiver.c | 14
-rw-r--r--  tools/testing/selftests/cgroup/Makefile | 12
-rw-r--r--  tools/testing/selftests/cgroup/cgroup_util.c | 2
-rw-r--r--  tools/testing/selftests/cgroup/cgroup_util.h | 4
-rw-r--r--  tools/testing/selftests/cgroup/test_core.c | 165
-rw-r--r--  tools/testing/selftests/clone3/clone3.c | 6
-rw-r--r--  tools/testing/selftests/core/close_range_test.c | 4
-rw-r--r--  tools/testing/selftests/damon/.gitignore | 2
-rw-r--r--  tools/testing/selftests/damon/Makefile | 7
-rw-r--r--  tools/testing/selftests/damon/_debugfs_common.sh | 52
-rw-r--r--  tools/testing/selftests/damon/debugfs_attrs.sh | 73
-rw-r--r--  tools/testing/selftests/damon/debugfs_empty_targets.sh | 13
-rw-r--r--  tools/testing/selftests/damon/debugfs_huge_count_read_write.sh | 22
-rw-r--r--  tools/testing/selftests/damon/debugfs_schemes.sh | 19
-rw-r--r--  tools/testing/selftests/damon/debugfs_target_ids.sh | 19
-rw-r--r--  tools/testing/selftests/damon/huge_count_read_write.c | 39
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh | 7
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh | 342
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh | 30
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh | 322
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh | 334
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 242
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh | 39
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh | 12
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh | 65
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/profile.tc | 2
-rw-r--r--  tools/testing/selftests/gpio/.gitignore | 2
-rw-r--r--  tools/testing/selftests/gpio/Makefile | 4
-rw-r--r--  tools/testing/selftests/gpio/config | 1
-rw-r--r--  tools/testing/selftests/gpio/gpio-chip-info.c | 57
-rw-r--r--  tools/testing/selftests/gpio/gpio-line-name.c | 55
-rwxr-xr-x  tools/testing/selftests/gpio/gpio-sim.sh | 396
-rw-r--r--  tools/testing/selftests/ir/ir_loopback.c | 1
-rw-r--r--  tools/testing/selftests/kexec/Makefile | 2
-rwxr-xr-x  tools/testing/selftests/kexec/kexec_common_lib.sh | 51
-rwxr-xr-x  tools/testing/selftests/kexec/test_kexec_file_load.sh | 13
-rw-r--r--  tools/testing/selftests/kselftest.h | 4
-rw-r--r--  tools/testing/selftests/kselftest_harness.h | 4
-rw-r--r--  tools/testing/selftests/kvm/.gitignore | 8
-rw-r--r--  tools/testing/selftests/kvm/Makefile | 25
-rw-r--r--  tools/testing/selftests/kvm/aarch64/arch_timer.c | 2
-rw-r--r--  tools/testing/selftests/kvm/aarch64/get-reg-list.c | 50
-rw-r--r--  tools/testing/selftests/kvm/aarch64/vgic_irq.c | 853
-rw-r--r--  tools/testing/selftests/kvm/include/aarch64/gic.h | 26
-rw-r--r--  tools/testing/selftests/kvm/include/aarch64/gic_v3.h (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_v3.h) | 12
-rw-r--r--  tools/testing/selftests/kvm/include/aarch64/processor.h | 3
-rw-r--r--  tools/testing/selftests/kvm/include/aarch64/vgic.h | 18
-rw-r--r--  tools/testing/selftests/kvm/include/kvm_util.h | 408
-rw-r--r--  tools/testing/selftests/kvm/include/kvm_util_base.h | 398
-rw-r--r--  tools/testing/selftests/kvm/include/riscv/processor.h | 135
-rw-r--r--  tools/testing/selftests/kvm/include/ucall_common.h | 59
-rw-r--r--  tools/testing/selftests/kvm/include/x86_64/processor.h | 52
-rw-r--r--  tools/testing/selftests/kvm/kvm_create_max_vcpus.c | 30
-rw-r--r--  tools/testing/selftests/kvm/kvm_page_table_test.c | 2
-rw-r--r--  tools/testing/selftests/kvm/lib/aarch64/gic.c | 66
-rw-r--r--  tools/testing/selftests/kvm/lib/aarch64/gic_private.h | 11
-rw-r--r--  tools/testing/selftests/kvm/lib/aarch64/gic_v3.c | 206
-rw-r--r--  tools/testing/selftests/kvm/lib/aarch64/processor.c | 82
-rw-r--r--  tools/testing/selftests/kvm/lib/aarch64/vgic.c | 103
-rw-r--r--  tools/testing/selftests/kvm/lib/guest_modes.c | 59
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util.c | 136
-rw-r--r--  tools/testing/selftests/kvm/lib/riscv/processor.c | 362
-rw-r--r--  tools/testing/selftests/kvm/lib/riscv/ucall.c | 87
-rw-r--r--  tools/testing/selftests/kvm/lib/x86_64/processor.c | 270
-rw-r--r--  tools/testing/selftests/kvm/x86_64/amx_test.c | 450
-rw-r--r--  tools/testing/selftests/kvm/x86_64/cpuid_test.c (renamed from tools/testing/selftests/kvm/x86_64/get_cpuid_test.c) | 30
-rw-r--r--  tools/testing/selftests/kvm/x86_64/evmcs_test.c | 2
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_features.c | 140
-rw-r--r--  tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c | 434
-rw-r--r--  tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c | 216
-rw-r--r--  tools/testing/selftests/kvm/x86_64/smm_test.c | 3
-rw-r--r--  tools/testing/selftests/kvm/x86_64/state_test.c | 2
-rw-r--r--  tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c | 2
-rw-r--r--  tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c | 4
-rw-r--r--  tools/testing/selftests/kvm/x86_64/userspace_io_test.c | 114
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c | 4
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c | 139
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c | 105
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c | 17
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c | 2
-rw-r--r--  tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c | 218
-rw-r--r--  tools/testing/selftests/landlock/common.h | 4
-rw-r--r--  tools/testing/selftests/lib.mk | 2
-rwxr-xr-x  tools/testing/selftests/lkdtm/stack-entropy.sh | 16
-rw-r--r--  tools/testing/selftests/mount/unprivileged-remount-test.c | 4
-rw-r--r--  tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c | 10
-rw-r--r--  tools/testing/selftests/net/Makefile | 1
-rwxr-xr-x [-rw-r--r--]  tools/testing/selftests/net/amt.sh | 0
-rwxr-xr-x  tools/testing/selftests/net/fcnal-test.sh | 102
-rwxr-xr-x  tools/testing/selftests/net/fib_nexthops.sh | 63
-rwxr-xr-x  tools/testing/selftests/net/fib_tests.sh | 59
-rwxr-xr-x  tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh | 543
-rw-r--r--  tools/testing/selftests/net/forwarding/forwarding.config.sample | 2
-rw-r--r--  tools/testing/selftests/net/forwarding/lib.sh | 7
-rwxr-xr-x  tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh | 347
-rwxr-xr-x  tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh | 504
-rwxr-xr-x  tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh | 804
-rwxr-xr-x  tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh | 11
-rwxr-xr-x  tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh | 20
-rwxr-xr-x  tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh | 837
-rwxr-xr-x  tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh | 11
-rwxr-xr-x  tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh | 563
-rw-r--r--  tools/testing/selftests/net/gro.c | 41
-rwxr-xr-x  tools/testing/selftests/net/icmp_redirect.sh | 2
-rw-r--r--  tools/testing/selftests/net/ioam6_parser.c | 5
-rw-r--r--  tools/testing/selftests/net/ipsec.c | 1
-rw-r--r--  tools/testing/selftests/net/mptcp/.gitignore | 1
-rw-r--r--  tools/testing/selftests/net/mptcp/Makefile | 2
-rw-r--r--  tools/testing/selftests/net/mptcp/config | 10
-rw-r--r--  tools/testing/selftests/net/mptcp/mptcp_connect.c | 350
-rwxr-xr-x  tools/testing/selftests/net/mptcp/mptcp_connect.sh | 139
-rw-r--r--  tools/testing/selftests/net/mptcp/mptcp_inq.c | 602
-rwxr-xr-x  tools/testing/selftests/net/mptcp/mptcp_join.sh | 208
-rw-r--r--  tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 63
-rwxr-xr-x  tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 44
-rw-r--r--  tools/testing/selftests/net/nettest.c | 33
-rw-r--r--  tools/testing/selftests/net/reuseport_bpf.c | 4
-rw-r--r--  tools/testing/selftests/net/rxtimestamp.c | 2
-rw-r--r--  tools/testing/selftests/net/settings | 2
-rw-r--r--  tools/testing/selftests/net/socket.c | 3
-rw-r--r--  tools/testing/selftests/net/tcp_fastopen_backup_key.c | 6
-rw-r--r--  tools/testing/selftests/net/tls.c | 557
-rw-r--r--  tools/testing/selftests/net/toeplitz.c | 2
-rwxr-xr-x  tools/testing/selftests/net/udpgro_fwd.sh | 7
-rw-r--r--  tools/testing/selftests/net/udpgso.c | 12
-rw-r--r--  tools/testing/selftests/net/udpgso_bench_tx.c | 8
-rw-r--r--  tools/testing/selftests/netfilter/Makefile | 3
-rwxr-xr-x  tools/testing/selftests/netfilter/conntrack_vrf.sh | 241
-rwxr-xr-x  tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh | 9
-rwxr-xr-x  tools/testing/selftests/netfilter/nf_nat_edemux.sh | 10
-rwxr-xr-x  tools/testing/selftests/netfilter/nft_concat_range.sh | 24
-rwxr-xr-x  tools/testing/selftests/netfilter/nft_nat.sh | 38
-rwxr-xr-x  tools/testing/selftests/netfilter/nft_queue.sh | 54
-rwxr-xr-x  tools/testing/selftests/netfilter/nft_zones_many.sh | 19
-rwxr-xr-x  tools/testing/selftests/powerpc/security/mitigation-patching.sh | 5
-rw-r--r--  tools/testing/selftests/powerpc/security/spectre_v2.c | 2
-rw-r--r--  tools/testing/selftests/powerpc/signal/.gitignore | 2
-rw-r--r--  tools/testing/selftests/powerpc/signal/Makefile | 2
-rw-r--r--  tools/testing/selftests/powerpc/signal/sigreturn_kernel.c | 132
-rw-r--r--  tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c | 43
-rw-r--r--  tools/testing/selftests/ptp/testptp.c | 24
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh | 4
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh | 2
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-remote.sh | 23
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm.sh | 9
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/parse-build.sh | 3
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-T | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-U | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TINY01 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TINY02 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE01 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE02 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE04 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE05 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE06 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE07 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE08 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE10 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcuscale/TINY | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcuscale/TREE | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/refscale/PREEMPT | 1
-rw-r--r--  tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 1
-rw-r--r--  tools/testing/selftests/rseq/basic_percpu_ops_test.c | 3
-rw-r--r--  tools/testing/selftests/rseq/rseq.c | 3
-rw-r--r--  tools/testing/selftests/seccomp/seccomp_benchmark.c | 2
-rw-r--r--  tools/testing/selftests/seccomp/seccomp_bpf.c | 56
-rw-r--r--  tools/testing/selftests/sgx/Makefile | 2
-rw-r--r--  tools/testing/selftests/sgx/defines.h | 33
-rw-r--r--  tools/testing/selftests/sgx/load.c | 40
-rw-r--r--  tools/testing/selftests/sgx/main.c | 356
-rw-r--r--  tools/testing/selftests/sgx/main.h | 6
-rw-r--r--  tools/testing/selftests/sgx/sigstruct.c | 12
-rw-r--r--  tools/testing/selftests/sgx/test_encl.c | 60
-rw-r--r--  tools/testing/selftests/sgx/test_encl_bootstrap.S | 21
-rw-r--r--  tools/testing/selftests/sparc64/drivers/adi-test.c | 4
-rw-r--r--  tools/testing/selftests/tc-testing/config | 2
-rw-r--r--  tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json | 2
-rw-r--r--  tools/testing/selftests/tc-testing/tc-tests/actions/police.json | 24
-rw-r--r--  tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json | 72
-rw-r--r--  tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json | 12
-rwxr-xr-x  tools/testing/selftests/tc-testing/tdc.py | 8
-rwxr-xr-x  tools/testing/selftests/tc-testing/tdc.sh | 1
-rw-r--r--  tools/testing/selftests/timens/procfs.c | 2
-rw-r--r--  tools/testing/selftests/timens/timens.c | 2
-rw-r--r--  tools/testing/selftests/timers/alarmtimer-suspend.c | 2
-rw-r--r--  tools/testing/selftests/timers/inconsistency-check.c | 2
-rw-r--r--  tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 34
-rw-r--r--  tools/testing/selftests/vm/hmm-tests.c | 42
-rw-r--r--  tools/testing/selftests/vm/hugepage-mremap.c | 46
-rw-r--r--  tools/testing/selftests/vm/hugetlb_reparenting_test.sh | 21
-rw-r--r--  tools/testing/selftests/vm/mremap_test.c | 1
-rw-r--r--  tools/testing/selftests/vm/pkey-helpers.h | 3
-rwxr-xr-x  tools/testing/selftests/vm/run_vmtests.sh | 2
-rw-r--r--  tools/testing/selftests/vm/userfaultfd.c | 19
-rw-r--r--  tools/testing/selftests/vm/va_128TBswitch.c | 2
-rw-r--r--  tools/testing/selftests/vm/write_hugetlb_memory.sh | 2
-rwxr-xr-x  tools/testing/selftests/wireguard/netns.sh | 30
-rw-r--r--  tools/testing/selftests/wireguard/qemu/debug.config | 2
-rw-r--r--  tools/testing/selftests/wireguard/qemu/kernel.config | 1
-rw-r--r--  tools/thermal/tmon/pid.c | 3
-rw-r--r--  tools/tracing/Makefile | 18
-rw-r--r--  tools/tracing/rtla/Makefile | 102
-rw-r--r--  tools/tracing/rtla/README.txt | 36
-rw-r--r--  tools/tracing/rtla/src/osnoise.c | 875
-rw-r--r--  tools/tracing/rtla/src/osnoise.h | 91
-rw-r--r--  tools/tracing/rtla/src/osnoise_hist.c | 801
-rw-r--r--  tools/tracing/rtla/src/osnoise_top.c | 579
-rw-r--r--  tools/tracing/rtla/src/rtla.c | 87
-rw-r--r--  tools/tracing/rtla/src/timerlat.c | 72
-rw-r--r--  tools/tracing/rtla/src/timerlat.h | 4
-rw-r--r--  tools/tracing/rtla/src/timerlat_hist.c | 822
-rw-r--r--  tools/tracing/rtla/src/timerlat_top.c | 618
-rw-r--r--  tools/tracing/rtla/src/trace.c | 192
-rw-r--r--  tools/tracing/rtla/src/trace.h | 27
-rw-r--r--  tools/tracing/rtla/src/utils.c | 433
-rw-r--r--  tools/tracing/rtla/src/utils.h | 56
746 files changed, 36440 insertions, 7733 deletions
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 5ef1c15e88ad..11e86739456d 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -205,6 +205,8 @@ static void print_delayacct(struct taskstats *t)
"RECLAIM %12s%15s%15s\n"
" %15llu%15llu%15llums\n"
"THRASHING%12s%15s%15s\n"
+ " %15llu%15llu%15llums\n"
+ "COMPACT %12s%15s%15s\n"
" %15llu%15llu%15llums\n",
"count", "real total", "virtual total",
"delay total", "delay average",
@@ -228,7 +230,11 @@ static void print_delayacct(struct taskstats *t)
"count", "delay total", "delay average",
(unsigned long long)t->thrashing_count,
(unsigned long long)t->thrashing_delay_total,
- average_ms(t->thrashing_delay_total, t->thrashing_count));
+ average_ms(t->thrashing_delay_total, t->thrashing_count),
+ "count", "delay total", "delay average",
+ (unsigned long long)t->compact_count,
+ (unsigned long long)t->compact_delay_total,
+ average_ms(t->compact_delay_total, t->compact_count));
}
static void task_context_switch_counts(struct taskstats *t)
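The new COMPACT block mirrors the existing THRASHING block: a count, a total delay, and a derived per-event average. A minimal sketch of that derivation, assuming getdelays.c's average_ms() helper divides the total delay (nanoseconds) by the event count while guarding against a zero count; the taskstats compact_* fields require a kernel that carries this patch:

    /* Hypothetical standalone illustration, not part of the patch. */
    #include <stdio.h>
    #include <linux/taskstats.h>

    /* Assumed behaviour of getdelays.c's average_ms(): ns -> ms, count-safe. */
    static unsigned long long mean_delay_ms(unsigned long long total_ns,
                                            unsigned long long count)
    {
            return (total_ns / 1000000ULL) / (count ? count : 1);
    }

    static void print_compact_delay(const struct taskstats *t)
    {
            printf("COMPACT count=%llu total=%lluns avg=%llums\n",
                   (unsigned long long)t->compact_count,
                   (unsigned long long)t->compact_delay_total,
                   mean_delay_ms(t->compact_delay_total, t->compact_count));
    }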
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index d5b5f2ab87a0..18de5f76f198 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -315,6 +315,7 @@
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
+#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 01e2650b9585..3faf0f97edb1 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -486,6 +486,23 @@
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+/* AMD Collaborative Processor Performance Control MSRs */
+#define MSR_AMD_CPPC_CAP1 0xc00102b0
+#define MSR_AMD_CPPC_ENABLE 0xc00102b1
+#define MSR_AMD_CPPC_CAP2 0xc00102b2
+#define MSR_AMD_CPPC_REQ 0xc00102b3
+#define MSR_AMD_CPPC_STATUS 0xc00102b4
+
+#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
+#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
+#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff)
+#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff)
+
+#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0)
+#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
+#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
+#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
+
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
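The CPPC additions are symmetrical 8-bit field accessors: the AMD_CPPC_*_PERF(x) extractors unpack MSR_AMD_CPPC_CAP1, while the AMD_CPPC_MAX/MIN/DES/ENERGY_PERF_PREF builders pack MSR_AMD_CPPC_REQ. A hedged sketch of the intended usage, with read_msr()/write_msr() standing in for whatever MSR access mechanism a consumer has (e.g. /dev/cpu/N/msr); those helpers are assumptions, not part of this header:

    #include <stdint.h>

    /* Assumed helpers; not defined by this patch. */
    uint64_t read_msr(int cpu, uint32_t msr);
    void write_msr(int cpu, uint32_t msr, uint64_t val);

    static void cppc_request_nominal(int cpu)
    {
            uint64_t cap1 = read_msr(cpu, MSR_AMD_CPPC_CAP1);

            /* Permit the full performance range; aim for the nominal level. */
            uint64_t req = AMD_CPPC_MIN_PERF(AMD_CPPC_LOWEST_PERF(cap1)) |
                           AMD_CPPC_MAX_PERF(AMD_CPPC_HIGHEST_PERF(cap1)) |
                           AMD_CPPC_DES_PERF(AMD_CPPC_NOMINAL_PERF(cap1)) |
                           AMD_CPPC_ENERGY_PERF_PREF(0);

            write_msr(cpu, MSR_AMD_CPPC_REQ, req);
    }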
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index b2d504f11937..aff774775c67 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -35,11 +35,7 @@
# define NEED_CMOV 0
#endif
-#ifdef CONFIG_X86_USE_3DNOW
-# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31))
-#else
# define NEED_3DNOW 0
-#endif
#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 5a776a08f78c..bf6e96011dfe 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -373,9 +373,23 @@ struct kvm_debugregs {
__u64 reserved[9];
};
-/* for KVM_CAP_XSAVE */
+/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */
struct kvm_xsave {
+ /*
+ * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes
+ * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * respectively, when invoked on the vm file descriptor.
+ *
+ * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * will always be at least 4096. Currently, it is only greater
+ * than 4096 if a dynamic feature has been enabled with
+ * ``arch_prctl()``, but this may change in the future.
+ *
+ * The offsets of the state save areas in struct kvm_xsave follow
+ * the contents of CPUID leaf 0xD on the host.
+ */
__u32 region[1024];
+ __u32 extra[0];
};
#define KVM_MAX_XCRS 16
@@ -438,6 +452,9 @@ struct kvm_sync_regs {
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
+/* attributes for system fd (group 0) */
+#define KVM_X86_XCOMP_GUEST_SUPP 0
+
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
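The new kvm_xsave comment describes a two-step userspace contract: size the buffer with KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) on the VM file descriptor, then read the possibly-larger-than-4096-byte state with KVM_GET_XSAVE2 on the vCPU. A hedged sketch of that pattern (error handling trimmed; assumes headers new enough to define KVM_CAP_XSAVE2 and KVM_GET_XSAVE2):

    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static struct kvm_xsave *read_xsave2(int vm_fd, int vcpu_fd)
    {
            /* Per the comment above: at least 4096 bytes, larger once a
             * dynamic feature has been enabled with arch_prctl(). */
            int size = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XSAVE2);
            struct kvm_xsave *xsave;

            if (size < (int)sizeof(struct kvm_xsave))
                    size = sizeof(struct kvm_xsave);

            xsave = calloc(1, size);
            if (xsave && ioctl(vcpu_fd, KVM_GET_XSAVE2, xsave) < 0) {
                    free(xsave);
                    return NULL;
            }
            return xsave;   /* 'extra' holds state beyond the first 4096 bytes */
    }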
diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h
index 754a07856817..500b96e71f18 100644
--- a/tools/arch/x86/include/uapi/asm/prctl.h
+++ b/tools/arch/x86/include/uapi/asm/prctl.h
@@ -2,20 +2,22 @@
#ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS 0x1001
+#define ARCH_SET_FS 0x1002
+#define ARCH_GET_FS 0x1003
+#define ARCH_GET_GS 0x1004
-#define ARCH_GET_CPUID 0x1011
-#define ARCH_SET_CPUID 0x1012
+#define ARCH_GET_CPUID 0x1011
+#define ARCH_SET_CPUID 0x1012
-#define ARCH_GET_XCOMP_SUPP 0x1021
-#define ARCH_GET_XCOMP_PERM 0x1022
-#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_SUPP 0x1021
+#define ARCH_GET_XCOMP_PERM 0x1022
+#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_GUEST_PERM 0x1024
+#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
-#define ARCH_MAP_VDSO_X32 0x2001
-#define ARCH_MAP_VDSO_32 0x2002
-#define ARCH_MAP_VDSO_64 0x2003
+#define ARCH_MAP_VDSO_X32 0x2001
+#define ARCH_MAP_VDSO_32 0x2002
+#define ARCH_MAP_VDSO_64 0x2003
#endif /* _ASM_X86_PRCTL_H */
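ARCH_GET_XCOMP_GUEST_PERM and ARCH_REQ_XCOMP_GUEST_PERM parallel the existing non-guest pair: a VMM requests a dynamically-enabled XSTATE component on behalf of its guests before vCPUs are created. A hedged sketch using the raw syscall; there is no glibc wrapper for these arch_prctl commands, and the XFEATURE_XTILEDATA value (AMX tile data) is an assumption, not defined in this header:

    #include <stdint.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <asm/prctl.h>

    #define XFEATURE_XTILEDATA 18   /* assumed AMX tile-data component number */

    static int enable_amx_for_guests(uint64_t *permitted)
    {
            if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
                        XFEATURE_XTILEDATA))
                    return -1;
            /* Read back this process's guest permission bitmap. */
            return syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, permitted);
    }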
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 1cc9da6e29c7..59cf2343f3d9 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy)
rep movsq
movl %edx, %ecx
rep movsb
- ret
+ RET
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
@@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
- ret
+ RET
SYM_FUNC_END(memcpy_erms)
SYM_FUNC_START_LOCAL(memcpy_orig)
@@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movq %r9, 1*8(%rdi)
movq %r10, -2*8(%rdi, %rdx)
movq %r11, -1*8(%rdi, %rdx)
- retq
+ RET
.p2align 4
.Lless_16bytes:
cmpl $8, %edx
@@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movq -1*8(%rsi, %rdx), %r9
movq %r8, 0*8(%rdi)
movq %r9, -1*8(%rdi, %rdx)
- retq
+ RET
.p2align 4
.Lless_8bytes:
cmpl $4, %edx
@@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movl -4(%rsi, %rdx), %r8d
movl %ecx, (%rdi)
movl %r8d, -4(%rdi, %rdx)
- retq
+ RET
.p2align 4
.Lless_3bytes:
subl $1, %edx
@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movb %cl, (%rdi)
.Lend:
- retq
+ RET
SYM_FUNC_END(memcpy_orig)
.popsection
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index 9827ae267f96..d624f2bc42f1 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
movl %edx,%ecx
rep stosb
movq %r9,%rax
- ret
+ RET
SYM_FUNC_END(__memset)
SYM_FUNC_END_ALIAS(memset)
EXPORT_SYMBOL(memset)
@@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms)
movq %rdx,%rcx
rep stosb
movq %r9,%rax
- ret
+ RET
SYM_FUNC_END(memset_erms)
SYM_FUNC_START_LOCAL(memset_orig)
@@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig)
.Lende:
movq %r10,%rax
- ret
+ RET
.Lbad_alignment:
cmpq $7,%rdx
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 05ce4446b780..a736f64dc5dc 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
*.d
/bootstrap/
/bpftool
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index c49487905ceb..ac8487dcff1d 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -1,6 +1,5 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
include ../../../scripts/Makefile.include
-include ../../../scripts/utilities.mak
INSTALL ?= install
RM ?= rm -f
@@ -25,7 +24,7 @@ man: man8
man8: $(DOC_MAN8)
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
-RST2MAN_OPTS += --verbose
+RST2MAN_OPTS += --verbose --strip-comments
list_pages = $(sort $(basename $(filter-out $(1),$(MAN8_RST))))
see_also = $(subst " ",, \
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 88b28aa7431f..342716f74ec4 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
================
bpftool-btf
================
@@ -7,13 +9,14 @@ tool for inspection of BTF data
:Manual section: 8
+.. include:: substitutions.rst
+
SYNOPSIS
========
**bpftool** [*OPTIONS*] **btf** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } |
- { **-B** | **--base-btf** } }
+ *OPTIONS* := { |COMMON_OPTIONS| | { **-B** | **--base-btf** } }
*COMMANDS* := { **dump** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 3e4395eede4f..a17e9aa314fd 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
================
bpftool-cgroup
================
@@ -7,13 +9,14 @@ tool for inspection and simple manipulation of eBPF progs
:Manual section: 8
+.. include:: substitutions.rst
+
SYNOPSIS
========
**bpftool** [*OPTIONS*] **cgroup** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
- { **-f** | **--bpffs** } }
+ *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } }
*COMMANDS* :=
{ **show** | **list** | **tree** | **attach** | **detach** | **help** }
@@ -30,9 +33,9 @@ CGROUP COMMANDS
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
-| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
-| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** |
-| **sock_release** }
+| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
+| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** |
+| **sock_release** }
| *ATTACH_FLAGS* := { **multi** | **override** }
DESCRIPTION
@@ -98,9 +101,9 @@ DESCRIPTION
**sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an
unconnected udp6 socket (since 4.18);
**recvmsg4** call to recvfrom(2), recvmsg(2), recvmmsg(2) for
- an unconnected udp4 socket (since 5.2);
+ an unconnected udp4 socket (since 5.2);
**recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for
- an unconnected udp6 socket (since 5.2);
+ an unconnected udp6 socket (since 5.2);
**sysctl** sysctl access (since 5.2);
**getsockopt** call to getsockopt (since 5.3);
**setsockopt** call to setsockopt (since 5.3);
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index ab9f57ee4c3a..4ce9a77bc1e0 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
===============
bpftool-feature
===============
@@ -7,12 +9,14 @@ tool for inspection of eBPF-related parameters for Linux kernel or net device
:Manual section: 8
+.. include:: substitutions.rst
+
SYNOPSIS
========
**bpftool** [*OPTIONS*] **feature** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+ *OPTIONS* := { |COMMON_OPTIONS| }
*COMMANDS* := { **probe** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 2ef2f2df0279..bc276388f432 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
================
bpftool-gen
================
@@ -7,13 +9,14 @@ tool for BPF code-generation
:Manual section: 8
+.. include:: substitutions.rst
+
SYNOPSIS
========
**bpftool** [*OPTIONS*] **gen** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
- { **-L** | **--use-loader** } }
+ *OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } }
*COMMAND* := { **object** | **skeleton** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
index 471f363a725a..84839d488621 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
============
bpftool-iter
============
@@ -7,12 +9,14 @@ tool to create BPF iterators
:Manual section: 8
+.. include:: substitutions.rst
+
SYNOPSIS
========
**bpftool** [*OPTIONS*] **iter** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+ *OPTIONS* := { |COMMON_OPTIONS| }
*COMMANDS* := { **pin** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index 0de90f086238..52a4eee4af54 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
================
bpftool-link
================
@@ -7,13 +9,14 @@ tool for inspection and simple manipulation of eBPF links
:Manual section: 8
+.. include:: substitutions.rst
+
SYNOPSIS
========
**bpftool** [*OPTIONS*] **link** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
- { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
+ *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
*COMMANDS* := { **show** | **list** | **pin** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index d0c4abe08aba..7c188a598444 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
================
bpftool-map
================
@@ -7,17 +9,18 @@ tool for inspection and simple manipulation of eBPF maps
:Manual section: 8
+.. include:: substitutions.rst
+
SYNOPSIS
========
**bpftool** [*OPTIONS*] **map** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
- { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
+ *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
*COMMANDS* :=
- { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
- | **delete** | **pin** | **help** }
+ { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** |
+ **delete** | **pin** | **help** }
MAP COMMANDS
=============
@@ -52,7 +55,7 @@ MAP COMMANDS
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
- | | **task_storage** }
+| | **task_storage** | **bloom_filter** }
DESCRIPTION
===========
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index 1ae0375e8fea..f4e0a516335a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
================
bpftool-net
================
@@ -7,12 +9,14 @@ tool for inspection of netdev/tc related bpf prog attachments
:Manual section: 8
+.. include:: substitutions.rst
+
SYNOPSIS
========
**bpftool** [*OPTIONS*] **net** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+ *OPTIONS* := { |COMMON_OPTIONS| }
*COMMANDS* :=
{ **show** | **list** | **attach** | **detach** | **help** }
@@ -31,44 +35,44 @@ NET COMMANDS
DESCRIPTION
===========
**bpftool net { show | list }** [ **dev** *NAME* ]
- List bpf program attachments in the kernel networking subsystem.
-
- Currently, only device driver xdp attachments and tc filter
- classification/action attachments are implemented, i.e., for
- program types **BPF_PROG_TYPE_SCHED_CLS**,
- **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**.
- For programs attached to a particular cgroup, e.g.,
- **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**,
- **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
- users can use **bpftool cgroup** to dump cgroup attachments.
- For sk_{filter, skb, msg, reuseport} and lwt/seg6
- bpf programs, users should consult other tools, e.g., iproute2.
-
- The current output will start with all xdp program attachments, followed by
- all tc class/qdisc bpf program attachments. Both xdp programs and
- tc programs are ordered based on ifindex number. If multiple bpf
- programs attached to the same networking device through **tc filter**,
- the order will be first all bpf programs attached to tc classes, then
- all bpf programs attached to non clsact qdiscs, and finally all
- bpf programs attached to root and clsact qdisc.
+ List bpf program attachments in the kernel networking subsystem.
+
+ Currently, only device driver xdp attachments and tc filter
+ classification/action attachments are implemented, i.e., for
+ program types **BPF_PROG_TYPE_SCHED_CLS**,
+ **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**.
+ For programs attached to a particular cgroup, e.g.,
+ **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**,
+ **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ users can use **bpftool cgroup** to dump cgroup attachments.
+ For sk_{filter, skb, msg, reuseport} and lwt/seg6
+ bpf programs, users should consult other tools, e.g., iproute2.
+
+ The current output will start with all xdp program attachments, followed by
+ all tc class/qdisc bpf program attachments. Both xdp programs and
+ tc programs are ordered based on ifindex number. If multiple bpf
+ programs are attached to the same networking device through **tc filter**,
+ the order will be first all bpf programs attached to tc classes, then
+ all bpf programs attached to non-clsact qdiscs, and finally all
+ bpf programs attached to the root and clsact qdiscs.
**bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ]
- Attach bpf program *PROG* to network interface *NAME* with
- type specified by *ATTACH_TYPE*. Previously attached bpf program
- can be replaced by the command used with **overwrite** option.
- Currently, only XDP-related modes are supported for *ATTACH_TYPE*.
+ Attach bpf program *PROG* to network interface *NAME* with
+ type specified by *ATTACH_TYPE*. A previously attached bpf program
+ can be replaced by running the command with the **overwrite** option.
+ Currently, only XDP-related modes are supported for *ATTACH_TYPE*.
- *ATTACH_TYPE* can be of:
- **xdp** - try native XDP and fallback to generic XDP if NIC driver does not support it;
- **xdpgeneric** - Generic XDP. runs at generic XDP hook when packet already enters receive path as skb;
- **xdpdrv** - Native XDP. runs earliest point in driver's receive path;
- **xdpoffload** - Offload XDP. runs directly on NIC on each packet reception;
+ *ATTACH_TYPE* can be one of:
+ **xdp** - try native XDP and fall back to generic XDP if the NIC driver does not support it;
+ **xdpgeneric** - Generic XDP; runs at the generic XDP hook, after the packet has already entered the receive path as an skb;
+ **xdpdrv** - Native XDP; runs at the earliest point in the driver's receive path;
+ **xdpoffload** - Offloaded XDP; runs directly on the NIC, processing each packet as it is received;
**bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME*
- Detach bpf program attached to network interface *NAME* with
- type specified by *ATTACH_TYPE*. To detach bpf program, same
- *ATTACH_TYPE* previously used for attach must be specified.
- Currently, only XDP-related modes are supported for *ATTACH_TYPE*.
+ Detach the bpf program attached to network interface *NAME* with
+ type specified by *ATTACH_TYPE*. To detach the bpf program, the same
+ *ATTACH_TYPE* previously used for attach must be specified.
+ Currently, only XDP-related modes are supported for *ATTACH_TYPE*.
**bpftool net help**
Print short help message.
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index ce52798a917d..5fea633a82f1 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
================
bpftool-perf
================
@@ -7,12 +9,14 @@ tool for inspection of perf related bpf prog attachments
:Manual section: 8
+.. include:: substitutions.rst
+
SYNOPSIS
========
**bpftool** [*OPTIONS*] **perf** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+ *OPTIONS* := { |COMMON_OPTIONS| }
*COMMANDS* :=
{ **show** | **list** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 91608cb7e44a..a2e9359e554c 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
================
bpftool-prog
================
@@ -7,18 +9,20 @@ tool for inspection and simple manipulation of eBPF progs
:Manual section: 8
+.. include:: substitutions.rst
+
SYNOPSIS
========
**bpftool** [*OPTIONS*] **prog** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
- { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
- { **-L** | **--use-loader** } }
+ *OPTIONS* := { |COMMON_OPTIONS| |
+ { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
+ { **-L** | **--use-loader** } }
*COMMANDS* :=
- { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load**
- | **loadall** | **help** }
+ { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** |
+ **loadall** | **help** }
PROG COMMANDS
=============
diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
index 02afc0fc14cb..ee53a122c0c7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
==================
bpftool-struct_ops
==================
@@ -7,12 +9,14 @@ tool to register/unregister/introspect BPF struct_ops
:Manual section: 8
+.. include:: substitutions.rst
+
SYNOPSIS
========
**bpftool** [*OPTIONS*] **struct_ops** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+ *OPTIONS* := { |COMMON_OPTIONS| }
*COMMANDS* :=
{ **show** | **list** | **dump** | **register** | **unregister** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index bb23f55bb05a..7084dd9fa2f8 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
================
BPFTOOL
================
@@ -7,6 +9,8 @@ tool for inspection and simple manipulation of eBPF programs and maps
:Manual section: 8
+.. include:: substitutions.rst
+
SYNOPSIS
========
@@ -18,15 +22,14 @@ SYNOPSIS
*OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** }
- *OPTIONS* := { { **-V** | **--version** } |
- { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+ *OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| }
*MAP-COMMANDS* :=
{ **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** |
- **delete** | **pin** | **event_pipe** | **help** }
+ **delete** | **pin** | **event_pipe** | **help** }
*PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** |
- **load** | **attach** | **detach** | **help** }
+ **load** | **attach** | **detach** | **help** }
*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst
index 05d06c74dcaa..908487b9c2ad 100644
--- a/tools/bpf/bpftool/Documentation/common_options.rst
+++ b/tools/bpf/bpftool/Documentation/common_options.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
-h, --help
Print short help message (similar to **bpftool help**).
@@ -20,3 +22,12 @@
Print all logs available, even debug-level information. This includes
logs from libbpf as well as from the verifier, when attempting to
load programs.
+
+-l, --legacy
+	Use legacy libbpf mode, which has more relaxed BPF program
+	requirements. By default, bpftool enforces stricter requirements
+	on section names, uses different pinning logic, and does not
+	support some of the older non-BTF map declarations.
+
+ See https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0
+ for details.
diff --git a/tools/bpf/bpftool/Documentation/substitutions.rst b/tools/bpf/bpftool/Documentation/substitutions.rst
new file mode 100644
index 000000000000..ccf1ffa0686c
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/substitutions.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+.. |COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | { **-l** | **--legacy** }
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 7cfba11c3014..83369f55df61 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -1,6 +1,5 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
include ../../scripts/Makefile.include
-include ../../scripts/utilities.mak
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
@@ -58,7 +57,7 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_
$(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT)
$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \
DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR) prefix= \
- ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers
+ ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers
$(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR)
$(call QUIET_INSTALL, $@)
@@ -153,6 +152,9 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT
SRCS += $(BFD_SRCS)
endif
+HOST_CFLAGS = $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\
+ $(subst $(CLANG_CROSS_FLAGS),,$(CFLAGS)))
+
BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o)
@@ -187,7 +189,8 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP)
-I$(if $(OUTPUT),$(OUTPUT),.) \
-I$(srctree)/tools/include/uapi/ \
-I$(LIBBPF_BOOTSTRAP_INCLUDE) \
- -g -O2 -Wall -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@
+ -g -O2 -Wall -target bpf -c $< -o $@
+ $(Q)$(LLVM_STRIP) -g $@
$(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP)
$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@
@@ -202,10 +205,10 @@ endif
CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS)
$(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
- $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $<
+ $(QUIET_CC)$(HOSTCC) $(HOST_CFLAGS) -c -MMD $< -o $@
$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
- $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@
$(OUTPUT)feature.o:
ifneq ($(feature-zlib), 1)
@@ -213,19 +216,16 @@ ifneq ($(feature-zlib), 1)
endif
$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP)
- $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \
- $(LIBS_BOOTSTRAP)
+ $(QUIET_LINK)$(HOSTCC) $(HOST_CFLAGS) $(LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@
$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
$(BOOTSTRAP_OUTPUT)%.o: %.c $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS) | $(BOOTSTRAP_OUTPUT)
- $(QUIET_CC)$(HOSTCC) \
- $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),$(CFLAGS)) \
- -c -MMD -o $@ $<
+ $(QUIET_CC)$(HOSTCC) $(HOST_CFLAGS) -c -MMD $< -o $@
$(OUTPUT)%.o: %.c
- $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@
feature-detect-clean:
$(call QUIET_CLEAN, feature-detect)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 88e2bcf16cca..493753a4962e 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -261,7 +261,7 @@ _bpftool()
# Deal with options
if [[ ${words[cword]} == -* ]]; then
local c='--version --json --pretty --bpffs --mapcompat --debug \
- --use-loader --base-btf'
+ --use-loader --base-btf --legacy'
COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
return 0
fi
@@ -710,7 +710,8 @@ _bpftool()
hash_of_maps devmap devmap_hash sockmap cpumap \
xskmap sockhash cgroup_storage reuseport_sockarray \
percpu_cgroup_storage queue stack sk_storage \
- struct_ops inode_storage task_storage ringbuf'
+ struct_ops ringbuf inode_storage task_storage \
+ bloom_filter'
COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) )
return 0
;;
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 015d2758f826..59833125ac0a 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -39,6 +39,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_DATASEC] = "DATASEC",
[BTF_KIND_FLOAT] = "FLOAT",
[BTF_KIND_DECL_TAG] = "DECL_TAG",
+ [BTF_KIND_TYPE_TAG] = "TYPE_TAG",
};
struct btf_attach_point {
@@ -142,6 +143,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
case BTF_KIND_VOLATILE:
case BTF_KIND_RESTRICT:
case BTF_KIND_TYPEDEF:
+ case BTF_KIND_TYPE_TAG:
if (json_output)
jsonw_uint_field(w, "type_id", t->type);
else
@@ -418,9 +420,10 @@ static int dump_btf_c(const struct btf *btf,
struct btf_dump *d;
int err = 0, i;
- d = btf_dump__new(btf, NULL, NULL, btf_dump_printf);
- if (IS_ERR(d))
- return PTR_ERR(d);
+ d = btf_dump__new(btf, btf_dump_printf, NULL, NULL);
+ err = libbpf_get_error(d);
+ if (err)
+ return err;
printf("#ifndef __VMLINUX_H__\n");
printf("#define __VMLINUX_H__\n");
@@ -547,8 +550,8 @@ static int do_dump(int argc, char **argv)
}
btf = btf__parse_split(*argv, base ?: base_btf);
- if (IS_ERR(btf)) {
- err = -PTR_ERR(btf);
+ err = libbpf_get_error(btf);
+ if (err) {
btf = NULL;
p_err("failed to load BTF from %s: %s",
*argv, strerror(err));
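
A note on the error-handling conversions in this file: under libbpf's 1.0
("strict mode") semantics, constructors such as btf__parse() and
btf_dump__new() return NULL on failure rather than an IS_ERR()-encoded
pointer, so the IS_ERR()/PTR_ERR() idiom no longer applies;
libbpf_get_error() copes with both conventions. A minimal sketch of the new
pattern (the path and function name are illustrative only):

    #include <bpf/btf.h>
    #include <bpf/libbpf.h>

    static int load_vmlinux_btf(void)
    {
        struct btf *btf;
        long err;

        /* On failure this returns NULL (strict mode) or an encoded
         * error pointer (legacy mode); libbpf_get_error() handles both.
         */
        btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
        err = libbpf_get_error(btf);
        if (err)
            return (int)err; /* btf is not a usable pointer here */

        /* ... walk types ... */
        btf__free(btf);
        return 0;
    }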
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 9c25286a5c73..f5dddf8ef404 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -32,14 +32,16 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
const struct btf_type *func_proto,
__u32 prog_id)
{
- struct bpf_prog_info_linear *prog_info = NULL;
const struct btf_type *func_type;
+ int prog_fd = -1, func_sig_len;
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
const char *prog_name = NULL;
- struct bpf_func_info *finfo;
struct btf *prog_btf = NULL;
- struct bpf_prog_info *info;
- int prog_fd, func_sig_len;
+ struct bpf_func_info finfo;
+ __u32 finfo_rec_size;
char prog_str[1024];
+ int err;
/* Get the ptr's func_proto */
func_sig_len = btf_dump_func(d->btf, prog_str, func_proto, NULL, 0,
@@ -52,25 +54,30 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
/* Get the bpf_prog's name. Obtain from func_info. */
prog_fd = bpf_prog_get_fd_by_id(prog_id);
- if (prog_fd == -1)
+ if (prog_fd < 0)
goto print;
- prog_info = bpf_program__get_prog_info_linear(prog_fd,
- 1UL << BPF_PROG_INFO_FUNC_INFO);
- close(prog_fd);
- if (IS_ERR(prog_info)) {
- prog_info = NULL;
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ if (err)
goto print;
- }
- info = &prog_info->info;
- if (!info->btf_id || !info->nr_func_info)
+ if (!info.btf_id || !info.nr_func_info)
+ goto print;
+
+ finfo_rec_size = info.func_info_rec_size;
+ memset(&info, 0, sizeof(info));
+ info.nr_func_info = 1;
+ info.func_info_rec_size = finfo_rec_size;
+ info.func_info = ptr_to_u64(&finfo);
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ if (err)
goto print;
- prog_btf = btf__load_from_kernel_by_id(info->btf_id);
+
+ prog_btf = btf__load_from_kernel_by_id(info.btf_id);
if (libbpf_get_error(prog_btf))
goto print;
- finfo = u64_to_ptr(info->func_info);
- func_type = btf__type_by_id(prog_btf, finfo->type_id);
+ func_type = btf__type_by_id(prog_btf, finfo.type_id);
if (!func_type || !btf_is_func(func_type))
goto print;
@@ -92,7 +99,8 @@ print:
prog_str[sizeof(prog_str) - 1] = '\0';
jsonw_string(d->jw, prog_str);
btf__free(prog_btf);
- free(prog_info);
+ if (prog_fd >= 0)
+ close(prog_fd);
return 0;
}
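
The conversion above replaces the deprecated bpf_program__get_prog_info_linear()
with two bpf_obj_get_info_by_fd() calls: the first learns how many func_info
records exist and how large they are, the second fills a caller-provided
buffer. A self-contained sketch of that two-call pattern (the helper name is
hypothetical):

    #include <stdint.h>
    #include <string.h>
    #include <bpf/bpf.h>

    /* Fetch the first func_info record of a loaded program. */
    static int get_first_func_info(int prog_fd, struct bpf_func_info *finfo)
    {
        struct bpf_prog_info info = {};
        __u32 info_len = sizeof(info);
        __u32 rec_size;
        int err;

        /* First call: query counts and record sizes only. */
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
        if (err)
            return err;
        if (!info.nr_func_info)
            return -1;
        rec_size = info.func_info_rec_size;

        /* Second call: ask for exactly one record into our buffer. */
        memset(&info, 0, sizeof(info));
        info.nr_func_info = 1;
        info.func_info_rec_size = rec_size;
        info.func_info = (__u64)(uintptr_t)finfo;

        return bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
    }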
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 511eccdbdfe6..fa8eb8134344 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -74,6 +74,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
[BPF_XDP] = "xdp",
[BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select",
[BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate",
+ [BPF_PERF_EVENT] = "perf_event",
};
void p_err(const char *fmt, ...)
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index ade44577688e..e999159fa28d 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -467,7 +467,7 @@ static bool probe_bpf_syscall(const char *define_prefix)
{
bool res;
- bpf_load_program(BPF_PROG_TYPE_UNSPEC, NULL, 0, NULL, 0, NULL, 0);
+ bpf_prog_load(BPF_PROG_TYPE_UNSPEC, NULL, NULL, NULL, 0, NULL);
res = (errno != ENOSYS);
print_bool_feature("have_bpf_syscall",
@@ -643,15 +643,111 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
}
static void
-probe_large_insn_limit(const char *define_prefix, __u32 ifindex)
+probe_misc_feature(struct bpf_insn *insns, size_t len,
+ const char *define_prefix, __u32 ifindex,
+ const char *feat_name, const char *plain_name,
+ const char *define_name)
{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .prog_ifindex = ifindex,
+ );
bool res;
+ int fd;
- res = bpf_probe_large_insn_limit(ifindex);
- print_bool_feature("have_large_insn_limit",
+ errno = 0;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+ insns, len, &opts);
+ res = fd >= 0 || !errno;
+
+ if (fd >= 0)
+ close(fd);
+
+ print_bool_feature(feat_name, plain_name, define_name, res,
+ define_prefix);
+}
+
+/*
+ * Probe for availability of kernel commit (5.3):
+ *
+ * c04c0d2b968a ("bpf: increase complexity limit and maximum program size")
+ */
+static void probe_large_insn_limit(const char *define_prefix, __u32 ifindex)
+{
+ struct bpf_insn insns[BPF_MAXINSNS + 1];
+ int i;
+
+ for (i = 0; i < BPF_MAXINSNS; i++)
+ insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1);
+ insns[BPF_MAXINSNS] = BPF_EXIT_INSN();
+
+ probe_misc_feature(insns, ARRAY_SIZE(insns),
+ define_prefix, ifindex,
+ "have_large_insn_limit",
"Large program size limit",
- "LARGE_INSN_LIMIT",
- res, define_prefix);
+ "LARGE_INSN_LIMIT");
+}
+
+/*
+ * Probe for bounded loop support introduced in commit 2589726d12a1
+ * ("bpf: introduce bounded loops").
+ */
+static void
+probe_bounded_loops(const char *define_prefix, __u32 ifindex)
+{
+ struct bpf_insn insns[4] = {
+ BPF_MOV64_IMM(BPF_REG_0, 10),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, -2),
+ BPF_EXIT_INSN()
+ };
+
+ probe_misc_feature(insns, ARRAY_SIZE(insns),
+ define_prefix, ifindex,
+ "have_bounded_loops",
+ "Bounded loop support",
+ "BOUNDED_LOOPS");
+}
+
+/*
+ * Probe for the v2 instruction set extension introduced in commit 92b31a9af73b
+ * ("bpf: add BPF_J{LT,LE,SLT,SLE} instructions").
+ */
+static void
+probe_v2_isa_extension(const char *define_prefix, __u32 ifindex)
+{
+ struct bpf_insn insns[4] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ probe_misc_feature(insns, ARRAY_SIZE(insns),
+ define_prefix, ifindex,
+ "have_v2_isa_extension",
+ "ISA extension v2",
+ "V2_ISA_EXTENSION");
+}
+
+/*
+ * Probe for the v3 instruction set extension introduced in commit 092ed0968bb6
+ * ("bpf: verifier support JMP32").
+ */
+static void
+probe_v3_isa_extension(const char *define_prefix, __u32 ifindex)
+{
+ struct bpf_insn insns[4] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ probe_misc_feature(insns, ARRAY_SIZE(insns),
+ define_prefix, ifindex,
+ "have_v3_isa_extension",
+ "ISA extension v3",
+ "V3_ISA_EXTENSION");
}
static void
@@ -768,6 +864,9 @@ static void section_misc(const char *define_prefix, __u32 ifindex)
"/*** eBPF misc features ***/",
define_prefix);
probe_large_insn_limit(define_prefix, ifindex);
+ probe_bounded_loops(define_prefix, ifindex);
+ probe_v2_isa_extension(define_prefix, ifindex);
+ probe_v3_isa_extension(define_prefix, ifindex);
print_end_section();
}
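
All four probes above share probe_misc_feature(), which loads a throwaway
socket filter built from the instructions under test; support is assumed when
the program loads, or when the load fails without setting errno. A condensed
standalone sketch of the same idea (the function name is illustrative):

    #include <errno.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <unistd.h>
    #include <bpf/bpf.h>

    static bool probe_insns(const struct bpf_insn *insns, size_t cnt)
    {
        int fd;

        errno = 0;
        fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
                           insns, cnt, NULL);
        if (fd >= 0)
            close(fd);
        /* Mirrors the res computation in the patch. */
        return fd >= 0 || !errno;
    }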
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 5c18351290f0..b4695df2ea3d 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -218,9 +218,10 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
char sec_ident[256], map_ident[256];
int i, err = 0;
- d = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf);
- if (IS_ERR(d))
- return PTR_ERR(d);
+ d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL);
+ err = libbpf_get_error(d);
+ if (err)
+ return err;
bpf_object__for_each_map(map, obj) {
/* only generate definitions for memory-mapped internal maps */
@@ -485,7 +486,6 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard)
{
- struct bpf_object_load_attr load_attr = {};
DECLARE_LIBBPF_OPTS(gen_loader_opts, opts);
struct bpf_map *map;
char ident[256];
@@ -495,12 +495,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
if (err)
return err;
- load_attr.obj = obj;
- if (verifier_logs)
- /* log_level1 + log_level2 + stats, but not stable UAPI */
- load_attr.log_level = 1 + 2 + 4;
-
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(obj);
if (err) {
p_err("failed to load object file");
goto out;
@@ -718,11 +713,15 @@ static int do_skeleton(int argc, char **argv)
if (obj_name[0] == '\0')
get_obj_name(obj_name, file);
opts.object_name = obj_name;
+ if (verifier_logs)
+ /* log_level1 + log_level2 + stats, but not stable UAPI */
+ opts.kernel_log_level = 1 + 2 + 4;
obj = bpf_object__open_mem(obj_data, file_sz, &opts);
- if (IS_ERR(obj)) {
+ err = libbpf_get_error(obj);
+ if (err) {
char err_buf[256];
- libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf));
+ libbpf_strerror(err, err_buf, sizeof(err_buf));
p_err("failed to open BPF object file: %s", err_buf);
obj = NULL;
goto out;
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index 6c0de647b8ad..f88fdc820d23 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -46,7 +46,8 @@ static int do_pin(int argc, char **argv)
}
obj = bpf_object__open(objfile);
- if (IS_ERR(obj)) {
+ err = libbpf_get_error(obj);
+ if (err) {
p_err("can't open objfile %s", objfile);
goto close_map_fd;
}
@@ -64,8 +65,8 @@ static int do_pin(int argc, char **argv)
}
link = bpf_program__attach_iter(prog, &iter_opts);
- if (IS_ERR(link)) {
- err = PTR_ERR(link);
+ err = libbpf_get_error(link);
+ if (err) {
p_err("attach_iter failed for program %s",
bpf_program__name(prog));
goto close_obj;
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 28237d7cef67..020e91a542d5 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -31,6 +31,7 @@ bool block_mount;
bool verifier_logs;
bool relaxed_maps;
bool use_loader;
+bool legacy_libbpf;
struct btf *base_btf;
struct hashmap *refs_table;
@@ -92,6 +93,7 @@ static int do_version(int argc, char **argv)
jsonw_name(json_wtr, "features");
jsonw_start_object(json_wtr); /* features */
jsonw_bool_field(json_wtr, "libbfd", has_libbfd);
+ jsonw_bool_field(json_wtr, "libbpf_strict", !legacy_libbpf);
jsonw_bool_field(json_wtr, "skeletons", has_skeletons);
jsonw_end_object(json_wtr); /* features */
@@ -105,6 +107,10 @@ static int do_version(int argc, char **argv)
printf(" libbfd");
nb_features++;
}
+ if (!legacy_libbpf)
+ printf("%s libbpf_strict", nb_features++ ? "," : "");
if (has_skeletons)
printf("%s skeletons", nb_features++ ? "," : "");
printf("\n");
@@ -396,10 +402,14 @@ int main(int argc, char **argv)
{ "debug", no_argument, NULL, 'd' },
{ "use-loader", no_argument, NULL, 'L' },
{ "base-btf", required_argument, NULL, 'B' },
+ { "legacy", no_argument, NULL, 'l' },
{ 0 }
};
+ bool version_requested = false;
int opt, ret;
+ setlinebuf(stdout);
+
last_do_help = do_help;
pretty_output = false;
json_output = false;
@@ -408,11 +418,12 @@ int main(int argc, char **argv)
bin_name = argv[0];
opterr = 0;
- while ((opt = getopt_long(argc, argv, "VhpjfLmndB:",
+ while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l",
options, NULL)) >= 0) {
switch (opt) {
case 'V':
- return do_version(argc, argv);
+ version_requested = true;
+ break;
case 'h':
return do_help(argc, argv);
case 'p':
@@ -454,6 +465,9 @@ int main(int argc, char **argv)
case 'L':
use_loader = true;
break;
+ case 'l':
+ legacy_libbpf = true;
+ break;
default:
p_err("unrecognized option '%s'", argv[optind - 1]);
if (json_output)
@@ -463,11 +477,20 @@ int main(int argc, char **argv)
}
}
+ if (!legacy_libbpf) {
+ ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ if (ret)
+ p_err("failed to enable libbpf strict mode: %d", ret);
+ }
+
argc -= optind;
argv += optind;
if (argc < 0)
usage();
+ if (version_requested)
+ return do_version(argc, argv);
+
ret = cmd_select(cmds, argc, argv, do_help);
if (json_output)
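
Unless --legacy is given, main() now flips libbpf into strict mode before any
other libbpf call, which is what makes the IS_ERR()-to-libbpf_get_error()
conversions throughout this series behave uniformly. A minimal standalone
sketch of the opt-in (not bpftool's actual main):

    #include <bpf/libbpf.h>
    #include <bpf/libbpf_legacy.h>

    int main(void)
    {
        /* Enable all libbpf 1.0 behaviors up front; call this before
         * any other libbpf function for it to take full effect.
         */
        if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL))
            return 1;

        /* ... rest of the tool ... */
        return 0;
    }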
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 383835c2604d..8d76d937a62b 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -57,7 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr)
#define HELP_SPEC_PROGRAM \
"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }"
#define HELP_SPEC_OPTIONS \
- "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}"
+ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}"
#define HELP_SPEC_MAP \
"MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
#define HELP_SPEC_LINK \
@@ -90,6 +90,7 @@ extern bool block_mount;
extern bool verifier_logs;
extern bool relaxed_maps;
extern bool use_loader;
+extern bool legacy_libbpf;
extern struct btf *base_btf;
extern struct hashmap *refs_table;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index cae1f1119296..cc530a229812 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -53,6 +53,7 @@ const char * const map_type_name[] = {
[BPF_MAP_TYPE_RINGBUF] = "ringbuf",
[BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
[BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
+ [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
};
const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
@@ -811,7 +812,7 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info)
if (info->btf_vmlinux_value_type_id) {
if (!btf_vmlinux) {
btf_vmlinux = libbpf_find_kernel_btf();
- if (IS_ERR(btf_vmlinux))
+ if (libbpf_get_error(btf_vmlinux))
p_err("failed to get kernel btf");
}
return btf_vmlinux;
@@ -831,13 +832,13 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info)
static void free_map_kv_btf(struct btf *btf)
{
- if (!IS_ERR(btf) && btf != btf_vmlinux)
+ if (!libbpf_get_error(btf) && btf != btf_vmlinux)
btf__free(btf);
}
static void free_btf_vmlinux(void)
{
- if (!IS_ERR(btf_vmlinux))
+ if (!libbpf_get_error(btf_vmlinux))
btf__free(btf_vmlinux);
}
@@ -862,8 +863,8 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr,
if (wtr) {
btf = get_map_kv_btf(info);
- if (IS_ERR(btf)) {
- err = PTR_ERR(btf);
+ err = libbpf_get_error(btf);
+ if (err) {
goto exit_free;
}
@@ -1260,7 +1261,10 @@ static int do_pin(int argc, char **argv)
static int do_create(int argc, char **argv)
{
- struct bpf_create_map_attr attr = { NULL, };
+ LIBBPF_OPTS(bpf_map_create_opts, attr);
+ enum bpf_map_type map_type = BPF_MAP_TYPE_UNSPEC;
+ __u32 key_size = 0, value_size = 0, max_entries = 0;
+ const char *map_name = NULL;
const char *pinfile;
int err = -1, fd;
@@ -1275,30 +1279,30 @@ static int do_create(int argc, char **argv)
if (is_prefix(*argv, "type")) {
NEXT_ARG();
- if (attr.map_type) {
+ if (map_type) {
p_err("map type already specified");
goto exit;
}
- attr.map_type = map_type_from_str(*argv);
- if ((int)attr.map_type < 0) {
+ map_type = map_type_from_str(*argv);
+ if ((int)map_type < 0) {
p_err("unrecognized map type: %s", *argv);
goto exit;
}
NEXT_ARG();
} else if (is_prefix(*argv, "name")) {
NEXT_ARG();
- attr.name = GET_ARG();
+ map_name = GET_ARG();
} else if (is_prefix(*argv, "key")) {
- if (parse_u32_arg(&argc, &argv, &attr.key_size,
+ if (parse_u32_arg(&argc, &argv, &key_size,
"key size"))
goto exit;
} else if (is_prefix(*argv, "value")) {
- if (parse_u32_arg(&argc, &argv, &attr.value_size,
+ if (parse_u32_arg(&argc, &argv, &value_size,
"value size"))
goto exit;
} else if (is_prefix(*argv, "entries")) {
- if (parse_u32_arg(&argc, &argv, &attr.max_entries,
+ if (parse_u32_arg(&argc, &argv, &max_entries,
"max entries"))
goto exit;
} else if (is_prefix(*argv, "flags")) {
@@ -1339,14 +1343,14 @@ static int do_create(int argc, char **argv)
}
}
- if (!attr.name) {
+ if (!map_name) {
p_err("map name not specified");
goto exit;
}
set_max_rlimit();
- fd = bpf_create_map_xattr(&attr);
+ fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr);
if (fd < 0) {
p_err("map create failed: %s", strerror(errno));
goto exit;
@@ -1477,7 +1481,7 @@ static int do_help(int argc, char **argv)
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
" queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
- " task_storage }\n"
+ " task_storage | bloom_filter }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-f|--bpffs} | {-n|--nomount} }\n"
"",
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index b98ea702d284..6b0c410152de 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -124,7 +124,7 @@ int do_event_pipe(int argc, char **argv)
.wakeup_events = 1,
};
struct bpf_map_info map_info = {};
- struct perf_buffer_raw_opts opts = {};
+ LIBBPF_OPTS(perf_buffer_raw_opts, opts);
struct event_pipe_ctx ctx = {
.all_cpus = true,
.cpu = -1,
@@ -190,14 +190,11 @@ int do_event_pipe(int argc, char **argv)
ctx.idx = 0;
}
- opts.attr = &perf_attr;
- opts.event_cb = print_bpf_output;
- opts.ctx = &ctx;
opts.cpu_cnt = ctx.all_cpus ? 0 : 1;
opts.cpus = &ctx.cpu;
opts.map_keys = &ctx.idx;
-
- pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &opts);
+ pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &perf_attr,
+ print_bpf_output, &ctx, &opts);
err = libbpf_get_error(pb);
if (err) {
p_err("failed to create perf buffer: %s (%d)",
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 515d22952602..2a21d50516bc 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -100,6 +100,76 @@ static enum bpf_attach_type parse_attach_type(const char *str)
return __MAX_BPF_ATTACH_TYPE;
}
+static int prep_prog_info(struct bpf_prog_info *const info, enum dump_mode mode,
+ void **info_data, size_t *const info_data_sz)
+{
+ struct bpf_prog_info holder = {};
+ size_t needed = 0;
+ void *ptr;
+
+ if (mode == DUMP_JITED) {
+ holder.jited_prog_len = info->jited_prog_len;
+ needed += info->jited_prog_len;
+ } else {
+ holder.xlated_prog_len = info->xlated_prog_len;
+ needed += info->xlated_prog_len;
+ }
+
+ holder.nr_jited_ksyms = info->nr_jited_ksyms;
+ needed += info->nr_jited_ksyms * sizeof(__u64);
+
+ holder.nr_jited_func_lens = info->nr_jited_func_lens;
+ needed += info->nr_jited_func_lens * sizeof(__u32);
+
+ holder.nr_func_info = info->nr_func_info;
+ holder.func_info_rec_size = info->func_info_rec_size;
+ needed += info->nr_func_info * info->func_info_rec_size;
+
+ holder.nr_line_info = info->nr_line_info;
+ holder.line_info_rec_size = info->line_info_rec_size;
+ needed += info->nr_line_info * info->line_info_rec_size;
+
+ holder.nr_jited_line_info = info->nr_jited_line_info;
+ holder.jited_line_info_rec_size = info->jited_line_info_rec_size;
+ needed += info->nr_jited_line_info * info->jited_line_info_rec_size;
+
+ if (needed > *info_data_sz) {
+ ptr = realloc(*info_data, needed);
+ if (!ptr)
+ return -1;
+
+ *info_data = ptr;
+ *info_data_sz = needed;
+ }
+ ptr = *info_data;
+
+ if (mode == DUMP_JITED) {
+ holder.jited_prog_insns = ptr_to_u64(ptr);
+ ptr += holder.jited_prog_len;
+ } else {
+ holder.xlated_prog_insns = ptr_to_u64(ptr);
+ ptr += holder.xlated_prog_len;
+ }
+
+ holder.jited_ksyms = ptr_to_u64(ptr);
+ ptr += holder.nr_jited_ksyms * sizeof(__u64);
+
+ holder.jited_func_lens = ptr_to_u64(ptr);
+ ptr += holder.nr_jited_func_lens * sizeof(__u32);
+
+ holder.func_info = ptr_to_u64(ptr);
+ ptr += holder.nr_func_info * holder.func_info_rec_size;
+
+ holder.line_info = ptr_to_u64(ptr);
+ ptr += holder.nr_line_info * holder.line_info_rec_size;
+
+ holder.jited_line_info = ptr_to_u64(ptr);
+ ptr += holder.nr_jited_line_info * holder.jited_line_info_rec_size;
+
+ *info = holder;
+ return 0;
+}
+
static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
{
struct timespec real_time_ts, boot_time_ts;
@@ -639,8 +709,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
char func_sig[1024];
unsigned char *buf;
__u32 member_len;
+ int fd, err = -1;
ssize_t n;
- int fd;
if (mode == DUMP_JITED) {
if (info->jited_prog_len == 0 || !info->jited_prog_insns) {
@@ -679,7 +749,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
if (fd < 0) {
p_err("can't open file %s: %s", filepath,
strerror(errno));
- return -1;
+ goto exit_free;
}
n = write(fd, buf, member_len);
@@ -687,7 +757,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
if (n != (ssize_t)member_len) {
p_err("error writing output file: %s",
n < 0 ? strerror(errno) : "short write");
- return -1;
+ goto exit_free;
}
if (json_output)
@@ -701,7 +771,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
info->netns_ino,
&disasm_opt);
if (!name)
- return -1;
+ goto exit_free;
}
if (info->nr_jited_func_lens && info->jited_func_lens) {
@@ -796,23 +866,28 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
kernel_syms_destroy(&dd);
}
- btf__free(btf);
+ err = 0;
- return 0;
+exit_free:
+ btf__free(btf);
+ bpf_prog_linfo__free(prog_linfo);
+ return err;
}
static int do_dump(int argc, char **argv)
{
- struct bpf_prog_info_linear *info_linear;
+ struct bpf_prog_info info;
+ __u32 info_len = sizeof(info);
+ size_t info_data_sz = 0;
+ void *info_data = NULL;
char *filepath = NULL;
bool opcodes = false;
bool visual = false;
enum dump_mode mode;
bool linum = false;
- int *fds = NULL;
int nb_fds, i = 0;
+ int *fds = NULL;
int err = -1;
- __u64 arrays;
if (is_prefix(*argv, "jited")) {
if (disasm_init())
@@ -872,43 +947,44 @@ static int do_dump(int argc, char **argv)
goto exit_close;
}
- if (mode == DUMP_JITED)
- arrays = 1UL << BPF_PROG_INFO_JITED_INSNS;
- else
- arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS;
-
- arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS;
- arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
- arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
- arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
- arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
-
if (json_output && nb_fds > 1)
jsonw_start_array(json_wtr); /* root array */
for (i = 0; i < nb_fds; i++) {
- info_linear = bpf_program__get_prog_info_linear(fds[i], arrays);
- if (IS_ERR_OR_NULL(info_linear)) {
+ memset(&info, 0, sizeof(info));
+
+ err = bpf_obj_get_info_by_fd(fds[i], &info, &info_len);
+ if (err) {
+ p_err("can't get prog info: %s", strerror(errno));
+ break;
+ }
+
+ err = prep_prog_info(&info, mode, &info_data, &info_data_sz);
+ if (err) {
+ p_err("can't grow prog info_data");
+ break;
+ }
+
+ err = bpf_obj_get_info_by_fd(fds[i], &info, &info_len);
+ if (err) {
p_err("can't get prog info: %s", strerror(errno));
break;
}
if (json_output && nb_fds > 1) {
jsonw_start_object(json_wtr); /* prog object */
- print_prog_header_json(&info_linear->info);
+ print_prog_header_json(&info);
jsonw_name(json_wtr, "insns");
} else if (nb_fds > 1) {
- print_prog_header_plain(&info_linear->info);
+ print_prog_header_plain(&info);
}
- err = prog_dump(&info_linear->info, mode, filepath, opcodes,
- visual, linum);
+ err = prog_dump(&info, mode, filepath, opcodes, visual, linum);
if (json_output && nb_fds > 1)
jsonw_end_object(json_wtr); /* prog object */
else if (i != nb_fds - 1 && nb_fds > 1)
printf("\n");
- free(info_linear);
if (err)
break;
close(fds[i]);
@@ -920,6 +996,7 @@ exit_close:
for (; i < nb_fds; i++)
close(fds[i]);
exit_free:
+ free(info_data);
free(fds);
return err;
}
@@ -1387,7 +1464,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
.relaxed_maps = relaxed_maps,
);
- struct bpf_object_load_attr load_attr = { 0 };
enum bpf_attach_type expected_attach_type;
struct map_replace *map_replace = NULL;
struct bpf_program *prog = NULL, *pos;
@@ -1409,8 +1485,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
while (argc) {
if (is_prefix(*argv, "type")) {
- char *type;
-
NEXT_ARG();
if (common_prog_type != BPF_PROG_TYPE_UNSPEC) {
@@ -1420,21 +1494,26 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
if (!REQ_ARGS(1))
goto err_free_reuse_maps;
- /* Put a '/' at the end of type to appease libbpf */
- type = malloc(strlen(*argv) + 2);
- if (!type) {
- p_err("mem alloc failed");
- goto err_free_reuse_maps;
- }
- *type = 0;
- strcat(type, *argv);
- strcat(type, "/");
+ err = libbpf_prog_type_by_name(*argv, &common_prog_type,
+ &expected_attach_type);
+ if (err < 0) {
+ /* Put a '/' at the end of type to appease libbpf */
+ char *type = malloc(strlen(*argv) + 2);
- err = get_prog_type_by_name(type, &common_prog_type,
- &expected_attach_type);
- free(type);
- if (err < 0)
- goto err_free_reuse_maps;
+ if (!type) {
+ p_err("mem alloc failed");
+ goto err_free_reuse_maps;
+ }
+ *type = 0;
+ strcat(type, *argv);
+ strcat(type, "/");
+
+ err = get_prog_type_by_name(type, &common_prog_type,
+ &expected_attach_type);
+ free(type);
+ if (err < 0)
+ goto err_free_reuse_maps;
+ }
NEXT_ARG();
} else if (is_prefix(*argv, "map")) {
@@ -1518,6 +1597,10 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
set_max_rlimit();
+ if (verifier_logs)
+ /* log_level1 + log_level2 + stats, but not stable UAPI */
+ open_opts.kernel_log_level = 1 + 2 + 4;
+
obj = bpf_object__open_file(file, &open_opts);
if (libbpf_get_error(obj)) {
p_err("failed to open object file");
@@ -1572,7 +1655,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
j = 0;
idx = 0;
bpf_object__for_each_map(map, obj) {
- if (!bpf_map__is_offload_neutral(map))
+ if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
bpf_map__set_ifindex(map, ifindex);
if (j < old_map_fds && idx == map_replace[j].idx) {
@@ -1597,12 +1680,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
goto err_close_obj;
}
- load_attr.obj = obj;
- if (verifier_logs)
- /* log_level1 + log_level2 + stats, but not stable UAPI */
- load_attr.log_level = 1 + 2 + 4;
-
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(obj);
if (err) {
p_err("failed to load object file");
goto err_close_obj;
@@ -1657,6 +1735,11 @@ err_unpin:
else
bpf_object__unpin_programs(obj, pinfile);
err_close_obj:
+ if (!legacy_libbpf) {
+ p_info("Warning: bpftool is now running in libbpf strict mode and has more stringent requirements about BPF programs.\n"
+ "If it used to work for this object file but now doesn't, see --legacy option for more details.\n");
+ }
+
bpf_object__close(obj);
err_free_reuse_maps:
for (i = 0; i < old_map_fds; i++)
@@ -1689,17 +1772,19 @@ static int try_loader(struct gen_loader_opts *gen)
sizeof(struct bpf_prog_desc));
int log_buf_sz = (1u << 24) - 1;
int err, fds_before, fd_delta;
- char *log_buf;
+ char *log_buf = NULL;
ctx = alloca(ctx_sz);
memset(ctx, 0, ctx_sz);
ctx->sz = ctx_sz;
- ctx->log_level = 1;
- ctx->log_size = log_buf_sz;
- log_buf = malloc(log_buf_sz);
- if (!log_buf)
- return -ENOMEM;
- ctx->log_buf = (long) log_buf;
+ if (verifier_logs) {
+ ctx->log_level = 1 + 2 + 4;
+ ctx->log_size = log_buf_sz;
+ log_buf = malloc(log_buf_sz);
+ if (!log_buf)
+ return -ENOMEM;
+ ctx->log_buf = (long) log_buf;
+ }
opts.ctx = ctx;
opts.data = gen->data;
opts.data_sz = gen->data_sz;
@@ -1708,9 +1793,9 @@ static int try_loader(struct gen_loader_opts *gen)
fds_before = count_open_fds();
err = bpf_load_and_run(&opts);
fd_delta = count_open_fds() - fds_before;
- if (err < 0) {
+ if (err < 0 || verifier_logs) {
fprintf(stderr, "err %d\n%s\n%s", err, opts.errstr, log_buf);
- if (fd_delta)
+ if (fd_delta && err < 0)
fprintf(stderr, "loader prog leaked %d FDs\n",
fd_delta);
}
@@ -1722,7 +1807,6 @@ static int do_loader(int argc, char **argv)
{
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
DECLARE_LIBBPF_OPTS(gen_loader_opts, gen);
- struct bpf_object_load_attr load_attr = {};
struct bpf_object *obj;
const char *file;
int err = 0;
@@ -1731,6 +1815,10 @@ static int do_loader(int argc, char **argv)
return -1;
file = GET_ARG();
+ if (verifier_logs)
+ /* log_level1 + log_level2 + stats, but not stable UAPI */
+ open_opts.kernel_log_level = 1 + 2 + 4;
+
obj = bpf_object__open_file(file, &open_opts);
if (libbpf_get_error(obj)) {
p_err("failed to open object file");
@@ -1741,12 +1829,7 @@ static int do_loader(int argc, char **argv)
if (err)
goto err_close_obj;
- load_attr.obj = obj;
- if (verifier_logs)
- /* log_level1 + log_level2 + stats, but not stable UAPI */
- load_attr.log_level = 1 + 2 + 4;
-
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(obj);
if (err) {
p_err("failed to load object file");
goto err_close_obj;
@@ -2016,41 +2099,58 @@ static void profile_print_readings(void)
static char *profile_target_name(int tgt_fd)
{
- struct bpf_prog_info_linear *info_linear;
- struct bpf_func_info *func_info;
+ struct bpf_func_info func_info;
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
const struct btf_type *t;
+ __u32 func_info_rec_size;
struct btf *btf = NULL;
char *name = NULL;
+ int err;
- info_linear = bpf_program__get_prog_info_linear(
- tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
- if (IS_ERR_OR_NULL(info_linear)) {
- p_err("failed to get info_linear for prog FD %d", tgt_fd);
- return NULL;
+ err = bpf_obj_get_info_by_fd(tgt_fd, &info, &info_len);
+ if (err) {
+ p_err("failed to bpf_obj_get_info_by_fd for prog FD %d", tgt_fd);
+ goto out;
}
- if (info_linear->info.btf_id == 0) {
+ if (info.btf_id == 0) {
p_err("prog FD %d doesn't have valid btf", tgt_fd);
goto out;
}
- btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+ func_info_rec_size = info.func_info_rec_size;
+ if (info.nr_func_info == 0) {
+ p_err("bpf_obj_get_info_by_fd for prog FD %d found 0 func_info", tgt_fd);
+ goto out;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.nr_func_info = 1;
+ info.func_info_rec_size = func_info_rec_size;
+ info.func_info = ptr_to_u64(&func_info);
+
+ err = bpf_obj_get_info_by_fd(tgt_fd, &info, &info_len);
+ if (err) {
+ p_err("failed to get func_info for prog FD %d", tgt_fd);
+ goto out;
+ }
+
+ btf = btf__load_from_kernel_by_id(info.btf_id);
if (libbpf_get_error(btf)) {
p_err("failed to load btf for prog FD %d", tgt_fd);
goto out;
}
- func_info = u64_to_ptr(info_linear->info.func_info);
- t = btf__type_by_id(btf, func_info[0].type_id);
+ t = btf__type_by_id(btf, func_info.type_id);
if (!t) {
p_err("btf %d doesn't have type %d",
- info_linear->info.btf_id, func_info[0].type_id);
+ info.btf_id, func_info.type_id);
goto out;
}
name = strdup(btf__name_by_offset(btf, t->name_off));
out:
btf__free(btf);
- free(info_linear);
return name;
}
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
index d9b420972934..f70702fcb224 100644
--- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -71,8 +71,8 @@ int iter(struct bpf_iter__task_file *ctx)
e.pid = task->tgid;
e.id = get_obj_id(file->private_data, obj_type);
- bpf_probe_read_kernel(&e.comm, sizeof(e.comm),
- task->group_leader->comm);
+ bpf_probe_read_kernel_str(&e.comm, sizeof(e.comm),
+ task->group_leader->comm);
bpf_seq_write(ctx->meta->seq, &e, sizeof(e));
return 0;
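
The one-line change above matters because bpf_probe_read_kernel() copies
exactly `size` bytes with no NUL-termination guarantee, while
bpf_probe_read_kernel_str() always terminates the string. A minimal,
hypothetical task-iterator sketch using the same helpers (assumes a generated
vmlinux.h):

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    char LICENSE[] SEC("license") = "Dual BSD/GPL";

    SEC("iter/task")
    int dump_comm(struct bpf_iter__task *ctx)
    {
        struct task_struct *task = ctx->task;
        char comm[16];

        if (!task)
            return 0;

        /* Copies at most sizeof(comm) bytes and always writes a
         * terminating NUL; returns the copied length on success.
         */
        bpf_probe_read_kernel_str(comm, sizeof(comm), task->comm);
        bpf_seq_write(ctx->meta->seq, comm, sizeof(comm));
        return 0;
    }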
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index ab2d2290569a..2f693b082bdb 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -32,7 +32,7 @@ static const struct btf *get_btf_vmlinux(void)
return btf_vmlinux;
btf_vmlinux = libbpf_find_kernel_btf();
- if (IS_ERR(btf_vmlinux))
+ if (libbpf_get_error(btf_vmlinux))
p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y");
return btf_vmlinux;
@@ -45,7 +45,7 @@ static const char *get_kern_struct_ops_name(const struct bpf_map_info *info)
const char *st_ops_name;
kern_btf = get_btf_vmlinux();
- if (IS_ERR(kern_btf))
+ if (libbpf_get_error(kern_btf))
return "<btf_vmlinux_not_found>";
t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id);
@@ -63,7 +63,7 @@ static __s32 get_map_info_type_id(void)
return map_info_type_id;
kern_btf = get_btf_vmlinux();
- if (IS_ERR(kern_btf)) {
+ if (libbpf_get_error(kern_btf)) {
map_info_type_id = PTR_ERR(kern_btf);
return map_info_type_id;
}
@@ -252,7 +252,7 @@ static struct res do_one_id(const char *id_str, work_func func, void *data,
}
fd = bpf_map_get_fd_by_id(id);
- if (fd == -1) {
+ if (fd < 0) {
p_err("can't get map by id (%lu): %s", id, strerror(errno));
res.nr_errs++;
return res;
@@ -415,7 +415,7 @@ static int do_dump(int argc, char **argv)
}
kern_btf = get_btf_vmlinux();
- if (IS_ERR(kern_btf))
+ if (libbpf_get_error(kern_btf))
return -1;
if (!json_output) {
@@ -479,7 +479,7 @@ static int do_unregister(int argc, char **argv)
static int do_register(int argc, char **argv)
{
- struct bpf_object_load_attr load_attr = {};
+ LIBBPF_OPTS(bpf_object_open_opts, open_opts);
const struct bpf_map_def *def;
struct bpf_map_info info = {};
__u32 info_len = sizeof(info);
@@ -494,18 +494,17 @@ static int do_register(int argc, char **argv)
file = GET_ARG();
- obj = bpf_object__open(file);
- if (IS_ERR_OR_NULL(obj))
+ if (verifier_logs)
+ /* log_level1 + log_level2 + stats, but not stable UAPI */
+ open_opts.kernel_log_level = 1 + 2 + 4;
+
+ obj = bpf_object__open_file(file, &open_opts);
+ if (libbpf_get_error(obj))
return -1;
set_max_rlimit();
- load_attr.obj = obj;
- if (verifier_logs)
- /* log_level1 + log_level2 + stats, but not stable UAPI */
- load_attr.log_level = 1 + 2 + 4;
-
- if (bpf_object__load_xattr(&load_attr)) {
+ if (bpf_object__load(obj)) {
bpf_object__close(obj);
return -1;
}
@@ -516,7 +515,7 @@ static int do_register(int argc, char **argv)
continue;
link = bpf_map__attach_struct_ops(map);
- if (IS_ERR(link)) {
+ if (libbpf_get_error(link)) {
p_err("can't register struct_ops %s: %s",
bpf_map__name(map),
strerror(-PTR_ERR(link)));
@@ -596,7 +595,7 @@ int do_struct_ops(int argc, char **argv)
err = cmd_select(cmds, argc, argv, do_help);
- if (!IS_ERR(btf_vmlinux))
+ if (!libbpf_get_error(btf_vmlinux))
btf__free(btf_vmlinux);
return err;
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index 751643f860b2..9ddeca947635 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -19,6 +19,7 @@ CC = $(HOSTCC)
LD = $(HOSTLD)
ARCH = $(HOSTARCH)
RM ?= rm
+CROSS_COMPILE =
OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index a59cb0ee609c..5d26f3c6f918 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -83,6 +83,7 @@ struct btf_id {
int cnt;
};
int addr_cnt;
+ bool is_set;
Elf64_Addr addr[ADDR_CNT];
};
@@ -167,7 +168,7 @@ static struct btf_id *btf_id__find(struct rb_root *root, const char *name)
return NULL;
}
-static struct btf_id*
+static struct btf_id *
btf_id__add(struct rb_root *root, char *name, bool unique)
{
struct rb_node **p = &root->rb_node;
@@ -451,8 +452,10 @@ static int symbols_collect(struct object *obj)
* in symbol's size, together with 'cnt' field hence
* that - 1.
*/
- if (id)
+ if (id) {
id->cnt = sym.st_size / sizeof(int) - 1;
+ id->is_set = true;
+ }
} else {
pr_err("FAILED unsupported prefix %s\n", prefix);
return -1;
@@ -568,9 +571,8 @@ static int id_patch(struct object *obj, struct btf_id *id)
int *ptr = data->d_buf;
int i;
- if (!id->id) {
+ if (!id->id && !id->is_set)
pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name);
- }
for (i = 0; i < id->addr_cnt; i++) {
unsigned long addr = id->addr[i];
@@ -730,7 +732,8 @@ int main(int argc, const char **argv)
if (obj.efile.idlist_shndx == -1 ||
obj.efile.symbols_shndx == -1) {
pr_debug("Cannot find .BTF_ids or symbols sections, nothing to do\n");
- return 0;
+ err = 0;
+ goto out;
}
if (symbols_collect(&obj))
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index 8791d0e2762b..da6de16a3dfb 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -12,7 +12,7 @@ BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a
BPF_DESTDIR := $(BPFOBJ_OUTPUT)
BPF_INCLUDE := $(BPF_DESTDIR)/include
INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi)
-CFLAGS := -g -Wall
+CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS)
# Try to detect best kernel BTF source
KERNEL_REL := $(shell uname -r)
@@ -88,4 +88,4 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OU
$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT)
$(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \
- CC=$(HOSTCC) LD=$(HOSTLD)
+ ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD)
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
index ab9353f2fd46..9a5c1f008fe6 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -68,7 +68,7 @@ int handle__sched_switch(u64 *ctx)
*/
struct task_struct *prev = (struct task_struct *)ctx[1];
struct task_struct *next = (struct task_struct *)ctx[2];
- struct event event = {};
+ struct runq_event event = {};
u64 *tsp, delta_us;
long state;
u32 pid;
diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c
index d89715844952..d78f4148597f 100644
--- a/tools/bpf/runqslower/runqslower.c
+++ b/tools/bpf/runqslower/runqslower.c
@@ -100,7 +100,7 @@ static int bump_memlock_rlimit(void)
void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
{
- const struct event *e = data;
+ const struct runq_event *e = data;
struct tm *tm;
char ts[32];
time_t t;
@@ -123,7 +123,6 @@ int main(int argc, char **argv)
.parser = parse_arg,
.doc = argp_program_doc,
};
- struct perf_buffer_opts pb_opts;
struct perf_buffer *pb = NULL;
struct runqslower_bpf *obj;
int err;
@@ -165,9 +164,8 @@ int main(int argc, char **argv)
printf("Tracing run queue latency higher than %llu us\n", env.min_us);
printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)");
- pb_opts.sample_cb = handle_event;
- pb_opts.lost_cb = handle_lost_events;
- pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, &pb_opts);
+ pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64,
+ handle_event, handle_lost_events, NULL, NULL);
err = libbpf_get_error(pb);
if (err) {
pb = NULL;
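
This is the same libbpf API change as in map_perf_ring.c earlier in the
series: perf_buffer__new() now takes the sample and lost callbacks plus a
context pointer directly, instead of passing them through perf_buffer_opts. A
minimal sketch of the new call shape (callback bodies elided; 64 pages per CPU
is illustrative):

    #include <bpf/libbpf.h>

    static void on_sample(void *ctx, int cpu, void *data, __u32 size)
    {
        /* consume one event */
    }

    static void on_lost(void *ctx, int cpu, __u64 cnt)
    {
        /* account for dropped events */
    }

    /* map_fd must refer to a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. */
    static struct perf_buffer *open_perf_buf(int map_fd)
    {
        return perf_buffer__new(map_fd, 64, on_sample, on_lost,
                                NULL /* ctx */, NULL /* opts */);
    }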
diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h
index 9db225425e5f..4f70f07200c2 100644
--- a/tools/bpf/runqslower/runqslower.h
+++ b/tools/bpf/runqslower/runqslower.h
@@ -4,7 +4,7 @@
#define TASK_COMM_LEN 16
-struct event {
+struct runq_event {
char task[TASK_COMM_LEN];
__u64 delta_us;
pid_t pid;
diff --git a/tools/build/Build.include b/tools/build/Build.include
index 2cf3b1bde86e..c2a95ab47379 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -99,7 +99,7 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX
###
## HOSTCC C flags
-host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(KBUILD_HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
+host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
# output directory for tests below
TMPOUT = .tmp_$$$$
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 45a9a59828c3..ae61f464043a 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -48,7 +48,6 @@ FEATURE_TESTS_BASIC := \
numa_num_possible_cpus \
libperl \
libpython \
- libpython-version \
libslang \
libslang-include-subdir \
libtraceevent \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 0a3244ad9673..1480910c792e 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -32,7 +32,6 @@ FILES= \
test-numa_num_possible_cpus.bin \
test-libperl.bin \
test-libpython.bin \
- test-libpython-version.bin \
test-libslang.bin \
test-libslang-include-subdir.bin \
test-libtraceevent.bin \
@@ -227,9 +226,6 @@ $(OUTPUT)test-libperl.bin:
$(OUTPUT)test-libpython.bin:
$(BUILD) $(FLAGS_PYTHON_EMBED)
-$(OUTPUT)test-libpython-version.bin:
- $(BUILD)
-
$(OUTPUT)test-libbfd.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 0b243ce842be..5ffafb967b6e 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -14,10 +14,6 @@
# include "test-libpython.c"
#undef main
-#define main main_test_libpython_version
-# include "test-libpython-version.c"
-#undef main
-
#define main main_test_libperl
# include "test-libperl.c"
#undef main
@@ -177,7 +173,6 @@
int main(int argc, char *argv[])
{
main_test_libpython();
- main_test_libpython_version();
main_test_libperl();
main_test_hello();
main_test_libelf();
diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c
index 82070eadfc07..727d22e34a6e 100644
--- a/tools/build/feature/test-bpf.c
+++ b/tools/build/feature/test-bpf.c
@@ -14,6 +14,12 @@
# define __NR_bpf 349
# elif defined(__s390__)
# define __NR_bpf 351
+# elif defined(__mips__) && defined(_ABIO32)
+# define __NR_bpf 4355
+# elif defined(__mips__) && defined(_ABIN32)
+# define __NR_bpf 6319
+# elif defined(__mips__) && defined(_ABI64)
+# define __NR_bpf 5315
# else
# error __NR_bpf not defined. libbpf does not support your arch.
# endif
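The MIPS numbers follow the architecture's per-ABI syscall bases (4000 for o32, 6000 for n32, 5000 for n64), so bpf() lands at 4355, 6319 and 5315 respectively. The feature test only needs to prove the syscall can be issued; a sketch of the kind of wrapper it ends up exercising, assuming one of the __NR_bpf definitions above is in effect:

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static long sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size)
{
	/* raw syscall; callers inspect errno on a negative return */
	return syscall(__NR_bpf, cmd, attr, size);
}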
diff --git a/tools/build/feature/test-libpython-version.c b/tools/build/feature/test-libpython-version.c
deleted file mode 100644
index 47714b942d4d..000000000000
--- a/tools/build/feature/test-libpython-version.c
+++ /dev/null
@@ -1,11 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <Python.h>
-
-#if PY_VERSION_HEX >= 0x03000000
- #error
-#endif
-
-int main(void)
-{
- return 0;
-}
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index 0076437f6e3f..b94a16ba5c6c 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -279,6 +279,7 @@ static void print_event(struct iio_event_data *event)
printf(", direction: %s", iio_ev_dir_text[dir]);
printf("\n");
+ fflush(stdout);
}
/* Enable or disable events in sysfs if the knob is available */
diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h
index 5d2ab38965cc..9ab313e93555 100644
--- a/tools/include/asm-generic/bitops.h
+++ b/tools/include/asm-generic/bitops.h
@@ -18,7 +18,6 @@
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#ifndef _TOOLS_LINUX_BITOPS_H_
#error only <linux/bitops.h> can be included directly
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 95611df1d26e..ea97804d04d4 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -1,9 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PERF_BITOPS_H
-#define _PERF_BITOPS_H
+#ifndef _TOOLS_LINUX_BITMAP_H
+#define _TOOLS_LINUX_BITMAP_H
#include <string.h>
#include <linux/bitops.h>
+#include <linux/find.h>
#include <stdlib.h>
#include <linux/kernel.h>
@@ -181,4 +182,4 @@ static inline int bitmap_intersects(const unsigned long *src1,
return __bitmap_intersects(src1, src2, nbits);
}
-#endif /* _PERF_BITOPS_H */
+#endif /* _TOOLS_LINUX_BITMAP_H */
diff --git a/tools/include/linux/debug_locks.h b/tools/include/linux/debug_locks.h
deleted file mode 100644
index 72d595ce764a..000000000000
--- a/tools/include/linux/debug_locks.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_DEBUG_LOCKS_H_
-#define _LIBLOCKDEP_DEBUG_LOCKS_H_
-
-#include <stddef.h>
-#include <linux/compiler.h>
-#include <asm/bug.h>
-
-#define DEBUG_LOCKS_WARN_ON(x) WARN_ON(x)
-
-extern bool debug_locks;
-extern bool debug_locks_silent;
-
-#endif
diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/linux/find.h
index 6481fd11012a..47e2bd6c5174 100644
--- a/tools/include/asm-generic/bitops/find.h
+++ b/tools/include/linux/find.h
@@ -1,11 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
-#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
+#ifndef _TOOLS_LINUX_FIND_H_
+#define _TOOLS_LINUX_FIND_H_
+
+#ifndef _TOOLS_LINUX_BITMAP_H
+#error tools: only <linux/bitmap.h> can be included directly
+#endif
+
+#include <linux/bitops.h>
extern unsigned long _find_next_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long nbits,
unsigned long start, unsigned long invert, unsigned long le);
extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size);
extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
@@ -96,7 +104,6 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
#endif
#ifndef find_first_bit
-
/**
* find_first_bit - find the first set bit in a memory region
* @addr: The address to start the search at
@@ -116,11 +123,34 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
return _find_first_bit(addr, size);
}
+#endif
+
+#ifndef find_first_and_bit
+/**
+ * find_first_and_bit - find the first set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number of the first set bit in both regions.
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0);
-#endif /* find_first_bit */
+ return val ? __ffs(val) : size;
+ }
-#ifndef find_first_zero_bit
+ return _find_first_and_bit(addr1, addr2, size);
+}
+#endif
+#ifndef find_first_zero_bit
/**
* find_first_zero_bit - find the first cleared bit in a memory region
* @addr: The address to start the search at
@@ -142,4 +172,43 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
}
#endif
-#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */
+#ifndef find_last_bit
+/**
+ * find_last_bit - find the last set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The number of bits to search
+ *
+ * Returns the bit number of the last set bit, or size.
+ */
+static inline
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr & GENMASK(size - 1, 0);
+
+ return val ? __fls(val) : size;
+ }
+
+ return _find_last_bit(addr, size);
+}
+#endif
+
+/**
+ * find_next_clump8 - find next 8-bit clump with set bits in a memory region
+ * @clump: location to store copy of found clump
+ * @addr: address to base the search on
+ * @size: bitmap size in number of bits
+ * @offset: bit offset at which to start searching
+ *
+ * Returns the bit offset for the next set clump; the found clump value is
+ * copied to the location pointed to by @clump. If no bits are set, returns @size.
+ */
+extern unsigned long find_next_clump8(unsigned long *clump,
+ const unsigned long *addr,
+ unsigned long size, unsigned long offset);
+
+#define find_first_clump8(clump, bits, size) \
+ find_next_clump8((clump), (bits), (size), 0)
+
+#endif /* _TOOLS_LINUX_FIND_H_ */
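A quick usage sketch of the new find_first_and_bit(), with illustrative values: it returns the lowest bit index set in the intersection (AND) of two bitmaps, or the size when the intersection is empty.

#include <linux/bitmap.h>

static void find_and_example(void)
{
	unsigned long a = 0x28;	/* bits 3 and 5 set */
	unsigned long b = 0x22;	/* bits 1 and 5 set */

	/* only bit 5 is common to both -> returns 5 */
	unsigned long first = find_first_and_bit(&a, &b, BITS_PER_LONG);

	/* searching just bit 0, clear in both -> returns the size (1) */
	unsigned long none = find_first_and_bit(&a, &b, 1);

	(void)first;
	(void)none;
}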
diff --git a/tools/include/linux/hardirq.h b/tools/include/linux/hardirq.h
deleted file mode 100644
index b25580b6a9be..000000000000
--- a/tools/include/linux/hardirq.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_LINUX_HARDIRQ_H_
-#define _LIBLOCKDEP_LINUX_HARDIRQ_H_
-
-#define SOFTIRQ_BITS 0UL
-#define HARDIRQ_BITS 0UL
-#define SOFTIRQ_SHIFT 0UL
-#define HARDIRQ_SHIFT 0UL
-#define hardirq_count() 0UL
-#define softirq_count() 0UL
-
-#endif
diff --git a/tools/include/linux/hash.h b/tools/include/linux/hash.h
index ad6fa21d977b..38edaa08f862 100644
--- a/tools/include/linux/hash.h
+++ b/tools/include/linux/hash.h
@@ -62,10 +62,7 @@ static inline u32 __hash_32_generic(u32 val)
return val * GOLDEN_RATIO_32;
}
-#ifndef HAVE_ARCH_HASH_32
-#define hash_32 hash_32_generic
-#endif
-static inline u32 hash_32_generic(u32 val, unsigned int bits)
+static inline u32 hash_32(u32 val, unsigned int bits)
{
/* High bits are more random, so use them. */
return __hash_32(val) >> (32 - bits);
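With the HAVE_ARCH_HASH_32 escape hatch gone, hash_32() is the single generic implementation: multiply by a 32-bit golden-ratio constant and keep the high bits, which are the better-mixed ones. A small sketch of the usual bucketing pattern against the tools headers:

#include <linux/hash.h>

/* index into a 256-entry (1 << 8) hash table; result is in [0, 255] */
static unsigned int bucket_of(u32 key)
{
	return hash_32(key, 8);
}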
diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h
deleted file mode 100644
index 501262aee8ff..000000000000
--- a/tools/include/linux/irqflags.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
-#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
-
-# define lockdep_hardirq_context() 0
-# define lockdep_softirq_context(p) 0
-# define lockdep_hardirqs_enabled() 0
-# define lockdep_softirqs_enabled(p) 0
-# define lockdep_hardirq_enter() do { } while (0)
-# define lockdep_hardirq_exit() do { } while (0)
-# define lockdep_softirq_enter() do { } while (0)
-# define lockdep_softirq_exit() do { } while (0)
-# define INIT_TRACE_IRQFLAGS
-
-# define stop_critical_timings() do { } while (0)
-# define start_critical_timings() do { } while (0)
-
-#define raw_local_irq_disable() do { } while (0)
-#define raw_local_irq_enable() do { } while (0)
-#define raw_local_irq_save(flags) ((flags) = 0)
-#define raw_local_irq_restore(flags) ((void)(flags))
-#define raw_local_save_flags(flags) ((flags) = 0)
-#define raw_irqs_disabled_flags(flags) ((void)(flags))
-#define raw_irqs_disabled() 0
-#define raw_safe_halt()
-
-#define local_irq_enable() do { } while (0)
-#define local_irq_disable() do { } while (0)
-#define local_irq_save(flags) ((flags) = 0)
-#define local_irq_restore(flags) ((void)(flags))
-#define local_save_flags(flags) ((flags) = 0)
-#define irqs_disabled() (1)
-#define irqs_disabled_flags(flags) ((void)(flags), 0)
-#define safe_halt() do { } while (0)
-
-#define trace_lock_release(x, y)
-#define trace_lock_acquire(a, b, c, d, e, f, g)
-
-#endif
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index a7e54a08fb54..9701e8307db0 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -7,6 +7,7 @@
#include <assert.h>
#include <linux/build_bug.h>
#include <linux/compiler.h>
+#include <linux/math.h>
#include <endian.h>
#include <byteswap.h>
@@ -14,8 +15,6 @@
#define UINT_MAX (~0U)
#endif
-#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
-
#define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
#define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
@@ -52,15 +51,6 @@
_min1 < _min2 ? _min1 : _min2; })
#endif
-#ifndef roundup
-#define roundup(x, y) ( \
-{ \
- const typeof(y) __y = y; \
- (((x) + (__y - 1)) / __y) * __y; \
-} \
-)
-#endif
-
#ifndef BUG_ON
#ifdef NDEBUG
#define BUG_ON(cond) do { if (cond) {} } while (0)
@@ -102,17 +92,9 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
int scnprintf(char * buf, size_t size, const char * fmt, ...);
int scnprintf_pad(char * buf, size_t size, const char * fmt, ...);
+#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
-
-/*
- * This looks more complex than it should be. But we need to
- * get the type for the ~ right in round_down (it needs to be
- * as wide as the result!), and we want to evaluate the macro
- * arguments just once each.
- */
-#define __round_mask(x, y) ((__typeof__(x))((y)-1))
-#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
-#define round_down(x, y) ((x) & ~__round_mask(x, y))
+#endif
#define current_gfp_context(k) 0
#define synchronize_rcu()
diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h
deleted file mode 100644
index e56997288f2b..000000000000
--- a/tools/include/linux/lockdep.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_LOCKDEP_H_
-#define _LIBLOCKDEP_LOCKDEP_H_
-
-#include <sys/prctl.h>
-#include <sys/syscall.h>
-#include <string.h>
-#include <limits.h>
-#include <linux/utsname.h>
-#include <linux/compiler.h>
-#include <linux/export.h>
-#include <linux/kern_levels.h>
-#include <linux/err.h>
-#include <linux/rcu.h>
-#include <linux/list.h>
-#include <linux/hardirq.h>
-#include <unistd.h>
-
-#define MAX_LOCK_DEPTH 63UL
-
-#define asmlinkage
-#define __visible
-
-#include "../../../include/linux/lockdep.h"
-
-struct task_struct {
- u64 curr_chain_key;
- int lockdep_depth;
- unsigned int lockdep_recursion;
- struct held_lock held_locks[MAX_LOCK_DEPTH];
- gfp_t lockdep_reclaim_gfp;
- int pid;
- int state;
- char comm[17];
-};
-
-#define TASK_RUNNING 0
-
-extern struct task_struct *__curr(void);
-
-#define current (__curr())
-
-static inline int debug_locks_off(void)
-{
- return 1;
-}
-
-#define task_pid_nr(tsk) ((tsk)->pid)
-
-#define KSYM_NAME_LEN 128
-#define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__)
-#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
-#define pr_warn pr_err
-#define pr_cont pr_err
-
-#define list_del_rcu list_del
-
-#define atomic_t unsigned long
-#define atomic_inc(x) ((*(x))++)
-
-#define print_tainted() ""
-#define static_obj(x) 1
-
-#define debug_show_all_locks()
-extern void debug_check_no_locks_held(void);
-
-static __used bool __is_kernel_percpu_address(unsigned long addr, void *can_addr)
-{
- return false;
-}
-
-#endif
diff --git a/tools/include/linux/math.h b/tools/include/linux/math.h
new file mode 100644
index 000000000000..4e7af99ec9eb
--- /dev/null
+++ b/tools/include/linux/math.h
@@ -0,0 +1,25 @@
+#ifndef _TOOLS_MATH_H
+#define _TOOLS_MATH_H
+
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+
+#ifndef roundup
+#define roundup(x, y) ( \
+{ \
+ const typeof(y) __y = y; \
+ (((x) + (__y - 1)) / __y) * __y; \
+} \
+)
+#endif
+
+#endif
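Note the split in generality preserved by the move: round_up()/round_down() work by bit masking and therefore require a power-of-two y, while roundup() and DIV_ROUND_UP() divide and so accept any positive divisor. Illustrative values:

#include <assert.h>
#include <linux/math.h>

static void math_examples(void)
{
	assert(DIV_ROUND_UP(10, 4) == 3);	/* ceil(10 / 4) */
	assert(round_up(10, 8) == 16);		/* y must be a power of two */
	assert(round_down(10, 8) == 8);
	assert(roundup(10, 6) == 12);		/* any positive y */
}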
diff --git a/tools/include/linux/proc_fs.h b/tools/include/linux/proc_fs.h
deleted file mode 100644
index 8b3b03b64fda..000000000000
--- a/tools/include/linux/proc_fs.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef _TOOLS_INCLUDE_LINUX_PROC_FS_H
-#define _TOOLS_INCLUDE_LINUX_PROC_FS_H
-
-#endif /* _TOOLS_INCLUDE_LINUX_PROC_FS_H */
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index c934572d935c..622266b197d0 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -37,6 +37,4 @@ static inline bool arch_spin_is_locked(arch_spinlock_t *mutex)
return true;
}
-#include <linux/lockdep.h>
-
#endif
diff --git a/tools/include/linux/stacktrace.h b/tools/include/linux/stacktrace.h
deleted file mode 100644
index ae343ac35bfa..000000000000
--- a/tools/include/linux/stacktrace.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_LINUX_STACKTRACE_H_
-#define _LIBLOCKDEP_LINUX_STACKTRACE_H_
-
-#include <execinfo.h>
-
-struct stack_trace {
- unsigned int nr_entries, max_entries;
- unsigned long *entries;
- int skip;
-};
-
-static inline void print_stack_trace(struct stack_trace *trace, int spaces)
-{
- backtrace_symbols_fd((void **)trace->entries, trace->nr_entries, 1);
-}
-
-#define save_stack_trace(trace) \
- ((trace)->nr_entries = \
- backtrace((void **)(trace)->entries, (trace)->max_entries))
-
-static inline int dump_stack(void)
-{
- void *array[64];
- size_t size;
-
- size = backtrace(array, 64);
- backtrace_symbols_fd(array, size, 1);
-
- return 0;
-}
-
-#endif
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 3430667b0d24..c1c285fe494a 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -265,12 +265,17 @@ struct stat {
* - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
* - the system call is performed by calling the syscall instruction
* - syscall return comes in rax
- * - rcx and r8..r11 may be clobbered, others are preserved.
+ * - rcx and r11 are clobbered, others are preserved.
* - the arguments are cast to long and assigned into the target registers
* which are then simply passed as registers to the asm code, so that we
* don't have to experience issues with register constraints.
* - the syscall number is always specified last in order to allow to force
* some registers before (gcc refuses a %-register at the last position).
+ * - see also the x86-64 psABI, section A.2 "AMD64 Linux Kernel Conventions",
+ *   A.2.1 "Calling Conventions":
+ *   https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI
+ *
*/
#define my_syscall0(num) \
@@ -280,9 +285,9 @@ struct stat {
\
asm volatile ( \
"syscall\n" \
- : "=a" (_ret) \
+ : "=a"(_ret) \
: "0"(_num) \
- : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \
+ : "rcx", "r11", "memory", "cc" \
); \
_ret; \
})
@@ -295,10 +300,10 @@ struct stat {
\
asm volatile ( \
"syscall\n" \
- : "=a" (_ret) \
+ : "=a"(_ret) \
: "r"(_arg1), \
"0"(_num) \
- : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \
+ : "rcx", "r11", "memory", "cc" \
); \
_ret; \
})
@@ -312,10 +317,10 @@ struct stat {
\
asm volatile ( \
"syscall\n" \
- : "=a" (_ret) \
+ : "=a"(_ret) \
: "r"(_arg1), "r"(_arg2), \
"0"(_num) \
- : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \
+ : "rcx", "r11", "memory", "cc" \
); \
_ret; \
})
@@ -330,10 +335,10 @@ struct stat {
\
asm volatile ( \
"syscall\n" \
- : "=a" (_ret) \
+ : "=a"(_ret) \
: "r"(_arg1), "r"(_arg2), "r"(_arg3), \
"0"(_num) \
- : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \
+ : "rcx", "r11", "memory", "cc" \
); \
_ret; \
})
@@ -349,10 +354,10 @@ struct stat {
\
asm volatile ( \
"syscall\n" \
- : "=a" (_ret), "=r"(_arg4) \
+ : "=a"(_ret) \
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
"0"(_num) \
- : "rcx", "r8", "r9", "r11", "memory", "cc" \
+ : "rcx", "r11", "memory", "cc" \
); \
_ret; \
})
@@ -369,10 +374,10 @@ struct stat {
\
asm volatile ( \
"syscall\n" \
- : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \
+ : "=a"(_ret) \
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
"0"(_num) \
- : "rcx", "r9", "r11", "memory", "cc" \
+ : "rcx", "r11", "memory", "cc" \
); \
_ret; \
})
@@ -390,7 +395,7 @@ struct stat {
\
asm volatile ( \
"syscall\n" \
- : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \
+ : "=a"(_ret) \
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
"r"(_arg6), "0"(_num) \
: "rcx", "r11", "memory", "cc" \
@@ -399,17 +404,23 @@ struct stat {
})
/* startup code */
+/*
+ * x86-64 System V ABI mandates:
+ * 1) %rsp must be 16-byte aligned right before the function call.
+ * 2) The frame pointer (%rbp) of the deepest stack frame must be zero.
+ */
asm(".section .text\n"
".global _start\n"
"_start:\n"
"pop %rdi\n" // argc (first arg, %rdi)
"mov %rsp, %rsi\n" // argv[] (second arg, %rsi)
"lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
- "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when
- "sub $8, %rsp\n" // entering the callee
+ "xor %ebp, %ebp\n" // zero the stack frame
+ "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call
"call main\n" // main() returns the status code, we'll exit with it.
- "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits
- "mov $60, %rax\n" // NR_exit == 60
+ "mov %eax, %edi\n" // retrieve exit code (32 bit)
+ "mov $60, %eax\n" // NR_exit == 60
"syscall\n" // really exit
"hlt\n" // ensure it does not return
"");
@@ -577,20 +588,28 @@ struct sys_stat_struct {
})
/* startup code */
+/*
+ * i386 System V ABI mandates:
+ * 1) the last pushed argument must be 16-byte aligned.
+ * 2) The frame pointer (%ebp) of the deepest stack frame must be zero.
+ */
asm(".section .text\n"
".global _start\n"
"_start:\n"
"pop %eax\n" // argc (first arg, %eax)
"mov %esp, %ebx\n" // argv[] (second arg, %ebx)
"lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
- "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when
+ "xor %ebp, %ebp\n" // zero the stack frame
+ "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before
+ "sub $4, %esp\n" // the call instruction (args are aligned)
"push %ecx\n" // push all registers on the stack so that we
"push %ebx\n" // support both regparm and plain stack modes
"push %eax\n"
"call main\n" // main() returns the status code in %eax
- "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits
- "movl $1, %eax\n" // NR_exit == 1
- "int $0x80\n" // exit now
+ "mov %eax, %ebx\n" // retrieve exit code (32-bit int)
+ "movl $1, %eax\n" // NR_exit == 1
+ "int $0x80\n" // exit now
"hlt\n" // ensure it does not
"");
@@ -774,7 +793,6 @@ asm(".section .text\n"
"and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the
"mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc)
"bl main\n" // main() returns the status code, we'll exit with it.
- "and %r0, %r0, $0xff\n" // limit exit code to 8 bits
"movs r7, $1\n" // NR_exit == 1
"svc $0x00\n"
"");
@@ -971,7 +989,6 @@ asm(".section .text\n"
"add x2, x2, x1\n" // + argv
"and sp, x1, -16\n" // sp must be 16-byte aligned in the callee
"bl main\n" // main() returns the status code, we'll exit with it.
- "and x0, x0, 0xff\n" // limit exit code to 8 bits
"mov x8, 93\n" // NR_exit == 93
"svc #0\n"
"");
@@ -1176,7 +1193,7 @@ asm(".section .text\n"
"addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there!
"jal main\n" // main() returns the status code, we'll exit with it.
"nop\n" // delayed slot
- "and $a0, $v0, 0xff\n" // limit exit code to 8 bits
+ "move $a0, $v0\n" // retrieve 32-bit exit code from v0
"li $v0, 4001\n" // NR_exit == 4001
"syscall\n"
".end __start\n"
@@ -1374,7 +1391,6 @@ asm(".section .text\n"
"add a2,a2,a1\n" // + argv
"andi sp,a1,-16\n" // sp must be 16-byte aligned
"call main\n" // main() returns the status code, we'll exit with it.
- "andi a0, a0, 0xff\n" // limit exit code to 8 bits
"li a7, 93\n" // NR_exit == 93
"ecall\n"
"");
@@ -1556,6 +1572,12 @@ pid_t sys_getpid(void)
}
static __attribute__((unused))
+pid_t sys_gettid(void)
+{
+ return my_syscall0(__NR_gettid);
+}
+
+static __attribute__((unused))
int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
{
return my_syscall2(__NR_gettimeofday, tv, tz);
@@ -2014,6 +2036,18 @@ pid_t getpid(void)
}
static __attribute__((unused))
+pid_t gettid(void)
+{
+ pid_t ret = sys_gettid();
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
int gettimeofday(struct timeval *tv, struct timezone *tz)
{
int ret = sys_gettimeofday(tv, tz);
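With the raw syscall and the errno-setting wrapper in place, a nolibc program can use gettid() like any other call. A minimal sketch; the build line is the customary nolibc invocation and may need adjusting for your toolchain:

/* cc -static -nostdlib -Os -include tools/include/nolibc/nolibc.h \
 *    -o tid tid.c -lgcc		(illustrative build line)
 */
int main(void)
{
	/* in a single-threaded process the TID equals the PID */
	return gettid() == getpid() ? 0 : 1;
}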
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 4557a8b6086f..1c48b0ae3ba3 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -883,8 +883,11 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
#define __NR_futex_waitv 449
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+#define __NR_set_mempolicy_home_node 450
+__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
+
#undef __NR_syscalls
-#define __NR_syscalls 450
+#define __NR_syscalls 451
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 3b810b53ba8b..642808520d92 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -1096,6 +1096,24 @@ extern "C" {
#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer)
#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
+/**
+ * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata.
+ *
+ * This queries metadata about a framebuffer. User-space fills
+ * &drm_mode_fb_cmd2.fb_id as the input, and the kernel fills the rest of the
+ * struct as the output.
+ *
+ * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles
+ * will be filled with GEM buffer handles. Planes are valid until one has a
+ * zero handle -- this can be used to compute the number of planes.
+ *
+ * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid
+ * until one has a zero &drm_mode_fb_cmd2.pitches.
+ *
+ * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set
+ * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the
+ * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier.
+ */
#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
/*
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ba5af15e25f5..b0383d371b9a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1342,8 +1342,10 @@ union bpf_attr {
/* or valid module BTF object fd or 0 to attach to vmlinux */
__u32 attach_btf_obj_fd;
};
- __u32 :32; /* pad */
+ __u32 core_relo_cnt; /* number of bpf_core_relo */
__aligned_u64 fd_array; /* array of FDs */
+ __aligned_u64 core_relos;
+ __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1744,7 +1746,7 @@ union bpf_attr {
* if the maximum number of tail calls has been reached for this
* chain of programs. This limit is defined in the kernel by the
* macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
- * which is currently set to 32.
+ * which is currently set to 33.
* Return
* 0 on success, or a negative error in case of failure.
*
@@ -4938,6 +4940,84 @@ union bpf_attr {
* **-ENOENT** if symbol is not found.
*
* **-EPERM** if caller does not have permission to obtain kernel address.
+ *
+ * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags)
+ * Description
+ * Find vma of *task* that contains *addr*, call *callback_fn*
+ * function with *task*, *vma*, and *callback_ctx*.
+ * The *callback_fn* should be a static function and
+ * the *callback_ctx* should be a pointer to the stack.
 *		The *flags* argument is used to control certain aspects of
 *		the helper. Currently, *flags* must be 0.
+ *
+ * The expected callback signature is
+ *
+ * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx);
+ *
+ * Return
+ * 0 on success.
+ * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
+ * **-EBUSY** if failed to try lock mmap_lock.
+ * **-EINVAL** for invalid **flags**.
+ *
+ * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags)
+ * Description
+ * For **nr_loops**, call **callback_fn** function
+ * with **callback_ctx** as the context parameter.
+ * The **callback_fn** should be a static function and
+ * the **callback_ctx** should be a pointer to the stack.
 *		The **flags** argument is used to control certain aspects of
 *		the helper. Currently, **flags** must be 0, and **nr_loops**
 *		is limited to 1 << 23 (~8 million) loops.
 *
 *		The expected callback signature is
 *
 *		long (\*callback_fn)(u32 index, void \*ctx);
+ *
+ * where **index** is the current index in the loop. The index
+ * is zero-indexed.
+ *
+ * If **callback_fn** returns 0, the helper will continue to the next
+ * loop. If return value is 1, the helper will skip the rest of
+ * the loops and return. Other return values are not used now,
+ * and will be rejected by the verifier.
+ *
+ * Return
+ * The number of loops performed, **-EINVAL** for invalid **flags**,
+ * **-E2BIG** if **nr_loops** exceeds the maximum number of loops.
+ *
+ * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2)
+ * Description
+ * Do strncmp() between **s1** and **s2**. **s1** doesn't need
+ * to be null-terminated and **s1_sz** is the maximum storage
+ * size of **s1**. **s2** must be a read-only string.
+ * Return
+ * An integer less than, equal to, or greater than zero
 *		if the first **s1_sz** bytes of **s1** are found to be
 *		less than, to match, or to be greater than **s2**.
+ *
+ * long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
+ * Description
 *		Get the **n**-th argument (zero-based) of the traced function
 *		(for tracing programs) and store it in **value**.
+ *
+ * Return
+ * 0 on success.
 *		**-EINVAL** if **n** >= number of arguments of the traced function.
+ *
+ * long bpf_get_func_ret(void *ctx, u64 *value)
+ * Description
+ * Get return value of the traced function (for tracing programs)
+ * in **value**.
+ *
+ * Return
+ * 0 on success.
+ * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN.
+ *
+ * long bpf_get_func_arg_cnt(void *ctx)
+ * Description
+ * Get number of arguments of the traced function (for tracing programs).
+ *
+ * Return
+ * The number of arguments of the traced function.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5120,6 +5200,12 @@ union bpf_attr {
FN(trace_vprintk), \
FN(skc_to_unix_sock), \
FN(kallsyms_lookup_name), \
+ FN(find_vma), \
+ FN(loop), \
+ FN(strncmp), \
+ FN(get_func_arg), \
+ FN(get_func_ret), \
+ FN(get_func_arg_cnt), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6296,6 +6382,7 @@ struct bpf_sk_lookup {
__u32 local_ip4; /* Network byte order */
__u32 local_ip6[4]; /* Network byte order */
__u32 local_port; /* Host byte order */
+ __u32 ingress_ifindex; /* The arriving interface. Determined by inet_iif. */
};
/*
@@ -6328,4 +6415,78 @@ enum {
BTF_F_ZERO = (1ULL << 3),
};
+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations. It is emitted by LLVM and passed to
+ * libbpf and later to the kernel.
+ */
+enum bpf_core_relo_kind {
+ BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */
+ BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */
+ BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */
+ BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
+ BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
+ BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
+ BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
+ BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */
+ BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */
+ BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */
+ BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
+ BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */
+};
+
+/*
+ * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf
+ * and from libbpf to the kernel.
+ *
+ * CO-RE relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ * its insn->imm field to be relocated with actual field info;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ * type or field;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ * interpretation depends on specific relocation kind:
+ * - for field-based relocations, string encodes an accessed field using
+ * a sequence of field and array indices, separated by colon (:). It's
+ * conceptually very close to LLVM's getelementptr ([0]) instruction's
 *     arguments for identifying the offset of a field.
 *   - for type-based relocations, the string is expected to be just "0";
+ * - for enum value-based relocations, string contains an index of enum
+ * value within its enum type;
+ * - kind - one of enum bpf_core_relo_kind;
+ *
+ * Example:
+ * struct sample {
+ * int a;
+ * struct {
+ * int b[10];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ * int *x = &s->a; // encoded as "0:0" (a is field #0)
+ * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
+ * // b is field #0 inside anon struct, accessing elem #5)
+ * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example will capture BTF type id of
+ * `struct sample`.
+ *
+ * Such a relocation is emitted when using the
+ * __builtin_preserve_access_index() Clang built-in, passing an expression
+ * that captures a field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ * __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit a field relocation recording the data
+ * necessary to find the offset of the embedded `a.b.c` field within the
+ * `src` struct.
+ *
+ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_core_relo {
+ __u32 insn_off;
+ __u32 type_id;
+ __u32 access_str_off;
+ enum bpf_core_relo_kind kind;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
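To make the bpf_loop() contract concrete, here is a hedged BPF-side sketch (the tracepoint section is arbitrary; any program type supporting the helper on a new enough kernel would do). The static callback receives the iteration index plus the stack pointer passed as callback_ctx; returning 0 continues, returning 1 breaks out early:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

static long add_upto(__u32 index, void *ctx)
{
	__u64 *sum = ctx;

	*sum += index;
	return index < 100 ? 0 : 1;	/* 0 = next iteration, 1 = break */
}

SEC("tracepoint/syscalls/sys_enter_getpid")
int sum_indices(void *ctx)
{
	__u64 sum = 0;

	bpf_loop(1000, add_upto, &sum, 0);	/* flags must be 0 */
	bpf_printk("sum=%llu", sum);		/* 0..100 summed: 5050 */
	return 0;
}

char LICENSE[] SEC("license") = "GPL";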
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index deb12f755f0f..b0d8fea1951d 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -43,7 +43,7 @@ struct btf_type {
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
- * FUNC, FUNC_PROTO, VAR and DECL_TAG.
+ * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG.
* "type" is a type_id referring to another type.
*/
union {
@@ -75,6 +75,7 @@ enum {
BTF_KIND_DATASEC = 15, /* Section */
BTF_KIND_FLOAT = 16, /* Floating point */
BTF_KIND_DECL_TAG = 17, /* Decl Tag */
+ BTF_KIND_TYPE_TAG = 18, /* Type Tag */
NR_BTF_KINDS,
BTF_KIND_MAX = NR_BTF_KINDS - 1,
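BTF_KIND_TYPE_TAG records an arbitrary string tag on a pointee type. With a new enough Clang it is produced by the btf_type_tag attribute; the kernel uses this to carry annotations such as __user into BTF. A hedged sketch of the source-level form:

/* assumes a Clang with __has_attribute(btf_type_tag) */
#define __user __attribute__((btf_type_tag("user")))

/* the "user" tag on the pointee becomes a BTF_KIND_TYPE_TAG entry,
 * referenced through the "type" field as documented above */
long copy_in(void *dst, const void __user *src, unsigned long n);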
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index b3610fdd1fee..6218f93f5c1a 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -7,24 +7,23 @@
/* This struct should be in sync with struct rtnl_link_stats64 */
struct rtnl_link_stats {
- __u32 rx_packets; /* total packets received */
- __u32 tx_packets; /* total packets transmitted */
- __u32 rx_bytes; /* total bytes received */
- __u32 tx_bytes; /* total bytes transmitted */
- __u32 rx_errors; /* bad packets received */
- __u32 tx_errors; /* packet transmit problems */
- __u32 rx_dropped; /* no space in linux buffers */
- __u32 tx_dropped; /* no space available in linux */
- __u32 multicast; /* multicast packets received */
+ __u32 rx_packets;
+ __u32 tx_packets;
+ __u32 rx_bytes;
+ __u32 tx_bytes;
+ __u32 rx_errors;
+ __u32 tx_errors;
+ __u32 rx_dropped;
+ __u32 tx_dropped;
+ __u32 multicast;
__u32 collisions;
-
/* detailed rx_errors: */
__u32 rx_length_errors;
- __u32 rx_over_errors; /* receiver ring buff overflow */
- __u32 rx_crc_errors; /* recved pkt with crc error */
- __u32 rx_frame_errors; /* recv'd frame alignment error */
- __u32 rx_fifo_errors; /* recv'r fifo overrun */
- __u32 rx_missed_errors; /* receiver missed packet */
+ __u32 rx_over_errors;
+ __u32 rx_crc_errors;
+ __u32 rx_frame_errors;
+ __u32 rx_fifo_errors;
+ __u32 rx_missed_errors;
/* detailed tx_errors */
__u32 tx_aborted_errors;
@@ -37,29 +36,201 @@ struct rtnl_link_stats {
__u32 rx_compressed;
__u32 tx_compressed;
- __u32 rx_nohandler; /* dropped, no handler found */
+ __u32 rx_nohandler;
};
-/* The main device statistics structure */
+/**
+ * struct rtnl_link_stats64 - The main device statistics structure.
+ *
+ * @rx_packets: Number of good packets received by the interface.
+ * For hardware interfaces counts all good packets received from the device
+ * by the host, including packets which host had to drop at various stages
+ * of processing (even in the driver).
+ *
+ * @tx_packets: Number of packets successfully transmitted.
+ * For hardware interfaces counts packets which host was able to successfully
+ * hand over to the device, which does not necessarily mean that packets
+ * had been successfully transmitted out of the device, only that device
+ * acknowledged it copied them out of host memory.
+ *
+ * @rx_bytes: Number of good received bytes, corresponding to @rx_packets.
+ *
+ * For IEEE 802.3 devices should count the length of Ethernet Frames
+ * excluding the FCS.
+ *
+ * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets.
+ *
+ * For IEEE 802.3 devices should count the length of Ethernet Frames
+ * excluding the FCS.
+ *
+ * @rx_errors: Total number of bad packets received on this network device.
+ * This counter must include events counted by @rx_length_errors,
+ * @rx_crc_errors, @rx_frame_errors and other errors not otherwise
+ * counted.
+ *
+ * @tx_errors: Total number of transmit problems.
+ * This counter must include events counted by @tx_aborted_errors,
+ * @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors,
+ * @tx_window_errors and other errors not otherwise counted.
+ *
+ * @rx_dropped: Number of packets received but not processed,
+ * e.g. due to lack of resources or unsupported protocol.
+ * For hardware interfaces this counter may include packets discarded
+ * due to L2 address filtering but should not include packets dropped
+ * by the device due to buffer exhaustion which are counted separately in
+ * @rx_missed_errors (since procfs folds those two counters together).
+ *
+ * @tx_dropped: Number of packets dropped on their way to transmission,
+ * e.g. due to lack of resources.
+ *
+ * @multicast: Multicast packets received.
+ * For hardware interfaces this statistic is commonly calculated
+ * at the device level (unlike @rx_packets) and therefore may include
+ * packets which did not reach the host.
+ *
+ * For IEEE 802.3 devices this counter may be equivalent to:
+ *
+ * - 30.3.1.1.21 aMulticastFramesReceivedOK
+ *
+ * @collisions: Number of collisions during packet transmissions.
+ *
+ * @rx_length_errors: Number of packets dropped due to invalid length.
+ * Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ * For IEEE 802.3 devices this counter should be equivalent to a sum
+ * of the following attributes:
+ *
+ * - 30.3.1.1.23 aInRangeLengthErrors
+ * - 30.3.1.1.24 aOutOfRangeLengthField
+ * - 30.3.1.1.25 aFrameTooLongErrors
+ *
+ * @rx_over_errors: Receiver FIFO overflow event counter.
+ *
+ * Historically the count of overflow events. Such events may be
+ * reported in the receive descriptors or via interrupts, and may
+ * not correspond one-to-one with dropped packets.
+ *
+ * The recommended interpretation for high speed interfaces is the
+ * number of packets dropped because they did not fit into buffers
+ * provided by the host, e.g. packets larger than the MTU, or because
+ * the next buffer in the ring was not available for a scatter transfer.
+ *
+ * Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ * This statistic was historically used interchangeably with
+ * @rx_fifo_errors.
+ *
+ * This statistic corresponds to hardware events and is not commonly used
+ * on software devices.
+ *
+ * @rx_crc_errors: Number of packets received with a CRC error.
+ * Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ * For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ * - 30.3.1.1.6 aFrameCheckSequenceErrors
+ *
+ * @rx_frame_errors: Receiver frame alignment errors.
+ * Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ * For IEEE 802.3 devices this counter should be equivalent to:
+ *
+ * - 30.3.1.1.7 aAlignmentErrors
+ *
+ * @rx_fifo_errors: Receiver FIFO error counter.
+ *
+ * Historically the count of overflow events. Those events may be
+ * reported in the receive descriptors or via interrupts, and may
+ * not correspond one-to-one with dropped packets.
+ *
+ * This statistic was used interchangeably with @rx_over_errors.
+ * Not recommended for use in drivers for high speed interfaces.
+ *
+ * This statistic is used on software devices, e.g. to count software
+ * packet queue overflow (can) or sequencing errors (GRE).
+ *
+ * @rx_missed_errors: Count of packets missed by the host.
+ * Folded into the "drop" counter in `/proc/net/dev`.
+ *
+ * Counts number of packets dropped by the device due to lack
+ * of buffer space. This usually indicates that the host interface
+ * is slower than the network interface, or host is not keeping up
+ * with the receive packet rate.
+ *
+ * This statistic corresponds to hardware events and is not used
+ * on software devices.
+ *
+ * @tx_aborted_errors:
+ * Part of aggregate "carrier" errors in `/proc/net/dev`.
+ * For IEEE 802.3 devices capable of half-duplex operation this counter
+ * must be equivalent to:
+ *
+ * - 30.3.1.1.11 aFramesAbortedDueToXSColls
+ *
+ * High speed interfaces may use this counter as a general device
+ * discard counter.
+ *
+ * @tx_carrier_errors: Number of frame transmission errors due to loss
+ * of carrier during transmission.
+ * Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ * For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ * - 30.3.1.1.13 aCarrierSenseErrors
+ *
+ * @tx_fifo_errors: Number of frame transmission errors due to device
+ * FIFO underrun / underflow. This condition occurs when the device
+ * begins transmission of a frame but is unable to deliver the
+ * entire frame to the transmitter in time for transmission.
+ * Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for
+ * old half-duplex Ethernet.
+ * Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ * For IEEE 802.3 devices possibly equivalent to:
+ *
+ * - 30.3.2.1.4 aSQETestErrors
+ *
+ * @tx_window_errors: Number of frame transmission errors due
+ * to late collisions (for Ethernet - after the first 64B of transmission).
+ * Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ * For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ * - 30.3.1.1.10 aLateCollisions
+ *
+ * @rx_compressed: Number of correctly received compressed packets.
+ * This counter is only meaningful for interfaces which support
+ * packet compression (e.g. CSLIP, PPP).
+ *
+ * @tx_compressed: Number of transmitted compressed packets.
+ * This counter is only meaningful for interfaces which support
+ * packet compression (e.g. CSLIP, PPP).
+ *
+ * @rx_nohandler: Number of packets received on the interface
+ * but dropped by the networking stack because the device is
+ * not designated to receive packets (e.g. backup link in a bond).
+ */
struct rtnl_link_stats64 {
- __u64 rx_packets; /* total packets received */
- __u64 tx_packets; /* total packets transmitted */
- __u64 rx_bytes; /* total bytes received */
- __u64 tx_bytes; /* total bytes transmitted */
- __u64 rx_errors; /* bad packets received */
- __u64 tx_errors; /* packet transmit problems */
- __u64 rx_dropped; /* no space in linux buffers */
- __u64 tx_dropped; /* no space available in linux */
- __u64 multicast; /* multicast packets received */
+ __u64 rx_packets;
+ __u64 tx_packets;
+ __u64 rx_bytes;
+ __u64 tx_bytes;
+ __u64 rx_errors;
+ __u64 tx_errors;
+ __u64 rx_dropped;
+ __u64 tx_dropped;
+ __u64 multicast;
__u64 collisions;
/* detailed rx_errors: */
__u64 rx_length_errors;
- __u64 rx_over_errors; /* receiver ring buff overflow */
- __u64 rx_crc_errors; /* recved pkt with crc error */
- __u64 rx_frame_errors; /* recv'd frame alignment error */
- __u64 rx_fifo_errors; /* recv'r fifo overrun */
- __u64 rx_missed_errors; /* receiver missed packet */
+ __u64 rx_over_errors;
+ __u64 rx_crc_errors;
+ __u64 rx_frame_errors;
+ __u64 rx_fifo_errors;
+ __u64 rx_missed_errors;
/* detailed tx_errors */
__u64 tx_aborted_errors;
@@ -71,8 +242,7 @@ struct rtnl_link_stats64 {
/* for cslip etc */
__u64 rx_compressed;
__u64 tx_compressed;
-
- __u64 rx_nohandler; /* dropped, no handler found */
+ __u64 rx_nohandler;
};
/* The struct should be in sync with struct ifmap */
@@ -170,12 +340,30 @@ enum {
IFLA_PROP_LIST,
IFLA_ALT_IFNAME, /* Alternative ifname */
IFLA_PERM_ADDRESS,
+ IFLA_PROTO_DOWN_REASON,
+
+ /* device (sysfs) name as parent, used instead
+ * of IFLA_LINK where there's no parent netdev
+ */
+ IFLA_PARENT_DEV_NAME,
+ IFLA_PARENT_DEV_BUS_NAME,
+ IFLA_GRO_MAX_SIZE,
+
__IFLA_MAX
};
#define IFLA_MAX (__IFLA_MAX - 1)
+enum {
+ IFLA_PROTO_DOWN_REASON_UNSPEC,
+ IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */
+ IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */
+
+ __IFLA_PROTO_DOWN_REASON_CNT,
+ IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1
+};
+
/* backwards compatibility for userspace */
#ifndef __KERNEL__
#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
@@ -293,6 +481,7 @@ enum {
IFLA_BR_MCAST_MLD_VERSION,
IFLA_BR_VLAN_STATS_PER_PORT,
IFLA_BR_MULTI_BOOLOPT,
+ IFLA_BR_MCAST_QUERIER_STATE,
__IFLA_BR_MAX,
};
@@ -346,6 +535,8 @@ enum {
IFLA_BRPORT_BACKUP_PORT,
IFLA_BRPORT_MRP_RING_OPEN,
IFLA_BRPORT_MRP_IN_OPEN,
+ IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
+ IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -433,6 +624,7 @@ enum macvlan_macaddr_mode {
};
#define MACVLAN_FLAG_NOPROMISC 1
+#define MACVLAN_FLAG_NODST 2 /* skip dst macvlan if matching src macvlan */
/* VRF section */
enum {
@@ -597,6 +789,18 @@ enum ifla_geneve_df {
GENEVE_DF_MAX = __GENEVE_DF_END - 1,
};
+/* Bareudp section */
+enum {
+ IFLA_BAREUDP_UNSPEC,
+ IFLA_BAREUDP_PORT,
+ IFLA_BAREUDP_ETHERTYPE,
+ IFLA_BAREUDP_SRCPORT_MIN,
+ IFLA_BAREUDP_MULTIPROTO_MODE,
+ __IFLA_BAREUDP_MAX
+};
+
+#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1)
+
/* PPP section */
enum {
IFLA_PPP_UNSPEC,
@@ -655,6 +859,7 @@ enum {
IFLA_BOND_TLB_DYNAMIC_LB,
IFLA_BOND_PEER_NOTIF_DELAY,
IFLA_BOND_AD_LACP_ACTIVE,
+ IFLA_BOND_MISSED_MAX,
__IFLA_BOND_MAX,
};
@@ -899,7 +1104,14 @@ enum {
#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
-/* HSR section */
+/* HSR/PRP section: both use the same interface */
+
+/* Different redundancy protocols for hsr device */
+enum {
+ HSR_PROTOCOL_HSR,
+ HSR_PROTOCOL_PRP,
+ HSR_PROTOCOL_MAX,
+};
enum {
IFLA_HSR_UNSPEC,
@@ -909,6 +1121,9 @@ enum {
IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */
IFLA_HSR_SEQ_NR,
IFLA_HSR_VERSION, /* HSR version */
+	IFLA_HSR_PROTOCOL,	/* Indicates a protocol other than
+				 * HSR, e.g. PRP.
+				 */
__IFLA_HSR_MAX,
};
@@ -1033,6 +1248,8 @@ enum {
#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1)
#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2)
#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5 (1U << 5)
enum {
IFLA_RMNET_UNSPEC,
@@ -1048,4 +1265,14 @@ struct ifla_rmnet_flags {
__u32 mask;
};
+/* MCTP section */
+
+enum {
+ IFLA_MCTP_UNSPEC,
+ IFLA_MCTP_NET,
+ __IFLA_MCTP_MAX,
+};
+
+#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1)
+
#endif /* _UAPI_LINUX_IF_LINK_H */
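As a concrete reading of the folding rules spelled out in the new kernel-doc, the rx "frame" and "drop" columns of /proc/net/dev aggregate the detailed counters roughly as below (a sketch derived from the comments above, not a quote of the procfs code):

#include <linux/if_link.h>

static __u64 procfs_rx_frame(const struct rtnl_link_stats64 *s)
{
	return s->rx_length_errors + s->rx_over_errors +
	       s->rx_crc_errors + s->rx_frame_errors;
}

static __u64 procfs_rx_drop(const struct rtnl_link_stats64 *s)
{
	/* rx_missed_errors is folded into "drop", per the comment above */
	return s->rx_dropped + s->rx_missed_errors;
}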
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 1daa45268de2..b46bcdb0cab1 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1131,6 +1131,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
#define KVM_CAP_ARM_MTE 205
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
+#define KVM_CAP_VM_GPA_BITS 207
+#define KVM_CAP_XSAVE2 208
+#define KVM_CAP_SYS_ATTRIBUTES 209
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1162,11 +1165,20 @@ struct kvm_irq_routing_hv_sint {
__u32 sint;
};
+struct kvm_irq_routing_xen_evtchn {
+ __u32 port;
+ __u32 vcpu;
+ __u32 priority;
+};
+
+#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1))
+
/* gsi routing entry types */
#define KVM_IRQ_ROUTING_IRQCHIP 1
#define KVM_IRQ_ROUTING_MSI 2
#define KVM_IRQ_ROUTING_S390_ADAPTER 3
#define KVM_IRQ_ROUTING_HV_SINT 4
+#define KVM_IRQ_ROUTING_XEN_EVTCHN 5
struct kvm_irq_routing_entry {
__u32 gsi;
@@ -1178,6 +1190,7 @@ struct kvm_irq_routing_entry {
struct kvm_irq_routing_msi msi;
struct kvm_irq_routing_s390_adapter adapter;
struct kvm_irq_routing_hv_sint hv_sint;
+ struct kvm_irq_routing_xen_evtchn xen_evtchn;
__u32 pad[8];
} u;
};
@@ -1208,6 +1221,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
struct kvm_xen_hvm_config {
__u32 flags;
@@ -1610,6 +1624,9 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
+
struct kvm_s390_pv_sec_parm {
__u64 origin;
__u64 length;
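KVM_GET_XSAVE2 exists because the fixed-size struct kvm_xsave cannot hold newer XSAVE components; KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) reports the buffer size the vCPU actually needs. A hedged user-space sketch (vm_fd and vcpu_fd are assumed to be valid KVM file descriptors; on kernels without the capability, callers should fall back to plain KVM_GET_XSAVE):

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static struct kvm_xsave *get_xsave2(int vm_fd, int vcpu_fd)
{
	int size = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XSAVE2);
	struct kvm_xsave *xs;

	if (size < (int)sizeof(*xs))
		return NULL;	/* capability absent: use KVM_GET_XSAVE instead */
	xs = calloc(1, size);
	if (xs && ioctl(vcpu_fd, KVM_GET_XSAVE2, xs) < 0) {
		free(xs);
		return NULL;
	}
	return xs;
}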
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index bd8860eeb291..4cd39aaccbe7 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1332,7 +1332,10 @@ union perf_mem_data_src {
/* hop level */
#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
-/* 2-7 available */
+#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
+#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
+#define PERF_MEM_HOPS_3 0x04 /* remote board */
+/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
#define PERF_MEM_S(a, s) \
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index b393b5e82380..f947b61b2107 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -84,11 +84,13 @@ else
endif
# Append required CFLAGS
+override CFLAGS += -std=gnu89
override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum
override CFLAGS += -Werror -Wall
override CFLAGS += $(INCLUDES)
override CFLAGS += -fvisibility=hidden
override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+override CFLAGS += $(CLANG_CROSS_FLAGS)
# flags specific for shared library
SHLIB_FLAGS := -DSHARED -fPIC
@@ -161,7 +163,7 @@ $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT)
- $(QUIET_LINK)$(CC) $(LDFLAGS) \
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \
--shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
-Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@
@ln -sf $(@F) $(OUTPUT)libbpf.so
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 725701235fd8..550b4cbb6c99 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -28,6 +28,9 @@
#include <asm/unistd.h>
#include <errno.h>
#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <limits.h>
+#include <sys/resource.h>
#include "bpf.h"
#include "libbpf.h"
#include "libbpf_internal.h"
@@ -49,6 +52,12 @@
# define __NR_bpf 351
# elif defined(__arc__)
# define __NR_bpf 280
+# elif defined(__mips__) && defined(_ABIO32)
+# define __NR_bpf 4355
+# elif defined(__mips__) && defined(_ABIN32)
+# define __NR_bpf 6319
+# elif defined(__mips__) && defined(_ABI64)
+# define __NR_bpf 5315
# else
# error __NR_bpf not defined. libbpf does not support your arch.
# endif
@@ -74,158 +83,208 @@ static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr,
return ensure_good_fd(fd);
}
-static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
+#define PROG_LOAD_ATTEMPTS 5
+
+static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
{
- int retries = 5;
int fd;
do {
fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size);
- } while (fd < 0 && errno == EAGAIN && retries-- > 0);
+ } while (fd < 0 && errno == EAGAIN && --attempts > 0);
return fd;
}
-int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr)
+/* Probe whether the kernel switched from memlock-based (RLIMIT_MEMLOCK) to
+ * memcg-based memory accounting for BPF maps and progs. This was done in [0].
+ * We use support for the bpf_ktime_get_coarse_ns() helper, which was added in
+ * the same Linux 5.11 release ([1]), to detect memcg-based accounting for BPF.
+ *
+ * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/
+ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper")
+ */
+int probe_memcg_account(void)
+{
+ const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
+ struct bpf_insn insns[] = {
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns),
+ BPF_EXIT_INSN(),
+ };
+ size_t insn_cnt = sizeof(insns) / sizeof(insns[0]);
+ union bpf_attr attr;
+ int prog_fd;
+
+	/* attempt to load a trivial socket filter that calls bpf_ktime_get_coarse_ns() */
+ memset(&attr, 0, prog_load_attr_sz);
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = insn_cnt;
+ attr.license = ptr_to_u64("GPL");
+
+ prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz);
+ if (prog_fd >= 0) {
+ close(prog_fd);
+ return 1;
+ }
+ return 0;
+}
+
+static bool memlock_bumped;
+static rlim_t memlock_rlim = RLIM_INFINITY;
+
+int libbpf_set_memlock_rlim(size_t memlock_bytes)
+{
+ if (memlock_bumped)
+ return libbpf_err(-EBUSY);
+
+ memlock_rlim = memlock_bytes;
+ return 0;
+}
+
+int bump_rlimit_memlock(void)
+{
+ struct rlimit rlim;
+
+	/* this is the default in libbpf 1.0, but for now the user has to opt in explicitly */
+ if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK))
+ return 0;
+
+ /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
+ if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT))
+ return 0;
+
+ memlock_bumped = true;
+
+	/* a zero memlock_rlim disables auto-bumping RLIMIT_MEMLOCK */
+ if (memlock_rlim == 0)
+ return 0;
+
+ rlim.rlim_cur = rlim.rlim_max = memlock_rlim;
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim))
+ return -errno;
+
+ return 0;
+}
+
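Callers can steer this logic before the first map or program load; a short sketch of the knobs involved (values illustrative):

	/* opt in to the libbpf-1.0 behavior of auto-bumping RLIMIT_MEMLOCK */
	libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK);

	/* pick a custom cap instead of RLIM_INFINITY... */
	libbpf_set_memlock_rlim(128 * 1024 * 1024);	/* 128 MiB */

	/* ...or pass 0 to leave RLIMIT_MEMLOCK untouched */
	libbpf_set_memlock_rlim(0);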
+int bpf_map_create(enum bpf_map_type map_type,
+ const char *map_name,
+ __u32 key_size,
+ __u32 value_size,
+ __u32 max_entries,
+ const struct bpf_map_create_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
union bpf_attr attr;
int fd;
- memset(&attr, '\0', sizeof(attr));
-
- attr.map_type = create_attr->map_type;
- attr.key_size = create_attr->key_size;
- attr.value_size = create_attr->value_size;
- attr.max_entries = create_attr->max_entries;
- attr.map_flags = create_attr->map_flags;
- if (create_attr->name)
- memcpy(attr.map_name, create_attr->name,
- min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1));
- attr.numa_node = create_attr->numa_node;
- attr.btf_fd = create_attr->btf_fd;
- attr.btf_key_type_id = create_attr->btf_key_type_id;
- attr.btf_value_type_id = create_attr->btf_value_type_id;
- attr.map_ifindex = create_attr->map_ifindex;
- if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS)
- attr.btf_vmlinux_value_type_id =
- create_attr->btf_vmlinux_value_type_id;
- else
- attr.inner_map_fd = create_attr->inner_map_fd;
- attr.map_extra = create_attr->map_extra;
+ bump_rlimit_memlock();
+
+ memset(&attr, 0, attr_sz);
+
+ if (!OPTS_VALID(opts, bpf_map_create_opts))
+ return libbpf_err(-EINVAL);
+
+ attr.map_type = map_type;
+ if (map_name)
+ libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
+ attr.key_size = key_size;
+ attr.value_size = value_size;
+ attr.max_entries = max_entries;
- fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr));
+ attr.btf_fd = OPTS_GET(opts, btf_fd, 0);
+ attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0);
+ attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0);
+ attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0);
+
+ attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0);
+ attr.map_flags = OPTS_GET(opts, map_flags, 0);
+ attr.map_extra = OPTS_GET(opts, map_extra, 0);
+ attr.numa_node = OPTS_GET(opts, numa_node, 0);
+ attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
+
+ fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
return libbpf_err_errno(fd);
}
int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
{
- struct bpf_create_map_params p = {};
+ LIBBPF_OPTS(bpf_map_create_opts, p);
- p.map_type = create_attr->map_type;
- p.key_size = create_attr->key_size;
- p.value_size = create_attr->value_size;
- p.max_entries = create_attr->max_entries;
p.map_flags = create_attr->map_flags;
- p.name = create_attr->name;
p.numa_node = create_attr->numa_node;
p.btf_fd = create_attr->btf_fd;
p.btf_key_type_id = create_attr->btf_key_type_id;
p.btf_value_type_id = create_attr->btf_value_type_id;
p.map_ifindex = create_attr->map_ifindex;
- if (p.map_type == BPF_MAP_TYPE_STRUCT_OPS)
- p.btf_vmlinux_value_type_id =
- create_attr->btf_vmlinux_value_type_id;
+ if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS)
+ p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id;
else
p.inner_map_fd = create_attr->inner_map_fd;
- return libbpf__bpf_create_map_xattr(&p);
+ return bpf_map_create(create_attr->map_type, create_attr->name,
+ create_attr->key_size, create_attr->value_size,
+ create_attr->max_entries, &p);
}
int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
int key_size, int value_size, int max_entries,
__u32 map_flags, int node)
{
- struct bpf_create_map_attr map_attr = {};
-
- map_attr.name = name;
- map_attr.map_type = map_type;
- map_attr.map_flags = map_flags;
- map_attr.key_size = key_size;
- map_attr.value_size = value_size;
- map_attr.max_entries = max_entries;
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
+
+ opts.map_flags = map_flags;
if (node >= 0) {
- map_attr.numa_node = node;
- map_attr.map_flags |= BPF_F_NUMA_NODE;
+ opts.numa_node = node;
+ opts.map_flags |= BPF_F_NUMA_NODE;
}
- return bpf_create_map_xattr(&map_attr);
+ return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts);
}
int bpf_create_map(enum bpf_map_type map_type, int key_size,
int value_size, int max_entries, __u32 map_flags)
{
- struct bpf_create_map_attr map_attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags);
- map_attr.map_type = map_type;
- map_attr.map_flags = map_flags;
- map_attr.key_size = key_size;
- map_attr.value_size = value_size;
- map_attr.max_entries = max_entries;
-
- return bpf_create_map_xattr(&map_attr);
+ return bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts);
}
int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
int key_size, int value_size, int max_entries,
__u32 map_flags)
{
- struct bpf_create_map_attr map_attr = {};
-
- map_attr.name = name;
- map_attr.map_type = map_type;
- map_attr.map_flags = map_flags;
- map_attr.key_size = key_size;
- map_attr.value_size = value_size;
- map_attr.max_entries = max_entries;
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags);
- return bpf_create_map_xattr(&map_attr);
+ return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts);
}
int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
int key_size, int inner_map_fd, int max_entries,
__u32 map_flags, int node)
{
- union bpf_attr attr;
- int fd;
-
- memset(&attr, '\0', sizeof(attr));
-
- attr.map_type = map_type;
- attr.key_size = key_size;
- attr.value_size = 4;
- attr.inner_map_fd = inner_map_fd;
- attr.max_entries = max_entries;
- attr.map_flags = map_flags;
- if (name)
- memcpy(attr.map_name, name,
- min(strlen(name), BPF_OBJ_NAME_LEN - 1));
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
+ opts.inner_map_fd = inner_map_fd;
+ opts.map_flags = map_flags;
if (node >= 0) {
- attr.map_flags |= BPF_F_NUMA_NODE;
- attr.numa_node = node;
+ opts.map_flags |= BPF_F_NUMA_NODE;
+ opts.numa_node = node;
}
- fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr));
- return libbpf_err_errno(fd);
+ return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts);
}
int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
int key_size, int inner_map_fd, int max_entries,
__u32 map_flags)
{
- return bpf_create_map_in_map_node(map_type, name, key_size,
- inner_map_fd, max_entries, map_flags,
- -1);
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .inner_map_fd = inner_map_fd,
+ .map_flags = map_flags,
+ );
+
+ return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts);
}
static void *
@@ -253,58 +312,95 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt,
return info;
}
-int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
+DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0)
+int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
+ const char *prog_name, const char *license,
+ const struct bpf_insn *insns, size_t insn_cnt,
+ const struct bpf_prog_load_opts *opts)
{
void *finfo = NULL, *linfo = NULL;
+ const char *func_info, *line_info;
+ __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
+ __u32 func_info_rec_size, line_info_rec_size;
+ int fd, attempts;
union bpf_attr attr;
- int fd;
+ char *log_buf;
+
+ bump_rlimit_memlock();
- if (!load_attr->log_buf != !load_attr->log_buf_sz)
+ if (!OPTS_VALID(opts, bpf_prog_load_opts))
return libbpf_err(-EINVAL);
- if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf))
+ attempts = OPTS_GET(opts, attempts, 0);
+ if (attempts < 0)
return libbpf_err(-EINVAL);
+ if (attempts == 0)
+ attempts = PROG_LOAD_ATTEMPTS;
memset(&attr, 0, sizeof(attr));
- attr.prog_type = load_attr->prog_type;
- attr.expected_attach_type = load_attr->expected_attach_type;
- if (load_attr->attach_prog_fd)
- attr.attach_prog_fd = load_attr->attach_prog_fd;
- else
- attr.attach_btf_obj_fd = load_attr->attach_btf_obj_fd;
- attr.attach_btf_id = load_attr->attach_btf_id;
+ attr.prog_type = prog_type;
+ attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0);
- attr.prog_ifindex = load_attr->prog_ifindex;
- attr.kern_version = load_attr->kern_version;
+ attr.prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0);
+ attr.prog_flags = OPTS_GET(opts, prog_flags, 0);
+ attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0);
+ attr.kern_version = OPTS_GET(opts, kern_version, 0);
- attr.insn_cnt = (__u32)load_attr->insn_cnt;
- attr.insns = ptr_to_u64(load_attr->insns);
- attr.license = ptr_to_u64(load_attr->license);
+ if (prog_name)
+ libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
+ attr.license = ptr_to_u64(license);
- attr.log_level = load_attr->log_level;
- if (attr.log_level) {
- attr.log_buf = ptr_to_u64(load_attr->log_buf);
- attr.log_size = load_attr->log_buf_sz;
- }
+ if (insn_cnt > UINT_MAX)
+ return libbpf_err(-E2BIG);
+
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = (__u32)insn_cnt;
+
+ attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
+ attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0);
+
+ if (attach_prog_fd && attach_btf_obj_fd)
+ return libbpf_err(-EINVAL);
+
+ attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0);
+ if (attach_prog_fd)
+ attr.attach_prog_fd = attach_prog_fd;
+ else
+ attr.attach_btf_obj_fd = attach_btf_obj_fd;
- attr.prog_btf_fd = load_attr->prog_btf_fd;
- attr.prog_flags = load_attr->prog_flags;
+ log_buf = OPTS_GET(opts, log_buf, NULL);
+ log_size = OPTS_GET(opts, log_size, 0);
+ log_level = OPTS_GET(opts, log_level, 0);
- attr.func_info_rec_size = load_attr->func_info_rec_size;
- attr.func_info_cnt = load_attr->func_info_cnt;
- attr.func_info = ptr_to_u64(load_attr->func_info);
+ if (!!log_buf != !!log_size)
+ return libbpf_err(-EINVAL);
+ if (log_level > (4 | 2 | 1))
+ return libbpf_err(-EINVAL);
+ if (log_level && !log_buf)
+ return libbpf_err(-EINVAL);
- attr.line_info_rec_size = load_attr->line_info_rec_size;
- attr.line_info_cnt = load_attr->line_info_cnt;
- attr.line_info = ptr_to_u64(load_attr->line_info);
- attr.fd_array = ptr_to_u64(load_attr->fd_array);
+ func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0);
+ func_info = OPTS_GET(opts, func_info, NULL);
+ attr.func_info_rec_size = func_info_rec_size;
+ attr.func_info = ptr_to_u64(func_info);
+ attr.func_info_cnt = OPTS_GET(opts, func_info_cnt, 0);
- if (load_attr->name)
- memcpy(attr.prog_name, load_attr->name,
- min(strlen(load_attr->name), (size_t)BPF_OBJ_NAME_LEN - 1));
+ line_info_rec_size = OPTS_GET(opts, line_info_rec_size, 0);
+ line_info = OPTS_GET(opts, line_info, NULL);
+ attr.line_info_rec_size = line_info_rec_size;
+ attr.line_info = ptr_to_u64(line_info);
+ attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0);
+
+ attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL));
+
+ if (log_level) {
+ attr.log_buf = ptr_to_u64(log_buf);
+ attr.log_size = log_size;
+ attr.log_level = log_level;
+ }
- fd = sys_bpf_prog_load(&attr, sizeof(attr));
+ fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
if (fd >= 0)
return fd;
@@ -314,11 +410,11 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
*/
while (errno == E2BIG && (!finfo || !linfo)) {
if (!finfo && attr.func_info_cnt &&
- attr.func_info_rec_size < load_attr->func_info_rec_size) {
+ attr.func_info_rec_size < func_info_rec_size) {
/* try with corrected func info records */
- finfo = alloc_zero_tailing_info(load_attr->func_info,
- load_attr->func_info_cnt,
- load_attr->func_info_rec_size,
+ finfo = alloc_zero_tailing_info(func_info,
+ attr.func_info_cnt,
+ func_info_rec_size,
attr.func_info_rec_size);
if (!finfo) {
errno = E2BIG;
@@ -326,13 +422,12 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
}
attr.func_info = ptr_to_u64(finfo);
- attr.func_info_rec_size = load_attr->func_info_rec_size;
+ attr.func_info_rec_size = func_info_rec_size;
} else if (!linfo && attr.line_info_cnt &&
- attr.line_info_rec_size <
- load_attr->line_info_rec_size) {
- linfo = alloc_zero_tailing_info(load_attr->line_info,
- load_attr->line_info_cnt,
- load_attr->line_info_rec_size,
+ attr.line_info_rec_size < line_info_rec_size) {
+ linfo = alloc_zero_tailing_info(line_info,
+ attr.line_info_cnt,
+ line_info_rec_size,
attr.line_info_rec_size);
if (!linfo) {
errno = E2BIG;
@@ -340,26 +435,27 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
}
attr.line_info = ptr_to_u64(linfo);
- attr.line_info_rec_size = load_attr->line_info_rec_size;
+ attr.line_info_rec_size = line_info_rec_size;
} else {
break;
}
- fd = sys_bpf_prog_load(&attr, sizeof(attr));
+ fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
if (fd >= 0)
goto done;
}
- if (load_attr->log_level || !load_attr->log_buf)
- goto done;
+ if (log_level == 0 && log_buf) {
+	/* log_level == 0 with non-NULL log_buf requires retrying on error
+	 * with log_level == 1 and log_buf/log_size set, to get details of
+	 * the failure
+	 */
+ attr.log_buf = ptr_to_u64(log_buf);
+ attr.log_size = log_size;
+ attr.log_level = 1;
- /* Try again with log */
- attr.log_buf = ptr_to_u64(load_attr->log_buf);
- attr.log_size = load_attr->log_buf_sz;
- attr.log_level = 1;
- load_attr->log_buf[0] = 0;
-
- fd = sys_bpf_prog_load(&attr, sizeof(attr));
+ fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
+ }
done:
/* free() doesn't affect errno, so we don't need to restore it */
free(finfo);
@@ -367,17 +463,20 @@ done:
return libbpf_err_errno(fd);
}
+__attribute__((alias("bpf_load_program_xattr2")))
int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
- char *log_buf, size_t log_buf_sz)
+ char *log_buf, size_t log_buf_sz);
+
+static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr,
+ char *log_buf, size_t log_buf_sz)
{
- struct bpf_prog_load_params p = {};
+ LIBBPF_OPTS(bpf_prog_load_opts, p);
if (!load_attr || !log_buf != !log_buf_sz)
return libbpf_err(-EINVAL);
- p.prog_type = load_attr->prog_type;
p.expected_attach_type = load_attr->expected_attach_type;
- switch (p.prog_type) {
+ switch (load_attr->prog_type) {
case BPF_PROG_TYPE_STRUCT_OPS:
case BPF_PROG_TYPE_LSM:
p.attach_btf_id = load_attr->attach_btf_id;
@@ -391,12 +490,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
p.prog_ifindex = load_attr->prog_ifindex;
p.kern_version = load_attr->kern_version;
}
- p.insn_cnt = load_attr->insns_cnt;
- p.insns = load_attr->insns;
- p.license = load_attr->license;
p.log_level = load_attr->log_level;
p.log_buf = log_buf;
- p.log_buf_sz = log_buf_sz;
+ p.log_size = log_buf_sz;
p.prog_btf_fd = load_attr->prog_btf_fd;
p.func_info_rec_size = load_attr->func_info_rec_size;
p.func_info_cnt = load_attr->func_info_cnt;
@@ -404,10 +500,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
p.line_info_rec_size = load_attr->line_info_rec_size;
p.line_info_cnt = load_attr->line_info_cnt;
p.line_info = load_attr->line_info;
- p.name = load_attr->name;
p.prog_flags = load_attr->prog_flags;
- return libbpf__bpf_prog_load(&p);
+ return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license,
+ load_attr->insns, load_attr->insns_cnt, &p);
}
int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
@@ -426,7 +522,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
load_attr.license = license;
load_attr.kern_version = kern_version;
- return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
+ return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz);
}
int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
@@ -437,6 +533,8 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
union bpf_attr attr;
int fd;
+ bump_rlimit_memlock();
+
memset(&attr, 0, sizeof(attr));
attr.prog_type = type;
attr.insn_cnt = (__u32)insns_cnt;
@@ -449,7 +547,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
attr.kern_version = kern_version;
attr.prog_flags = prog_flags;
- fd = sys_bpf_prog_load(&attr, sizeof(attr));
+ fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS);
return libbpf_err_errno(fd);
}
@@ -593,11 +691,11 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
return libbpf_err_errno(ret);
}
-int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
+int bpf_map_delete_batch(int fd, const void *keys, __u32 *count,
const struct bpf_map_batch_opts *opts)
{
return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
- NULL, keys, NULL, count, opts);
+ NULL, (void *)keys, NULL, count, opts);
}
int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
@@ -617,11 +715,11 @@ int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
count, opts);
}
-int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
+int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count,
const struct bpf_map_batch_opts *opts)
{
return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
- keys, values, count, opts);
+ (void *)keys, (void *)values, count, opts);
}
int bpf_obj_pin(int fd, const char *pathname)
@@ -1028,24 +1126,67 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
return libbpf_err_errno(fd);
}
-int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
- bool do_log)
+int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts)
{
- union bpf_attr attr = {};
+ const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level);
+ union bpf_attr attr;
+ char *log_buf;
+ size_t log_size;
+ __u32 log_level;
int fd;
- attr.btf = ptr_to_u64(btf);
+ bump_rlimit_memlock();
+
+ memset(&attr, 0, attr_sz);
+
+ if (!OPTS_VALID(opts, bpf_btf_load_opts))
+ return libbpf_err(-EINVAL);
+
+ log_buf = OPTS_GET(opts, log_buf, NULL);
+ log_size = OPTS_GET(opts, log_size, 0);
+ log_level = OPTS_GET(opts, log_level, 0);
+
+ if (log_size > UINT_MAX)
+ return libbpf_err(-EINVAL);
+ if (log_size && !log_buf)
+ return libbpf_err(-EINVAL);
+
+ attr.btf = ptr_to_u64(btf_data);
attr.btf_size = btf_size;
+ /* log_level == 0 and log_buf != NULL means "try loading without
+ * log_buf, but retry with log_buf and log_level=1 on error", which is
+ * consistent across low-level and high-level BTF and program loading
+ * APIs within libbpf and provides a sensible behavior in practice
+ */
+ if (log_level) {
+ attr.btf_log_buf = ptr_to_u64(log_buf);
+ attr.btf_log_size = (__u32)log_size;
+ attr.btf_log_level = log_level;
+ }
-retry:
- if (do_log && log_buf && log_buf_size) {
- attr.btf_log_level = 1;
- attr.btf_log_size = log_buf_size;
+ fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
+ if (fd < 0 && log_buf && log_level == 0) {
attr.btf_log_buf = ptr_to_u64(log_buf);
+ attr.btf_log_size = (__u32)log_size;
+ attr.btf_log_level = 1;
+ fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
}
+ return libbpf_err_errno(fd);
+}
- fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, sizeof(attr));
+int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, opts);
+ int fd;
+
+retry:
+ if (do_log && log_buf && log_buf_size) {
+ opts.log_buf = log_buf;
+ opts.log_size = log_buf_size;
+ opts.log_level = 1;
+ }
+ fd = bpf_btf_load(btf, btf_size, &opts);
if (fd < 0 && !do_log && log_buf && log_buf_size) {
do_log = true;
goto retry;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6fffb3cdf39b..14e0d97ad2cf 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -29,11 +29,38 @@
#include <stdint.h>
#include "libbpf_common.h"
+#include "libbpf_legacy.h"
#ifdef __cplusplus
extern "C" {
#endif
+int libbpf_set_memlock_rlim(size_t memlock_bytes);
+
+struct bpf_map_create_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+
+ __u32 btf_fd;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+ __u32 btf_vmlinux_value_type_id;
+
+ __u32 inner_map_fd;
+ __u32 map_flags;
+ __u64 map_extra;
+
+ __u32 numa_node;
+ __u32 map_ifindex;
+};
+#define bpf_map_create_opts__last_field map_ifindex
+
+LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
+ const char *map_name,
+ __u32 key_size,
+ __u32 value_size,
+ __u32 max_entries,
+ const struct bpf_map_create_opts *opts);
+
struct bpf_create_map_attr {
const char *name;
enum bpf_map_type map_type;
@@ -52,25 +79,95 @@ struct bpf_create_map_attr {
};
};
-LIBBPF_API int
-bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
+LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
int key_size, int value_size,
int max_entries, __u32 map_flags, int node);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
int key_size, int value_size,
int max_entries, __u32 map_flags);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
int value_size, int max_entries, __u32 map_flags);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type,
const char *name, int key_size,
int inner_map_fd, int max_entries,
__u32 map_flags, int node);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type,
const char *name, int key_size,
int inner_map_fd, int max_entries,
__u32 map_flags);
+struct bpf_prog_load_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+
+	/* libbpf can retry BPF_PROG_LOAD command if bpf() syscall returns
+	 * -EAGAIN. This field determines how many attempts libbpf has to
+	 * make. If not specified, libbpf will use the default value of 5.
+	 */
+ int attempts;
+
+ enum bpf_attach_type expected_attach_type;
+ __u32 prog_btf_fd;
+ __u32 prog_flags;
+ __u32 prog_ifindex;
+ __u32 kern_version;
+
+ __u32 attach_btf_id;
+ __u32 attach_prog_fd;
+ __u32 attach_btf_obj_fd;
+
+ const int *fd_array;
+
+ /* .BTF.ext func info data */
+ const void *func_info;
+ __u32 func_info_cnt;
+ __u32 func_info_rec_size;
+
+ /* .BTF.ext line info data */
+ const void *line_info;
+ __u32 line_info_cnt;
+ __u32 line_info_rec_size;
+
+ /* verifier log options */
+ __u32 log_level;
+ __u32 log_size;
+ char *log_buf;
+};
+#define bpf_prog_load_opts__last_field log_buf
+
+LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
+ const char *prog_name, const char *license,
+ const struct bpf_insn *insns, size_t insn_cnt,
+ const struct bpf_prog_load_opts *opts);
+/* this "specialization" should go away in libbpf 1.0 */
+LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
+ const char *prog_name, const char *license,
+ const struct bpf_insn *insns, size_t insn_cnt,
+ const struct bpf_prog_load_opts *opts);
+
+/* This is an elaborate way to not conflict with deprecated bpf_prog_load()
+ * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone.
+ * With this approach, if someone is calling bpf_prog_load() with
+ * 4 arguments, they will use the deprecated API, which keeps backwards
+ * compatibility (both source code and binary). If bpf_prog_load() is called
+ * with 6 arguments, though, it gets redirected to the new OPTS-based
+ * bpf_prog_load() declared above (bpf_prog_load_v0_6_0 under the hood).
+ * So looking forward to libbpf 1.0, when this hack will be gone and the
+ * new variant will simply be called bpf_prog_load().
+ */
+#ifndef bpf_prog_load
+#define bpf_prog_load(...) ___libbpf_overload(___bpf_prog_load, __VA_ARGS__)
+#define ___bpf_prog_load4(file, type, pobj, prog_fd) \
+ bpf_prog_load_deprecated(file, type, pobj, prog_fd)
+#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \
+ bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts)
+#endif /* bpf_prog_load */
+
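Concretely, a 6-argument call goes through the overload above and lands on the OPTS-based loader. A sketch with an illustrative two-instruction program:

#include <linux/bpf.h>
#include <bpf/bpf.h>

static char log_buf[64 * 1024];

/* Load a trivial "return 0" socket filter; an fd or -errno is returned. */
static int load_trivial(void)
{
	const struct bpf_insn insns[] = {
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0 },
		{ .code = BPF_JMP | BPF_EXIT },
	};
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.log_buf = log_buf,
		.log_size = sizeof(log_buf),
		/* log_level left at 0: load quietly, auto-retry verbosely on error */
	);

	return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "trivial", "GPL",
			     insns, sizeof(insns) / sizeof(insns[0]), &opts);
}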
struct bpf_load_program_attr {
enum bpf_prog_type prog_type;
enum bpf_attach_type expected_attach_type;
@@ -100,15 +197,18 @@ struct bpf_load_program_attr {
/* Flags to direct loading requirements */
#define MAPS_RELAX_COMPAT 0x01
-/* Recommend log buffer size */
+/* Recommended log buffer size */
#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */
-LIBBPF_API int
-bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
- char *log_buf, size_t log_buf_sz);
+
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
+LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+ char *log_buf, size_t log_buf_sz);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
const struct bpf_insn *insns, size_t insns_cnt,
const char *license, __u32 kern_version,
char *log_buf, size_t log_buf_sz);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
const struct bpf_insn *insns,
size_t insns_cnt, __u32 prog_flags,
@@ -116,6 +216,23 @@ LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
char *log_buf, size_t log_buf_sz,
int log_level);
+struct bpf_btf_load_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+
+ /* kernel log options */
+ char *log_buf;
+ __u32 log_level;
+ __u32 log_size;
+};
+#define bpf_btf_load_opts__last_field log_size
+
+LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size,
+ const struct bpf_btf_load_opts *opts);
+
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() instead")
+LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
+ __u32 log_buf_size, bool do_log);
+
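A sketch of feeding raw BTF bytes to the new loader; with log_level left at 0, the kernel log is requested only on the automatic failure retry (buffer names are illustrative):

#include <stddef.h>
#include <bpf/bpf.h>

/* Returns a BTF fd or -errno; log_buf is filled only if loading fails. */
static int load_btf_blob(const void *raw, size_t raw_sz,
			 char *log_buf, __u32 log_sz)
{
	LIBBPF_OPTS(bpf_btf_load_opts, opts,
		.log_buf = log_buf,
		.log_size = log_sz,
	);

	return bpf_btf_load(raw, raw_sz, &opts);
}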
LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
__u64 flags);
@@ -137,17 +254,128 @@ struct bpf_map_batch_opts {
};
#define bpf_map_batch_opts__last_field flags
-LIBBPF_API int bpf_map_delete_batch(int fd, void *keys,
+
+/**
+ * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple
+ * elements in a BPF map.
+ *
+ * @param fd BPF map file descriptor
+ * @param keys pointer to an array of *count* keys
+ * @param count input and output parameter; on input **count** represents the
+ * number of elements in the map to delete in batch;
+ * on output **count** represents the number of elements actually deleted, which
+ * may be fewer than requested when a non-**EFAULT** error is returned.
+ * If **EFAULT** is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch deletion works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys,
__u32 *count,
const struct bpf_map_batch_opts *opts);
+
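For example, a caller can drop an array of keys in one syscall; NULL *opts* selects the defaults (a sketch, assuming __u32 keys that match the map):

#include <bpf/bpf.h>

/* On return, *count holds how many of the keys were actually deleted. */
static int delete_keys(int map_fd, const __u32 *keys, __u32 *count)
{
	return bpf_map_delete_batch(map_fd, keys, count, NULL);
}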
+/**
+ * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements.
+ *
+ * The parameter *in_batch* is the address of the first element in the batch to read.
+ * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent
+ * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate
+ * that the batched lookup starts from the beginning of the map.
+ *
+ * The *keys* and *values* are output parameters which must point to memory large enough to
+ * hold *count* items based on the key and value size of the map *map_fd*: the *keys*
+ * buffer must be at least *key_size* * *count* bytes and the *values* buffer at least
+ * *value_size* * *count* bytes.
+ *
+ * @param fd BPF map file descriptor
+ * @param in_batch address of the first element in the batch to read; pass NULL to
+ * indicate that the batched lookup starts from the beginning of the map
+ * @param out_batch output parameter that should be passed to the next call as *in_batch*
+ * @param keys pointer to an array large enough for *count* keys
+ * @param values pointer to an array large enough for *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to read in batch; on output it's the number of elements that were
+ * successfully read.
+ * If a non-**EFAULT** error is returned, **count** is set to the number of
+ * elements that were read before the error occurred.
+ * If **EFAULT** is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch lookup works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,
void *keys, void *values, __u32 *count,
const struct bpf_map_batch_opts *opts);
+
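Putting the *in_batch*/*out_batch* protocol together, a full-map walk looks roughly like the sketch below; the chunk size and the __u32 batch token are assumptions that fit common hash maps:

#include <errno.h>
#include <bpf/bpf.h>

#define CHUNK 64

/* keys/vals must each have room for CHUNK elements of the map's sizes. */
static int walk_map(int map_fd, __u32 *keys, __u64 *vals)
{
	__u32 out_batch, count;
	void *in_batch = NULL;	/* NULL: start from the beginning of the map */
	int err;

	do {
		count = CHUNK;
		err = bpf_map_lookup_batch(map_fd, in_batch, &out_batch,
					   keys, vals, &count, NULL);
		if (err && errno != ENOENT)
			return err;	/* already -errno */
		/* ... consume 'count' elements from keys[]/vals[] here ... */
		in_batch = &out_batch;
	} while (!err);	/* -ENOENT marks the end of the map */

	return 0;
}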
+/**
+ * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion
+ * of BPF map elements where each element is deleted after being retrieved.
+ *
+ * @param fd BPF map file descriptor
+ * @param in_batch address of the first element in the batch to read; pass NULL to
+ * start the batched lookup and deletion from the beginning of the map
+ * @param out_batch output parameter that should be passed to next call as *in_batch*
+ * @param keys pointer to an array of *count* keys
+ * @param values pointer to an array large enough for *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to read and delete in batch; on output it represents the number of
+ * elements that were successfully read and deleted.
+ * If a non-**EFAULT** error code is returned and the output **count** value
+ * differs from the input **count** value, up to **count** elements may
+ * have been deleted.
+ * If **EFAULT** is returned, up to **count** elements may have been deleted
+ * without being returned via the *keys* and *values* output parameters.
+ * @param opts options for configuring the way the batch lookup and delete works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
void *out_batch, void *keys,
void *values, __u32 *count,
const struct bpf_map_batch_opts *opts);
-LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values,
+
+/**
+ * @brief **bpf_map_update_batch()** updates multiple elements in a map
+ * by specifying keys and their corresponding values.
+ *
+ * The *keys* and *values* parameters must point to memory large enough
+ * to hold *count* items based on the key and value size of the map.
+ *
+ * The *opts* parameter can be used to control how *bpf_map_update_batch()*
+ * should handle keys that either do or do not already exist in the map.
+ * In particular the *flags* parameter of *bpf_map_batch_opts* can be
+ * one of the following:
+ *
+ * **BPF_ANY**
+ *    Create new elements or update existing.
+ *
+ * **BPF_NOEXIST**
+ *    Create new elements only if they do not exist.
+ *
+ * **BPF_EXIST**
+ *    Update existing elements.
+ *
+ * **BPF_F_LOCK**
+ *    Update spin_lock-ed map elements. This must be
+ *    specified if the map value contains a spinlock.
+ *
+ * Note that *count* is an input and output parameter, where on output it
+ * represents how many elements were successfully updated. Also note that if
+ * **EFAULT** is returned, *count* should not be trusted to be correct.
+ *
+ * @param fd BPF map file descriptor
+ * @param keys pointer to an array of *count* keys
+ * @param values pointer to an array of *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to update in batch; on output if a non-EFAULT error is returned,
+ * **count** represents the number of updated elements if the output **count**
+ * value is not equal to the input **count** value.
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch update works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values,
__u32 *count,
const struct bpf_map_batch_opts *opts);
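A sketch of a batched insert-or-update; note that the per-element flags described above travel in the *elem_flags* field of *bpf_map_batch_opts* (BPF_ANY is numerically 0, so it is also the default):

#include <linux/bpf.h>
#include <bpf/bpf.h>

/* Apply *count key/value pairs; on return *count is the number applied. */
static int update_pairs(int map_fd, const __u32 *keys, const __u64 *vals,
			__u32 *count)
{
	LIBBPF_OPTS(bpf_map_batch_opts, opts,
		.elem_flags = BPF_ANY,	/* create new or update existing */
	);

	return bpf_map_update_batch(map_fd, keys, vals, count, &opts);
}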
@@ -243,8 +471,6 @@ LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
__u32 query_flags, __u32 *attach_flags,
__u32 *prog_ids, __u32 *prog_cnt);
LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
-LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
- __u32 log_buf_size, bool do_log);
LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
__u64 *probe_offset, __u64 *probe_addr);
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
index 6f3df004479b..223308931d55 100644
--- a/tools/lib/bpf/bpf_gen_internal.h
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -3,6 +3,8 @@
#ifndef __BPF_GEN_INTERNAL_H
#define __BPF_GEN_INTERNAL_H
+#include "bpf.h"
+
struct ksym_relo_desc {
const char *name;
int kind;
@@ -37,6 +39,8 @@ struct bpf_gen {
int error;
struct ksym_relo_desc *relos;
int relo_cnt;
+ struct bpf_core_relo *core_relos;
+ int core_relo_cnt;
char attach_target[128];
int attach_kind;
struct ksym_desc *ksyms;
@@ -49,13 +53,20 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps
int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps);
void bpf_gen__free(struct bpf_gen *gen);
void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size);
-void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx);
-struct bpf_prog_load_params;
-void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx);
+void bpf_gen__map_create(struct bpf_gen *gen,
+ enum bpf_map_type map_type, const char *map_name,
+ __u32 key_size, __u32 value_size, __u32 max_entries,
+ struct bpf_map_create_opts *map_attr, int map_idx);
+void bpf_gen__prog_load(struct bpf_gen *gen,
+ enum bpf_prog_type prog_type, const char *prog_name,
+ const char *license, struct bpf_insn *insns, size_t insn_cnt,
+ struct bpf_prog_load_opts *load_attr, int prog_idx);
void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size);
void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);
void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);
void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
bool is_typeless, int kind, int insn_idx);
+void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo);
+void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx);
#endif
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index db05a5937105..90f56b0f585f 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -66,277 +66,204 @@
#if defined(__KERNEL__) || defined(__VMLINUX_H__)
-#define PT_REGS_PARM1(x) ((x)->di)
-#define PT_REGS_PARM2(x) ((x)->si)
-#define PT_REGS_PARM3(x) ((x)->dx)
-#define PT_REGS_PARM4(x) ((x)->cx)
-#define PT_REGS_PARM5(x) ((x)->r8)
-#define PT_REGS_RET(x) ((x)->sp)
-#define PT_REGS_FP(x) ((x)->bp)
-#define PT_REGS_RC(x) ((x)->ax)
-#define PT_REGS_SP(x) ((x)->sp)
-#define PT_REGS_IP(x) ((x)->ip)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di)
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si)
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx)
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx)
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp)
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp)
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax)
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip)
+#define __PT_PARM1_REG di
+#define __PT_PARM2_REG si
+#define __PT_PARM3_REG dx
+#define __PT_PARM4_REG cx
+#define __PT_PARM5_REG r8
+#define __PT_RET_REG sp
+#define __PT_FP_REG bp
+#define __PT_RC_REG ax
+#define __PT_SP_REG sp
+#define __PT_IP_REG ip
#else
#ifdef __i386__
-/* i386 kernel is built with -mregparm=3 */
-#define PT_REGS_PARM1(x) ((x)->eax)
-#define PT_REGS_PARM2(x) ((x)->edx)
-#define PT_REGS_PARM3(x) ((x)->ecx)
-#define PT_REGS_PARM4(x) 0
-#define PT_REGS_PARM5(x) 0
-#define PT_REGS_RET(x) ((x)->esp)
-#define PT_REGS_FP(x) ((x)->ebp)
-#define PT_REGS_RC(x) ((x)->eax)
-#define PT_REGS_SP(x) ((x)->esp)
-#define PT_REGS_IP(x) ((x)->eip)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax)
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx)
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx)
-#define PT_REGS_PARM4_CORE(x) 0
-#define PT_REGS_PARM5_CORE(x) 0
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp)
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp)
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax)
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip)
-
-#else
-#define PT_REGS_PARM1(x) ((x)->rdi)
-#define PT_REGS_PARM2(x) ((x)->rsi)
-#define PT_REGS_PARM3(x) ((x)->rdx)
-#define PT_REGS_PARM4(x) ((x)->rcx)
-#define PT_REGS_PARM5(x) ((x)->r8)
-#define PT_REGS_RET(x) ((x)->rsp)
-#define PT_REGS_FP(x) ((x)->rbp)
-#define PT_REGS_RC(x) ((x)->rax)
-#define PT_REGS_SP(x) ((x)->rsp)
-#define PT_REGS_IP(x) ((x)->rip)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi)
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi)
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx)
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx)
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp)
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp)
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax)
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip)
-
-#endif
-#endif
+#define __PT_PARM1_REG eax
+#define __PT_PARM2_REG edx
+#define __PT_PARM3_REG ecx
+/* i386 kernel is built with -mregparm=3 */
+#define __PT_PARM4_REG __unsupported__
+#define __PT_PARM5_REG __unsupported__
+#define __PT_RET_REG esp
+#define __PT_FP_REG ebp
+#define __PT_RC_REG eax
+#define __PT_SP_REG esp
+#define __PT_IP_REG eip
+
+#else /* __i386__ */
+
+#define __PT_PARM1_REG rdi
+#define __PT_PARM2_REG rsi
+#define __PT_PARM3_REG rdx
+#define __PT_PARM4_REG rcx
+#define __PT_PARM5_REG r8
+#define __PT_RET_REG rsp
+#define __PT_FP_REG rbp
+#define __PT_RC_REG rax
+#define __PT_SP_REG rsp
+#define __PT_IP_REG rip
+
+#endif /* __i386__ */
+
+#endif /* __KERNEL__ || __VMLINUX_H__ */
#elif defined(bpf_target_s390)
/* s390 provides user_pt_regs instead of struct pt_regs to userspace */
-struct pt_regs;
-#define PT_REGS_S390 const volatile user_pt_regs
-#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2])
-#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3])
-#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4])
-#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5])
-#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6])
-#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14])
-/* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11])
-#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2])
-#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
-#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14])
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15])
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr)
+#define __PT_REGS_CAST(x) ((const user_pt_regs *)(x))
+#define __PT_PARM1_REG gprs[2]
+#define __PT_PARM2_REG gprs[3]
+#define __PT_PARM3_REG gprs[4]
+#define __PT_PARM4_REG gprs[5]
+#define __PT_PARM5_REG gprs[6]
+#define __PT_RET_REG gprs[14]
+#define __PT_FP_REG gprs[11] /* Works only with CONFIG_FRAME_POINTER */
+#define __PT_RC_REG gprs[2]
+#define __PT_SP_REG gprs[15]
+#define __PT_IP_REG psw.addr
#elif defined(bpf_target_arm)
-#define PT_REGS_PARM1(x) ((x)->uregs[0])
-#define PT_REGS_PARM2(x) ((x)->uregs[1])
-#define PT_REGS_PARM3(x) ((x)->uregs[2])
-#define PT_REGS_PARM4(x) ((x)->uregs[3])
-#define PT_REGS_PARM5(x) ((x)->uregs[4])
-#define PT_REGS_RET(x) ((x)->uregs[14])
-#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->uregs[0])
-#define PT_REGS_SP(x) ((x)->uregs[13])
-#define PT_REGS_IP(x) ((x)->uregs[12])
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14])
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13])
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12])
+#define __PT_PARM1_REG uregs[0]
+#define __PT_PARM2_REG uregs[1]
+#define __PT_PARM3_REG uregs[2]
+#define __PT_PARM4_REG uregs[3]
+#define __PT_PARM5_REG uregs[4]
+#define __PT_RET_REG uregs[14]
+#define __PT_FP_REG uregs[11] /* Works only with CONFIG_FRAME_POINTER */
+#define __PT_RC_REG uregs[0]
+#define __PT_SP_REG uregs[13]
+#define __PT_IP_REG uregs[12]
#elif defined(bpf_target_arm64)
/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
-struct pt_regs;
-#define PT_REGS_ARM64 const volatile struct user_pt_regs
-#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0])
-#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1])
-#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2])
-#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3])
-#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4])
-#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30])
-/* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29])
-#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0])
-#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
-#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30])
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc)
+#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x))
+#define __PT_PARM1_REG regs[0]
+#define __PT_PARM2_REG regs[1]
+#define __PT_PARM3_REG regs[2]
+#define __PT_PARM4_REG regs[3]
+#define __PT_PARM5_REG regs[4]
+#define __PT_RET_REG regs[30]
+#define __PT_FP_REG regs[29] /* Works only with CONFIG_FRAME_POINTER */
+#define __PT_RC_REG regs[0]
+#define __PT_SP_REG sp
+#define __PT_IP_REG pc
#elif defined(bpf_target_mips)
-#define PT_REGS_PARM1(x) ((x)->regs[4])
-#define PT_REGS_PARM2(x) ((x)->regs[5])
-#define PT_REGS_PARM3(x) ((x)->regs[6])
-#define PT_REGS_PARM4(x) ((x)->regs[7])
-#define PT_REGS_PARM5(x) ((x)->regs[8])
-#define PT_REGS_RET(x) ((x)->regs[31])
-#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->regs[2])
-#define PT_REGS_SP(x) ((x)->regs[29])
-#define PT_REGS_IP(x) ((x)->cp0_epc)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31])
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29])
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc)
+#define __PT_PARM1_REG regs[4]
+#define __PT_PARM2_REG regs[5]
+#define __PT_PARM3_REG regs[6]
+#define __PT_PARM4_REG regs[7]
+#define __PT_PARM5_REG regs[8]
+#define __PT_RET_REG regs[31]
+#define __PT_FP_REG regs[30] /* Works only with CONFIG_FRAME_POINTER */
+#define __PT_RC_REG regs[2]
+#define __PT_SP_REG regs[29]
+#define __PT_IP_REG cp0_epc
#elif defined(bpf_target_powerpc)
-#define PT_REGS_PARM1(x) ((x)->gpr[3])
-#define PT_REGS_PARM2(x) ((x)->gpr[4])
-#define PT_REGS_PARM3(x) ((x)->gpr[5])
-#define PT_REGS_PARM4(x) ((x)->gpr[6])
-#define PT_REGS_PARM5(x) ((x)->gpr[7])
-#define PT_REGS_RC(x) ((x)->gpr[3])
-#define PT_REGS_SP(x) ((x)->sp)
-#define PT_REGS_IP(x) ((x)->nip)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip)
+#define __PT_PARM1_REG gpr[3]
+#define __PT_PARM2_REG gpr[4]
+#define __PT_PARM3_REG gpr[5]
+#define __PT_PARM4_REG gpr[6]
+#define __PT_PARM5_REG gpr[7]
+#define __PT_RET_REG link
+#define __PT_FP_REG __unsupported__
+#define __PT_RC_REG gpr[3]
+#define __PT_SP_REG sp
+#define __PT_IP_REG nip
#elif defined(bpf_target_sparc)
-#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
-#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
-#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
-#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
-#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
-#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
-#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
-#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP])
-
+#define __PT_PARM1_REG u_regs[UREG_I0]
+#define __PT_PARM2_REG u_regs[UREG_I1]
+#define __PT_PARM3_REG u_regs[UREG_I2]
+#define __PT_PARM4_REG u_regs[UREG_I3]
+#define __PT_PARM5_REG u_regs[UREG_I4]
+#define __PT_RET_REG u_regs[UREG_I7]
+#define __PT_FP_REG __unsupported__
+#define __PT_RC_REG u_regs[UREG_I0]
+#define __PT_SP_REG u_regs[UREG_FP]
/* Should this also be a bpf_target check for the sparc case? */
#if defined(__arch64__)
-#define PT_REGS_IP(x) ((x)->tpc)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc)
+#define __PT_IP_REG tpc
#else
-#define PT_REGS_IP(x) ((x)->pc)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc)
+#define __PT_IP_REG pc
#endif
#elif defined(bpf_target_riscv)
+#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
+#define __PT_PARM1_REG a0
+#define __PT_PARM2_REG a1
+#define __PT_PARM3_REG a2
+#define __PT_PARM4_REG a3
+#define __PT_PARM5_REG a4
+#define __PT_RET_REG ra
+#define __PT_FP_REG fp
+#define __PT_RC_REG a5
+#define __PT_SP_REG sp
+#define __PT_IP_REG epc
+
+#endif
+
+#if defined(bpf_target_defined)
+
struct pt_regs;
-#define PT_REGS_RV const volatile struct user_regs_struct
-#define PT_REGS_PARM1(x) (((PT_REGS_RV *)(x))->a0)
-#define PT_REGS_PARM2(x) (((PT_REGS_RV *)(x))->a1)
-#define PT_REGS_PARM3(x) (((PT_REGS_RV *)(x))->a2)
-#define PT_REGS_PARM4(x) (((PT_REGS_RV *)(x))->a3)
-#define PT_REGS_PARM5(x) (((PT_REGS_RV *)(x))->a4)
-#define PT_REGS_RET(x) (((PT_REGS_RV *)(x))->ra)
-#define PT_REGS_FP(x) (((PT_REGS_RV *)(x))->s5)
-#define PT_REGS_RC(x) (((PT_REGS_RV *)(x))->a5)
-#define PT_REGS_SP(x) (((PT_REGS_RV *)(x))->sp)
-#define PT_REGS_IP(x) (((PT_REGS_RV *)(x))->epc)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a0)
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a1)
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a2)
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a3)
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a4)
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), ra)
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), fp)
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a5)
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), sp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), epc)
+/* allow some architectures to override `struct pt_regs` */
+#ifndef __PT_REGS_CAST
+#define __PT_REGS_CAST(x) (x)
#endif
+#define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG)
+#define PT_REGS_PARM2(x) (__PT_REGS_CAST(x)->__PT_PARM2_REG)
+#define PT_REGS_PARM3(x) (__PT_REGS_CAST(x)->__PT_PARM3_REG)
+#define PT_REGS_PARM4(x) (__PT_REGS_CAST(x)->__PT_PARM4_REG)
+#define PT_REGS_PARM5(x) (__PT_REGS_CAST(x)->__PT_PARM5_REG)
+#define PT_REGS_RET(x) (__PT_REGS_CAST(x)->__PT_RET_REG)
+#define PT_REGS_FP(x) (__PT_REGS_CAST(x)->__PT_FP_REG)
+#define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG)
+#define PT_REGS_SP(x) (__PT_REGS_CAST(x)->__PT_SP_REG)
+#define PT_REGS_IP(x) (__PT_REGS_CAST(x)->__PT_IP_REG)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_REG)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_REG)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_REG)
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_REG)
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_REG)
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RET_REG)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_FP_REG)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RC_REG)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_SP_REG)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_IP_REG)
+
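As a quick illustration of the rebuilt accessors, a raw-pt_regs kprobe reads arguments through the arch-neutral macros; a sketch assuming a vmlinux.h-based build and an illustrative target function:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("kprobe/do_sys_openat2")
int openat_entry(struct pt_regs *ctx)
{
	char buf[64];
	/* second argument of do_sys_openat2() */
	const char *filename = (const char *)PT_REGS_PARM2(ctx);

	bpf_probe_read_kernel_str(buf, sizeof(buf), filename);
	bpf_printk("openat: %s", buf);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";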
#if defined(bpf_target_powerpc)
+
#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; })
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
+
#elif defined(bpf_target_sparc)
+
#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); })
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
-#elif defined(bpf_target_defined)
+
+#else
+
#define BPF_KPROBE_READ_RET_IP(ip, ctx) \
({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \
- ({ bpf_probe_read_kernel(&(ip), sizeof(ip), \
- (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+
#endif
-#if !defined(bpf_target_defined)
+#else /* defined(bpf_target_defined) */
#define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
#define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
@@ -363,7 +290,7 @@ struct pt_regs;
#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
-#endif /* !defined(bpf_target_defined) */
+#endif /* defined(bpf_target_defined) */
#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
@@ -375,25 +302,23 @@ struct pt_regs;
#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
#endif
#ifndef ___bpf_narg
-#define ___bpf_narg(...) \
- ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#define ___bpf_narg(...) ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#endif
-#define ___bpf_ctx_cast0() ctx
-#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
-#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
-#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
-#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
-#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
-#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
-#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
-#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
-#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
+#define ___bpf_ctx_cast0() ctx
+#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
+#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
+#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
+#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
+#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
+#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
+#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
+#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
+#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
-#define ___bpf_ctx_cast(args...) \
- ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
+#define ___bpf_ctx_cast(args...) ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
/*
* BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
@@ -426,19 +351,13 @@ ____##name(unsigned long long *ctx, ##args)
struct pt_regs;
-#define ___bpf_kprobe_args0() ctx
-#define ___bpf_kprobe_args1(x) \
- ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
-#define ___bpf_kprobe_args2(x, args...) \
- ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
-#define ___bpf_kprobe_args3(x, args...) \
- ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
-#define ___bpf_kprobe_args4(x, args...) \
- ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
-#define ___bpf_kprobe_args5(x, args...) \
- ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
-#define ___bpf_kprobe_args(args...) \
- ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
+#define ___bpf_kprobe_args0() ctx
+#define ___bpf_kprobe_args1(x) ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
+#define ___bpf_kprobe_args2(x, args...) ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
+#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
+#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
+#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
+#define ___bpf_kprobe_args(args...) ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
/*
* BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
@@ -464,11 +383,9 @@ typeof(name(0)) name(struct pt_regs *ctx) \
static __attribute__((always_inline)) typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args)
-#define ___bpf_kretprobe_args0() ctx
-#define ___bpf_kretprobe_args1(x) \
- ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx)
-#define ___bpf_kretprobe_args(args...) \
- ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args)
+#define ___bpf_kretprobe_args0() ctx
+#define ___bpf_kretprobe_args1(x) ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx)
+#define ___bpf_kretprobe_args(args...) ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args)
/*
* BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional
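These macros feed the typed wrappers; a sketch of a kprobe/kretprobe pair using them, with an illustrative target function:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* BPF_KPROBE unpacks pt_regs into named, typed arguments. */
SEC("kprobe/do_unlinkat")
int BPF_KPROBE(unlink_entry, int dfd, struct filename *name)
{
	bpf_printk("unlinkat: dfd=%d", dfd);
	return 0;
}

/* BPF_KRETPROBE exposes just the return value. */
SEC("kretprobe/do_unlinkat")
int BPF_KRETPROBE(unlink_exit, long ret)
{
	bpf_printk("unlinkat: ret=%ld", ret);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";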
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 7e4c5586bd87..9aa19c89f758 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -299,6 +299,7 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_FLOAT:
+ case BTF_KIND_TYPE_TAG:
return base_size;
case BTF_KIND_INT:
return base_size + sizeof(__u32);
@@ -349,6 +350,7 @@ static int btf_bswap_type_rest(struct btf_type *t)
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_FLOAT:
+ case BTF_KIND_TYPE_TAG:
return 0;
case BTF_KIND_INT:
*(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
@@ -452,7 +454,7 @@ const struct btf *btf__base_btf(const struct btf *btf)
}
/* internal helper returning non-const pointer to a type */
-struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
+struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id)
{
if (type_id == 0)
return &btf_void;
@@ -608,6 +610,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
case BTF_KIND_RESTRICT:
case BTF_KIND_VAR:
case BTF_KIND_DECL_TAG:
+ case BTF_KIND_TYPE_TAG:
type_id = t->type;
break;
case BTF_KIND_ARRAY:
@@ -649,6 +652,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
+ case BTF_KIND_TYPE_TAG:
return btf__align_of(btf, t->type);
case BTF_KIND_ARRAY:
return btf__align_of(btf, btf_array(t)->type);
@@ -1120,54 +1124,86 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf)
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
-int btf__load_into_kernel(struct btf *btf)
+int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level)
{
- __u32 log_buf_size = 0, raw_size;
- char *log_buf = NULL;
+ LIBBPF_OPTS(bpf_btf_load_opts, opts);
+ __u32 buf_sz = 0, raw_size;
+ char *buf = NULL, *tmp;
void *raw_data;
int err = 0;
if (btf->fd >= 0)
return libbpf_err(-EEXIST);
+ if (log_sz && !log_buf)
+ return libbpf_err(-EINVAL);
-retry_load:
- if (log_buf_size) {
- log_buf = malloc(log_buf_size);
- if (!log_buf)
- return libbpf_err(-ENOMEM);
-
- *log_buf = 0;
- }
-
+ /* cache native raw data representation */
raw_data = btf_get_raw_data(btf, &raw_size, false);
if (!raw_data) {
err = -ENOMEM;
goto done;
}
- /* cache native raw data representation */
btf->raw_size = raw_size;
btf->raw_data = raw_data;
- btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false);
+retry_load:
+	/* if log_level is 0, we won't provide log_buf/log_size to the kernel
+	 * initially. Only if BTF loading fails do we bump log_level to 1 and
+	 * retry, using either the auto-allocated or the custom log_buf. This
+	 * way a non-NULL custom log_buf provides a buffer just in case, while
+	 * we still hope for a successful load with no need for the log.
+	 */
+ if (log_level) {
+ /* if caller didn't provide custom log_buf, we'll keep
+ * allocating our own progressively bigger buffers for BTF
+ * verification log
+ */
+ if (!log_buf) {
+ buf_sz = max((__u32)BPF_LOG_BUF_SIZE, buf_sz * 2);
+ tmp = realloc(buf, buf_sz);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto done;
+ }
+ buf = tmp;
+ buf[0] = '\0';
+ }
+
+ opts.log_buf = log_buf ? log_buf : buf;
+ opts.log_size = log_buf ? log_sz : buf_sz;
+ opts.log_level = log_level;
+ }
+
+ btf->fd = bpf_btf_load(raw_data, raw_size, &opts);
if (btf->fd < 0) {
- if (!log_buf || errno == ENOSPC) {
- log_buf_size = max((__u32)BPF_LOG_BUF_SIZE,
- log_buf_size << 1);
- free(log_buf);
+ /* time to turn on verbose mode and try again */
+ if (log_level == 0) {
+ log_level = 1;
goto retry_load;
}
+ /* only retry if caller didn't provide custom log_buf, but
+ * make sure we can never overflow buf_sz
+ */
+ if (!log_buf && errno == ENOSPC && buf_sz <= UINT_MAX / 2)
+ goto retry_load;
err = -errno;
- pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
- if (*log_buf)
- pr_warn("%s\n", log_buf);
- goto done;
+ pr_warn("BTF loading error: %d\n", err);
+ /* don't print out contents of custom log_buf */
+ if (!log_buf && buf[0])
+			pr_warn("-- BEGIN BTF LOAD LOG --\n%s\n-- END BTF LOAD LOG --\n", buf);
}
done:
- free(log_buf);
+ free(buf);
return libbpf_err(err);
}
+
+int btf__load_into_kernel(struct btf *btf)
+{
+ return btf_load_into_kernel(btf, NULL, 0, 0);
+}
+
int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel")));
int btf__fd(const struct btf *btf)
@@ -2236,6 +2272,22 @@ int btf__add_restrict(struct btf *btf, int ref_type_id)
}
/*
+ * Append new BTF_KIND_TYPE_TAG type with:
+ * - *value*, non-empty/non-NULL tag value;
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id)
+{
+	if (!value || !value[0])
+ return libbpf_err(-EINVAL);
+
+ return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id);
+}
+
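For context, the new constructor composes with the existing btf__add_*() APIs; a sketch encoding a tagged pointer such as int __attribute__((btf_type_tag("user"))) * (tag name chosen for illustration, error checking elided):

	struct btf *btf = btf__new_empty();
	int int_id, tag_id, ptr_id;

	int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);	/* [1] int */
	tag_id = btf__add_type_tag(btf, "user", int_id);	/* [2] tag -> [1] */
	ptr_id = btf__add_ptr(btf, tag_id);			/* [3] ptr -> [2] */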
+/*
* Append new BTF_KIND_FUNC type with:
* - *name*, non-empty/non-NULL name;
* - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet;
@@ -2711,15 +2763,11 @@ void btf_ext__free(struct btf_ext *btf_ext)
free(btf_ext);
}
-struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
+struct btf_ext *btf_ext__new(const __u8 *data, __u32 size)
{
struct btf_ext *btf_ext;
int err;
- err = btf_ext_parse_hdr(data, size);
- if (err)
- return libbpf_err_ptr(err);
-
btf_ext = calloc(1, sizeof(struct btf_ext));
if (!btf_ext)
return libbpf_err_ptr(-ENOMEM);
@@ -2732,6 +2780,10 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
}
memcpy(btf_ext->data, data, size);
+ err = btf_ext_parse_hdr(btf_ext->data, size);
+ if (err)
+ goto done;
+
if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) {
err = -EINVAL;
goto done;
@@ -2846,8 +2898,7 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
struct btf_dedup;
-static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
- const struct btf_dedup_opts *opts);
+static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts);
static void btf_dedup_free(struct btf_dedup *d);
static int btf_dedup_prep(struct btf_dedup *d);
static int btf_dedup_strings(struct btf_dedup *d);
@@ -2994,12 +3045,17 @@ static int btf_dedup_remap_types(struct btf_dedup *d);
 * deduplicating structs/unions is described in greater detail in comments for
* `btf_dedup_is_equiv` function.
*/
-int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
- const struct btf_dedup_opts *opts)
+
+DEFAULT_VERSION(btf__dedup_v0_6_0, btf__dedup, LIBBPF_0.6.0)
+int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts)
{
- struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts);
+ struct btf_dedup *d;
int err;
+ if (!OPTS_VALID(opts, btf_dedup_opts))
+ return libbpf_err(-EINVAL);
+
+ d = btf_dedup_new(btf, opts);
if (IS_ERR(d)) {
pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
return libbpf_err(-EINVAL);
@@ -3051,6 +3107,19 @@ done:
return libbpf_err(err);
}
+COMPAT_VERSION(btf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2)
+int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts)
+{
+ LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext);
+
+ if (unused_opts) {
+ pr_warn("please use new version of btf__dedup() that supports options\n");
+ return libbpf_err(-ENOTSUP);
+ }
+
+ return btf__dedup(btf, &opts);
+}
+
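Taken together with the versioned symbols above, the two call shapes now map as follows (a sketch; btf and ext stand for a parsed struct btf * and an optional struct btf_ext *):

	LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = ext);
	err = btf__dedup(btf, &opts);     /* 2 args: new opts-based API */

	err = btf__dedup(btf, ext, NULL); /* 3 args: routed to the deprecated
					   * shim; passing a non-NULL opts
					   * pointer there returns -ENOTSUP
					   */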
#define BTF_UNPROCESSED_ID ((__u32)-1)
#define BTF_IN_PROGRESS_ID ((__u32)-2)
@@ -3163,8 +3232,7 @@ static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
return k1 == k2;
}
-static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
- const struct btf_dedup_opts *opts)
+static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts)
{
struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
@@ -3173,13 +3241,11 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
if (!d)
return ERR_PTR(-ENOMEM);
- d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
- /* dedup_table_size is now used only to force collisions in tests */
- if (opts && opts->dedup_table_size == 1)
+ if (OPTS_GET(opts, force_collisions, false))
hash_fn = btf_dedup_collision_hash_fn;
d->btf = btf;
- d->btf_ext = btf_ext;
+ d->btf_ext = OPTS_GET(opts, btf_ext, NULL);
d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
if (IS_ERR(d->dedup_table)) {
@@ -3443,8 +3509,8 @@ static long btf_hash_struct(struct btf_type *t)
}
/*
- * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
- * IDs. This check is performed during type graph equivalence check and
+ * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
+ * type IDs. This check is performed during the type graph equivalence check and
* referenced types equivalence is checked separately.
*/
static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
@@ -3625,6 +3691,7 @@ static int btf_dedup_prep(struct btf_dedup *d)
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_FLOAT:
+ case BTF_KIND_TYPE_TAG:
h = btf_hash_common(t);
break;
case BTF_KIND_INT:
@@ -3685,6 +3752,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_VAR:
case BTF_KIND_DATASEC:
case BTF_KIND_DECL_TAG:
+ case BTF_KIND_TYPE_TAG:
return 0;
case BTF_KIND_INT:
@@ -3708,8 +3776,6 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
new_id = cand_id;
break;
}
- if (d->opts.dont_resolve_fwds)
- continue;
if (btf_compat_enum(t, cand)) {
if (btf_is_enum_fwd(t)) {
/* resolve fwd to full enum */
@@ -3817,6 +3883,31 @@ static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
return btf_equal_array(t1, t2);
}
+/* Check if given two types are identical STRUCT/UNION definitions */
+static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
+{
+ const struct btf_member *m1, *m2;
+ struct btf_type *t1, *t2;
+ int n, i;
+
+ t1 = btf_type_by_id(d->btf, id1);
+ t2 = btf_type_by_id(d->btf, id2);
+
+ if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
+ return false;
+
+ if (!btf_shallow_equal_struct(t1, t2))
+ return false;
+
+ m1 = btf_members(t1);
+ m2 = btf_members(t2);
+ for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
+ if (m1->type != m2->type)
+ return false;
+ }
+ return true;
+}
+
/*
* Check equivalence of BTF type graph formed by candidate struct/union (we'll
* call it "candidate graph" in this description for brevity) to a type graph
@@ -3928,6 +4019,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
hypot_type_id = d->hypot_map[canon_id];
if (hypot_type_id <= BTF_MAX_NR_TYPES) {
+ if (hypot_type_id == cand_id)
+ return 1;
		/* In some cases the compiler will generate different DWARF types
* for *identical* array type definitions and use them for
* different fields within the *same* struct. This breaks type
@@ -3936,8 +4029,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
* types within a single CU. So work around that by explicitly
* allowing identical array types here.
*/
- return hypot_type_id == cand_id ||
- btf_dedup_identical_arrays(d, hypot_type_id, cand_id);
+ if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
+ return 1;
+		/* It turns out that a similar situation can happen with
+ * struct/union sometimes, sigh... Handle the case where
+ * structs/unions are exactly the same, down to the referenced
+ * type IDs. Anything more complicated (e.g., if referenced
+ * types are different, but equivalent) is *way more*
+ * complicated and requires a many-to-many equivalence mapping.
+ */
+ if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
+ return 1;
+ return 0;
}
if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
@@ -3952,8 +4055,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
return 0;
/* FWD <--> STRUCT/UNION equivalence check, if enabled */
- if (!d->opts.dont_resolve_fwds
- && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
+ if ((cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
&& cand_kind != canon_kind) {
__u16 real_kind;
__u16 fwd_kind;
@@ -3979,10 +4081,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
return btf_equal_int_tag(cand_type, canon_type);
case BTF_KIND_ENUM:
- if (d->opts.dont_resolve_fwds)
- return btf_equal_enum(cand_type, canon_type);
- else
- return btf_compat_enum(cand_type, canon_type);
+ return btf_compat_enum(cand_type, canon_type);
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
@@ -3994,6 +4093,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_TYPE_TAG:
if (cand_type->info != canon_type->info)
return 0;
return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
@@ -4289,6 +4389,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_TYPE_TAG:
ref_type_id = btf_dedup_ref_type(d, t->type);
if (ref_type_id < 0)
return ref_type_id;
@@ -4595,6 +4696,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct
case BTF_KIND_FUNC:
case BTF_KIND_VAR:
case BTF_KIND_DECL_TAG:
+ case BTF_KIND_TYPE_TAG:
return visit(&t->type, ctx);
case BTF_KIND_ARRAY: {
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index bc005ba3ceec..061839f04525 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -157,7 +157,7 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
__u32 expected_value_size,
__u32 *key_type_id, __u32 *value_type_id);
-LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
+LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size);
LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
__u32 *size);
@@ -227,6 +227,7 @@ LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_
LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id);
/* func and func_proto construction APIs */
LIBBPF_API int btf__add_func(struct btf *btf, const char *name,
@@ -245,25 +246,86 @@ LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_typ
int component_idx);
struct btf_dedup_opts {
- unsigned int dedup_table_size;
- bool dont_resolve_fwds;
+ size_t sz;
+ /* optional .BTF.ext info to dedup along the main BTF info */
+ struct btf_ext *btf_ext;
+ /* force hash collisions (used for testing) */
+ bool force_collisions;
+ size_t :0;
};
+#define btf_dedup_opts__last_field force_collisions
+
+LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
+
+LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts);
-LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
- const struct btf_dedup_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead")
+LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts);
+#define btf__dedup(...) ___libbpf_overload(___btf_dedup, __VA_ARGS__)
+#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts)
+#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts)
struct btf_dump;
struct btf_dump_opts {
- void *ctx;
+ union {
+ size_t sz;
+ void *ctx; /* DEPRECATED: will be gone in v1.0 */
+ };
};
typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const struct btf_dump_opts *opts,
- btf_dump_printf_fn_t printf_fn);
+ btf_dump_printf_fn_t printf_fn,
+ void *ctx,
+ const struct btf_dump_opts *opts);
+
+LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf,
+ btf_dump_printf_fn_t printf_fn,
+ void *ctx,
+ const struct btf_dump_opts *opts);
+
+LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const struct btf_dump_opts *opts,
+ btf_dump_printf_fn_t printf_fn);
+
+/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the
+ * type of the 4th argument. If it's btf_dump's print callback, use the
+ * deprecated API; otherwise, choose the new btf_dump__new().
+ * ___libbpf_override() doesn't work here because both variants have 4 input
+ * arguments.
+ *
+ * (void *) casts are necessary to avoid compilation warnings about type
+ * mismatches, because even though __builtin_choose_expr() only ever evaluates
+ * one side, the other side still has to satisfy type constraints (this is a
+ * compiler implementation limitation which might be lifted eventually,
+ * according to the documentation). So passing struct btf_ext in place of
+ * btf_dump_printf_fn_t would generate a compilation warning. Casting to
+ * void * avoids this issue.
+ *
+ * Also, two type compatibility checks, for a function and for a function
+ * pointer, are required because passing a function reference into
+ * btf_dump__new() as btf_dump__new(..., my_callback, ...) and as
+ * btf_dump__new(..., &my_callback, ...) (note the explicit ampersand in the
+ * latter case) differs as far as __builtin_types_compatible_p() is
+ * concerned. Thus the two checks are combined to detect the callback
+ * argument.
+ *
+ * The rest works just like ___libbpf_override() usage with symbol
+ * versioning.
+ *
+ * C++ compilers don't support __builtin_types_compatible_p(), so at least
+ * don't screw up compilation for them and let C++ users pick btf_dump__new
+ * vs btf_dump__new_deprecated explicitly.
+ */
+#ifndef __cplusplus
+#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) || \
+ __builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)), \
+ btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4), \
+ btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4))
+#endif
+
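In practice the dispatch makes both spellings compile against the right symbol; a sketch (my_printf is a user callback matching btf_dump_printf_fn_t, my_ctx an opaque pointer):

	/* new argument order: callback and ctx before opts */
	struct btf_dump *d = btf_dump__new(btf, my_printf, my_ctx, NULL);

	/* old argument order: callback last, so __builtin_choose_expr()
	 * above routes this to btf_dump__new_deprecated()
	 */
	struct btf_dump *d_old = btf_dump__new(btf, btf_ext, NULL, my_printf);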
LIBBPF_API void btf_dump__free(struct btf_dump *d);
LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
@@ -403,7 +465,8 @@ static inline bool btf_is_mod(const struct btf_type *t)
return kind == BTF_KIND_VOLATILE ||
kind == BTF_KIND_CONST ||
- kind == BTF_KIND_RESTRICT;
+ kind == BTF_KIND_RESTRICT ||
+ kind == BTF_KIND_TYPE_TAG;
}
static inline bool btf_is_func(const struct btf_type *t)
@@ -436,6 +499,11 @@ static inline bool btf_is_decl_tag(const struct btf_type *t)
return btf_kind(t) == BTF_KIND_DECL_TAG;
}
+static inline bool btf_is_type_tag(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_TYPE_TAG;
+}
+
static inline __u8 btf_int_encoding(const struct btf_type *t)
{
return BTF_INT_ENCODING(*(__u32 *)(t + 1));
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 17db62b5002e..b9a3260c83cb 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -77,9 +77,8 @@ struct btf_dump_data {
struct btf_dump {
const struct btf *btf;
- const struct btf_ext *btf_ext;
btf_dump_printf_fn_t printf_fn;
- struct btf_dump_opts opts;
+ void *cb_ctx;
int ptr_sz;
bool strip_mods;
bool skip_anon_defs;
@@ -138,29 +137,32 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
va_list args;
va_start(args, fmt);
- d->printf_fn(d->opts.ctx, fmt, args);
+ d->printf_fn(d->cb_ctx, fmt, args);
va_end(args);
}
static int btf_dump_mark_referenced(struct btf_dump *d);
static int btf_dump_resize(struct btf_dump *d);
-struct btf_dump *btf_dump__new(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const struct btf_dump_opts *opts,
- btf_dump_printf_fn_t printf_fn)
+DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0)
+struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf,
+ btf_dump_printf_fn_t printf_fn,
+ void *ctx,
+ const struct btf_dump_opts *opts)
{
struct btf_dump *d;
int err;
+ if (!printf_fn)
+ return libbpf_err_ptr(-EINVAL);
+
d = calloc(1, sizeof(struct btf_dump));
if (!d)
return libbpf_err_ptr(-ENOMEM);
d->btf = btf;
- d->btf_ext = btf_ext;
d->printf_fn = printf_fn;
- d->opts.ctx = opts ? opts->ctx : NULL;
+ d->cb_ctx = ctx;
d->ptr_sz = btf__pointer_size(btf) ? : sizeof(void *);
d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
@@ -186,6 +188,17 @@ err:
return libbpf_err_ptr(err);
}
+COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4)
+struct btf_dump *btf_dump__new_deprecated(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const struct btf_dump_opts *opts,
+ btf_dump_printf_fn_t printf_fn)
+{
+ if (!printf_fn)
+ return libbpf_err_ptr(-EINVAL);
+ return btf_dump__new_v0_6_0(btf, printf_fn, opts ? opts->ctx : NULL, opts);
+}
+
static int btf_dump_resize(struct btf_dump *d)
{
int err, last_id = btf__type_cnt(d->btf) - 1;
@@ -317,6 +330,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
case BTF_KIND_FUNC:
case BTF_KIND_VAR:
case BTF_KIND_DECL_TAG:
+ case BTF_KIND_TYPE_TAG:
d->type_states[t->type].referenced = 1;
break;
@@ -560,6 +574,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
+ case BTF_KIND_TYPE_TAG:
return btf_dump_order_type(d, t->type, through_ptr);
case BTF_KIND_FUNC_PROTO: {
@@ -734,6 +749,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
+ case BTF_KIND_TYPE_TAG:
btf_dump_emit_type(d, t->type, cont_id);
break;
case BTF_KIND_ARRAY:
@@ -1154,6 +1170,7 @@ skip_mod:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
case BTF_KIND_FUNC_PROTO:
+ case BTF_KIND_TYPE_TAG:
id = t->type;
break;
case BTF_KIND_ARRAY:
@@ -1322,6 +1339,11 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
case BTF_KIND_RESTRICT:
btf_dump_printf(d, " restrict");
break;
+ case BTF_KIND_TYPE_TAG:
+ btf_dump_emit_mods(d, decls);
+ name = btf_name_of(d, t->name_off);
+ btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name);
+ break;
case BTF_KIND_ARRAY: {
const struct btf_array *a = btf_array(t);
const struct btf_type *next_t;
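With the TYPE_TAG case added above, a tagged type round-trips through the dumper as plain C with the attribute spelled out; for the int -> type_tag("user") -> ptr chain sketched earlier, the emitted declaration would look roughly like:

	int __attribute__((btf_type_tag("user"))) *p;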
@@ -2194,7 +2216,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d,
__u8 bits_offset,
__u8 bit_sz)
{
- int size, err;
+ int size, err = 0;
size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
if (size < 0)
@@ -2299,8 +2321,8 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
if (!opts->indent_str)
d->typed_dump->indent_str[0] = '\t';
else
- strncat(d->typed_dump->indent_str, opts->indent_str,
- sizeof(d->typed_dump->indent_str) - 1);
+ libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str,
+ sizeof(d->typed_dump->indent_str));
d->typed_dump->compact = OPTS_GET(opts, compact, false);
d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
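For reference, indent_str arrives through the typed-dump options that the snippet above copies; a usage sketch (d, type_id, data, and data_sz assumed from caller context):

	LIBBPF_OPTS(btf_dump_type_data_opts, opts,
		.indent_str = "  ",	/* two spaces instead of the default tab */
		.compact = true,
	);
	err = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts);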
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 9934851ccde7..8ecef1088ba2 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -371,8 +371,9 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
{
int i;
- if (nr_progs != gen->nr_progs || nr_maps != gen->nr_maps) {
- pr_warn("progs/maps mismatch\n");
+ if (nr_progs < gen->nr_progs || nr_maps != gen->nr_maps) {
+ pr_warn("nr_progs %d/%d nr_maps %d/%d mismatch\n",
+ nr_progs, gen->nr_progs, nr_maps, gen->nr_maps);
gen->error = -EFAULT;
return gen->error;
}
@@ -445,47 +446,32 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,
}
void bpf_gen__map_create(struct bpf_gen *gen,
- struct bpf_create_map_params *map_attr, int map_idx)
+ enum bpf_map_type map_type,
+ const char *map_name,
+ __u32 key_size, __u32 value_size, __u32 max_entries,
+ struct bpf_map_create_opts *map_attr, int map_idx)
{
- int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id);
+ int attr_size = offsetofend(union bpf_attr, map_extra);
bool close_inner_map_fd = false;
int map_create_attr, idx;
union bpf_attr attr;
memset(&attr, 0, attr_size);
- attr.map_type = map_attr->map_type;
- attr.key_size = map_attr->key_size;
- attr.value_size = map_attr->value_size;
+ attr.map_type = map_type;
+ attr.key_size = key_size;
+ attr.value_size = value_size;
attr.map_flags = map_attr->map_flags;
attr.map_extra = map_attr->map_extra;
- memcpy(attr.map_name, map_attr->name,
- min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1));
+ if (map_name)
+ libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
attr.numa_node = map_attr->numa_node;
attr.map_ifindex = map_attr->map_ifindex;
- attr.max_entries = map_attr->max_entries;
- switch (attr.map_type) {
- case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
- case BPF_MAP_TYPE_CGROUP_ARRAY:
- case BPF_MAP_TYPE_STACK_TRACE:
- case BPF_MAP_TYPE_ARRAY_OF_MAPS:
- case BPF_MAP_TYPE_HASH_OF_MAPS:
- case BPF_MAP_TYPE_DEVMAP:
- case BPF_MAP_TYPE_DEVMAP_HASH:
- case BPF_MAP_TYPE_CPUMAP:
- case BPF_MAP_TYPE_XSKMAP:
- case BPF_MAP_TYPE_SOCKMAP:
- case BPF_MAP_TYPE_SOCKHASH:
- case BPF_MAP_TYPE_QUEUE:
- case BPF_MAP_TYPE_STACK:
- case BPF_MAP_TYPE_RINGBUF:
- break;
- default:
- attr.btf_key_type_id = map_attr->btf_key_type_id;
- attr.btf_value_type_id = map_attr->btf_value_type_id;
- }
+ attr.max_entries = max_entries;
+ attr.btf_key_type_id = map_attr->btf_key_type_id;
+ attr.btf_value_type_id = map_attr->btf_value_type_id;
pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n",
- attr.map_name, map_idx, map_attr->map_type, attr.btf_value_type_id);
+ attr.map_name, map_idx, map_type, attr.btf_value_type_id);
map_create_attr = add_data(gen, &attr, attr_size);
if (attr.btf_value_type_id)
@@ -512,7 +498,7 @@ void bpf_gen__map_create(struct bpf_gen *gen,
/* emit MAP_CREATE command */
emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size);
debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d",
- attr.map_name, map_idx, map_attr->map_type, attr.value_size,
+ attr.map_name, map_idx, map_type, value_size,
attr.btf_value_type_id);
emit_check_err(gen);
/* remember map_fd in the stack, if successful */
@@ -597,8 +583,9 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo)
{
struct ksym_desc *kdesc;
+ int i;
- for (int i = 0; i < gen->nr_ksyms; i++) {
+ for (i = 0; i < gen->nr_ksyms; i++) {
if (!strcmp(gen->ksyms[i].name, relo->name)) {
gen->ksyms[i].ref++;
return &gen->ksyms[i];
@@ -700,27 +687,29 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo
return;
}
kdesc->off = btf_fd_idx;
- /* set a default value for imm */
+ /* jump to success case */
+ emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3));
+	/* set imm and off to 0 as defaults */
emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0));
- /* skip success case store if ret < 0 */
- emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 1));
+ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
+ /* skip success case for ret < 0 */
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 10));
/* store btf_id into insn[insn_idx].imm */
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm)));
+ /* obtain fd in BPF_REG_9 */
+ emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7));
+ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32));
+ /* jump to fd_array store if fd denotes module BTF */
+ emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2));
+ /* set the default value for off */
+ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
+ /* skip BTF fd store for vmlinux BTF */
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4));
/* load fd_array slot pointer */
emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
0, 0, 0, blob_fd_array_off(gen, btf_fd_idx)));
- /* skip store of BTF fd if ret < 0 */
- emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 3));
/* store BTF fd in slot */
- emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7));
- emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32));
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0));
- /* set a default value for off */
- emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
- /* skip insn->off store if ret < 0 */
- emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 2));
- /* skip if vmlinux BTF */
- emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1));
/* store index into insn[insn_idx].off */
emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx));
log:
@@ -819,9 +808,8 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo,
kdesc->insn + offsetof(struct bpf_insn, imm));
move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4,
kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm));
- emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_8, offsetof(struct bpf_insn, imm)));
- /* jump over src_reg adjustment if imm is not 0 */
- emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 3));
+ /* jump over src_reg adjustment if imm is not 0, reuse BPF_REG_0 from move_blob2blob */
+ emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3));
goto clear_src_reg;
}
/* remember insn offset, so we can copy BTF ID and FD later */
@@ -829,17 +817,20 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo,
emit_bpf_find_by_name_kind(gen, relo);
if (!relo->is_weak)
emit_check_err(gen);
- /* set default values as 0 */
+ /* jump to success case */
+ emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3));
+	/* set insn[insn_idx].imm and insn[insn_idx + 1].imm to 0 */
emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0));
emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 0));
- /* skip success case stores if ret < 0 */
- emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 4));
+ /* skip success case for ret < 0 */
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4));
/* store btf_id into insn[insn_idx].imm */
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm)));
/* store btf_obj_fd into insn[insn_idx + 1].imm */
emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7,
sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)));
+ /* skip src_reg adjustment */
emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3));
clear_src_reg:
/* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */
@@ -851,6 +842,22 @@ clear_src_reg:
emit_ksym_relo_log(gen, relo, kdesc->ref);
}
+void bpf_gen__record_relo_core(struct bpf_gen *gen,
+ const struct bpf_core_relo *core_relo)
+{
+ struct bpf_core_relo *relos;
+
+ relos = libbpf_reallocarray(gen->core_relos, gen->core_relo_cnt + 1, sizeof(*relos));
+ if (!relos) {
+ gen->error = -ENOMEM;
+ return;
+ }
+ gen->core_relos = relos;
+ relos += gen->core_relo_cnt;
+ memcpy(relos, core_relo, sizeof(*relos));
+ gen->core_relo_cnt++;
+}
+
static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns)
{
int insn;
@@ -883,6 +890,15 @@ static void emit_relos(struct bpf_gen *gen, int insns)
emit_relo(gen, gen->relos + i, insns);
}
+static void cleanup_core_relo(struct bpf_gen *gen)
+{
+ if (!gen->core_relo_cnt)
+ return;
+ free(gen->core_relos);
+ gen->core_relo_cnt = 0;
+ gen->core_relos = NULL;
+}
+
static void cleanup_relos(struct bpf_gen *gen, int insns)
{
int i, insn;
@@ -910,30 +926,32 @@ static void cleanup_relos(struct bpf_gen *gen, int insns)
gen->relo_cnt = 0;
gen->relos = NULL;
}
+ cleanup_core_relo(gen);
}
void bpf_gen__prog_load(struct bpf_gen *gen,
- struct bpf_prog_load_params *load_attr, int prog_idx)
+ enum bpf_prog_type prog_type, const char *prog_name,
+ const char *license, struct bpf_insn *insns, size_t insn_cnt,
+ struct bpf_prog_load_opts *load_attr, int prog_idx)
{
- int attr_size = offsetofend(union bpf_attr, fd_array);
- int prog_load_attr, license, insns, func_info, line_info;
+ int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos;
+ int attr_size = offsetofend(union bpf_attr, core_relo_rec_size);
union bpf_attr attr;
memset(&attr, 0, attr_size);
- pr_debug("gen: prog_load: type %d insns_cnt %zd\n",
- load_attr->prog_type, load_attr->insn_cnt);
+	pr_debug("gen: prog_load: type %d insns_cnt %zd prog_idx %d\n",
+ prog_type, insn_cnt, prog_idx);
/* add license string to blob of bytes */
- license = add_data(gen, load_attr->license, strlen(load_attr->license) + 1);
+ license_off = add_data(gen, license, strlen(license) + 1);
/* add insns to blob of bytes */
- insns = add_data(gen, load_attr->insns,
- load_attr->insn_cnt * sizeof(struct bpf_insn));
+ insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn));
- attr.prog_type = load_attr->prog_type;
+ attr.prog_type = prog_type;
attr.expected_attach_type = load_attr->expected_attach_type;
attr.attach_btf_id = load_attr->attach_btf_id;
attr.prog_ifindex = load_attr->prog_ifindex;
attr.kern_version = 0;
- attr.insn_cnt = (__u32)load_attr->insn_cnt;
+ attr.insn_cnt = (__u32)insn_cnt;
attr.prog_flags = load_attr->prog_flags;
attr.func_info_rec_size = load_attr->func_info_rec_size;
@@ -946,15 +964,19 @@ void bpf_gen__prog_load(struct bpf_gen *gen,
line_info = add_data(gen, load_attr->line_info,
attr.line_info_cnt * attr.line_info_rec_size);
- memcpy(attr.prog_name, load_attr->name,
- min((unsigned)strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
+ attr.core_relo_rec_size = sizeof(struct bpf_core_relo);
+ attr.core_relo_cnt = gen->core_relo_cnt;
+ core_relos = add_data(gen, gen->core_relos,
+ attr.core_relo_cnt * attr.core_relo_rec_size);
+
+ libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
prog_load_attr = add_data(gen, &attr, attr_size);
/* populate union bpf_attr with a pointer to license */
- emit_rel_store(gen, attr_field(prog_load_attr, license), license);
+ emit_rel_store(gen, attr_field(prog_load_attr, license), license_off);
/* populate union bpf_attr with a pointer to instructions */
- emit_rel_store(gen, attr_field(prog_load_attr, insns), insns);
+ emit_rel_store(gen, attr_field(prog_load_attr, insns), insns_off);
/* populate union bpf_attr with a pointer to func_info */
emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info);
@@ -962,6 +984,9 @@ void bpf_gen__prog_load(struct bpf_gen *gen,
/* populate union bpf_attr with a pointer to line_info */
emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info);
+ /* populate union bpf_attr with a pointer to core_relos */
+ emit_rel_store(gen, attr_field(prog_load_attr, core_relos), core_relos);
+
/* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */
emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array);
@@ -986,15 +1011,17 @@ void bpf_gen__prog_load(struct bpf_gen *gen,
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
offsetof(union bpf_attr, attach_btf_obj_fd)));
}
- emit_relos(gen, insns);
+ emit_relos(gen, insns_off);
/* emit PROG_LOAD command */
emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size);
debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt);
/* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */
- cleanup_relos(gen, insns);
- if (gen->attach_kind)
+ cleanup_relos(gen, insns_off);
+ if (gen->attach_kind) {
emit_sys_close_blob(gen,
attr_field(prog_load_attr, attach_btf_obj_fd));
+ gen->attach_kind = 0;
+ }
emit_check_err(gen);
/* remember prog_fd in the stack, if successful */
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
@@ -1040,6 +1067,33 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue,
emit_check_err(gen);
}
+void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slot,
+ int inner_map_idx)
+{
+ int attr_size = offsetofend(union bpf_attr, flags);
+ int map_update_attr, key;
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_size);
+ pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n",
+ outer_map_idx, slot, inner_map_idx);
+
+ key = add_data(gen, &slot, sizeof(slot));
+
+ map_update_attr = add_data(gen, &attr, attr_size);
+ move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4,
+ blob_fd_array_off(gen, outer_map_idx));
+ emit_rel_store(gen, attr_field(map_update_attr, key), key);
+ emit_rel_store(gen, attr_field(map_update_attr, value),
+ blob_fd_array_off(gen, inner_map_idx));
+
+ /* emit MAP_UPDATE_ELEM command */
+ emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size);
+ debug_ret(gen, "populate_outer_map outer %d key %d inner %d",
+ outer_map_idx, slot, inner_map_idx);
+ emit_check_err(gen);
+}
+
void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx)
{
int attr_size = offsetofend(union bpf_attr, map_fd);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7c74342bb668..7f10dd501a52 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -168,39 +168,24 @@ int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
return 0;
}
-enum kern_feature_id {
- /* v4.14: kernel support for program & map names. */
- FEAT_PROG_NAME,
- /* v5.2: kernel support for global data sections. */
- FEAT_GLOBAL_DATA,
- /* BTF support */
- FEAT_BTF,
- /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
- FEAT_BTF_FUNC,
- /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
- FEAT_BTF_DATASEC,
- /* BTF_FUNC_GLOBAL is supported */
- FEAT_BTF_GLOBAL_FUNC,
- /* BPF_F_MMAPABLE is supported for arrays */
- FEAT_ARRAY_MMAP,
- /* kernel support for expected_attach_type in BPF_PROG_LOAD */
- FEAT_EXP_ATTACH_TYPE,
- /* bpf_probe_read_{kernel,user}[_str] helpers */
- FEAT_PROBE_READ_KERN,
- /* BPF_PROG_BIND_MAP is supported */
- FEAT_PROG_BIND_MAP,
- /* Kernel support for module BTFs */
- FEAT_MODULE_BTF,
- /* BTF_KIND_FLOAT support */
- FEAT_BTF_FLOAT,
- /* BPF perf link support */
- FEAT_PERF_LINK,
- /* BTF_KIND_DECL_TAG support */
- FEAT_BTF_DECL_TAG,
- __FEAT_CNT,
-};
+__u32 libbpf_major_version(void)
+{
+ return LIBBPF_MAJOR_VERSION;
+}
-static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
+__u32 libbpf_minor_version(void)
+{
+ return LIBBPF_MINOR_VERSION;
+}
+
+const char *libbpf_version_string(void)
+{
+#define __S(X) #X
+#define _S(X) __S(X)
+ return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
+#undef _S
+#undef __S
+}
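A trivial consumer-side sketch of the three new accessors:

	printf("linked against libbpf %s (%u.%u)\n", libbpf_version_string(),
	       libbpf_major_version(), libbpf_minor_version());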
enum reloc_type {
RELO_LD64,
@@ -209,19 +194,25 @@ enum reloc_type {
RELO_EXTERN_VAR,
RELO_EXTERN_FUNC,
RELO_SUBPROG_ADDR,
+ RELO_CORE,
};
struct reloc_desc {
enum reloc_type type;
int insn_idx;
- int map_idx;
- int sym_off;
+ union {
+ const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
+ struct {
+ int map_idx;
+ int sym_off;
+ };
+ };
};
struct bpf_sec_def;
typedef int (*init_fn_t)(struct bpf_program *prog, long cookie);
-typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_params *attr, long cookie);
+typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_opts *opts, long cookie);
typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie);
/* stored as sec_def->cookie for all libbpf-supported SEC()s */
@@ -304,7 +295,11 @@ struct bpf_program {
struct reloc_desc *reloc_desc;
int nr_reloc;
- int log_level;
+
+ /* BPF verifier log settings */
+ char *log_buf;
+ size_t log_size;
+ __u32 log_level;
struct {
int nr;
@@ -400,6 +395,7 @@ struct bpf_map {
char *pin_path;
bool pinned;
bool reused;
+ bool skipped;
__u64 map_extra;
};
@@ -546,6 +542,11 @@ struct bpf_object {
size_t btf_module_cnt;
size_t btf_module_cap;
+ /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
+ char *log_buf;
+ size_t log_size;
+ __u32 log_level;
+
void *priv;
bpf_object_clear_priv_t clear_priv;
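These per-object log fields are filled in from the open options; a sketch assuming the kernel_log_* fields added to bpf_object_open_opts in the same series (file name and buffer size are illustrative):

	static char log_buf[1024 * 1024];
	LIBBPF_OPTS(bpf_object_open_opts, opts,
		.kernel_log_buf = log_buf,
		.kernel_log_size = sizeof(log_buf),
		.kernel_log_level = 1,
	);
	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);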
@@ -681,6 +682,9 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
prog->instances.fds = NULL;
prog->instances.nr = -1;
+ /* inherit object's log_level */
+ prog->log_level = obj->log_level;
+
prog->sec_name = strdup(sec_name);
if (!prog->sec_name)
goto errout;
@@ -791,11 +795,36 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
return 0;
}
-static __u32 get_kernel_version(void)
+__u32 get_kernel_version(void)
{
+	/* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release,
+	 * but Ubuntu provides the /proc/version_signature file, as described
+	 * at https://ubuntu.com/kernel, with example contents below, which we
+	 * can use to get a proper LINUX_VERSION_CODE.
+	 *
+	 * Ubuntu 5.4.0-12.15-generic 5.4.8
+	 *
+	 * In the above, 5.4.8 is the version the kernel actually expects,
+	 * while the uname() call will return 5.4.0 in info.release.
+	 */
+ const char *ubuntu_kver_file = "/proc/version_signature";
__u32 major, minor, patch;
struct utsname info;
+ if (access(ubuntu_kver_file, R_OK) == 0) {
+ FILE *f;
+
+ f = fopen(ubuntu_kver_file, "r");
+ if (f) {
+			if (fscanf(f, "%*s %*s %u.%u.%u\n", &major, &minor, &patch) == 3) {
+ fclose(f);
+ return KERNEL_VERSION(major, minor, patch);
+ }
+ fclose(f);
+ }
+ /* something went wrong, fall back to uname() approach */
+ }
+
uname(&info);
if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
return 0;
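The resulting version code uses the usual packed encoding; a quick arithmetic check (newer kernel headers clamp the third argument at 255):

	/* KERNEL_VERSION(a, b, c) == (a << 16) + (b << 8) + c */
	__u32 v = KERNEL_VERSION(5, 4, 8);	/* (5 << 16) + (4 << 8) + 8 */
						/* == 0x050408 == 328712 */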
@@ -1161,12 +1190,10 @@ static struct bpf_object *bpf_object__new(const char *path,
strcpy(obj->path, path);
if (obj_name) {
- strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
- obj->name[sizeof(obj->name) - 1] = 0;
+ libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
} else {
/* Using basename() GNU version which doesn't modify arg. */
- strncpy(obj->name, basename((void *)path),
- sizeof(obj->name) - 1);
+ libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
end = strchr(obj->name, '.');
if (end)
*end = 0;
@@ -1318,7 +1345,10 @@ static int bpf_object__check_endianness(struct bpf_object *obj)
static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
- memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
+	/* libbpf_strlcpy() only copies the first N - 1 bytes, so passing
+	 * size + 1 won't read past the end of the ELF data section buffer
+	 */
+ libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
pr_debug("license of %s is %s\n", obj->path, obj->license);
return 0;
}
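libbpf_strlcpy() is an internal helper (libbpf_internal.h); the behavior the comment above relies on is roughly the following sketch, not necessarily the verbatim implementation:

	/* Copy at most sz - 1 bytes and always NUL-terminate; unlike
	 * strncpy(), stop at the source's NUL and do not zero-pad the
	 * remainder.
	 */
	static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz)
	{
		size_t i;

		if (!sz)
			return;
		sz--;
		for (i = 0; i < sz && src[i]; i++)
			dst[i] = src[i];
		dst[i] = '\0';
	}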
@@ -2076,6 +2106,7 @@ static const char *__btf_kind_str(__u16 kind)
case BTF_KIND_DATASEC: return "datasec";
case BTF_KIND_FLOAT: return "float";
case BTF_KIND_DECL_TAG: return "decl_tag";
+ case BTF_KIND_TYPE_TAG: return "type_tag";
default: return "unknown";
}
}
@@ -2255,6 +2286,9 @@ int parse_btf_map_def(const char *map_name, struct btf *btf,
map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
}
else if (strcmp(name, "values") == 0) {
+ bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
+ bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
+ const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
char inner_map_name[128];
int err;
@@ -2268,8 +2302,8 @@ int parse_btf_map_def(const char *map_name, struct btf *btf,
map_name, name);
return -EINVAL;
}
- if (!bpf_map_type__is_map_in_map(map_def->map_type)) {
- pr_warn("map '%s': should be map-in-map.\n",
+ if (!is_map_in_map && !is_prog_array) {
+ pr_warn("map '%s': should be map-in-map or prog-array.\n",
map_name);
return -ENOTSUP;
}
@@ -2281,22 +2315,30 @@ int parse_btf_map_def(const char *map_name, struct btf *btf,
map_def->value_size = 4;
t = btf__type_by_id(btf, m->type);
if (!t) {
- pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
- map_name, m->type);
+ pr_warn("map '%s': %s type [%d] not found.\n",
+ map_name, desc, m->type);
return -EINVAL;
}
if (!btf_is_array(t) || btf_array(t)->nelems) {
- pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
- map_name);
+ pr_warn("map '%s': %s spec is not a zero-sized array.\n",
+ map_name, desc);
return -EINVAL;
}
t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
- map_name, btf_kind_str(t));
+ pr_warn("map '%s': %s def is of unexpected kind %s.\n",
+ map_name, desc, btf_kind_str(t));
return -EINVAL;
}
t = skip_mods_and_typedefs(btf, t->type, NULL);
+ if (is_prog_array) {
+ if (!btf_is_func_proto(t)) {
+ pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
+ map_name, btf_kind_str(t));
+ return -EINVAL;
+ }
+ continue;
+ }
if (!btf_is_struct(t)) {
pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
map_name, btf_kind_str(t));
@@ -2588,8 +2630,10 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
+ bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
- return !has_func || !has_datasec || !has_func_global || !has_float || !has_decl_tag;
+ return !has_func || !has_datasec || !has_func_global || !has_float ||
+ !has_decl_tag || !has_type_tag;
}
static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
@@ -2599,6 +2643,7 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
+ bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
struct btf_type *t;
int i, j, vlen;
@@ -2657,6 +2702,10 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
*/
t->name_off = 0;
t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
+ } else if (!has_type_tag && btf_is_type_tag(t)) {
+ /* replace TYPE_TAG with a CONST */
+ t->name_off = 0;
+ t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
}
}
}
@@ -2752,13 +2801,12 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
t_var = btf__type_by_id(btf, vsi->type);
- var = btf_var(t_var);
-
- if (!btf_is_var(t_var)) {
+ if (!t_var || !btf_is_var(t_var)) {
pr_debug("Non-VAR type seen in section %s\n", name);
return -EINVAL;
}
+ var = btf_var(t_var);
if (var->linkage == BTF_VAR_STATIC)
continue;
@@ -2972,7 +3020,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
*/
btf__set_fd(kern_btf, 0);
} else {
- err = btf__load_into_kernel(kern_btf);
+ /* currently BPF_BTF_LOAD only supports log_level 1 */
+ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
+ obj->log_level ? 1 : 0);
}
if (sanitize) {
if (!err) {
@@ -3191,11 +3241,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
Elf_Scn *scn;
Elf64_Shdr *sh;
- /* ELF section indices are 1-based, so allocate +1 element to keep
- * indexing simple. Also include 0th invalid section into sec_cnt for
- * simpler and more traditional iteration logic.
+	/* ELF section indices are 0-based, but sec #0 is the special "invalid"
+	 * section. e_shnum does include sec #0, so an array of e_shnum
+	 * elements is enough to hold all the sections.
+	 */
- obj->efile.sec_cnt = 1 + obj->efile.ehdr->e_shnum;
+ obj->efile.sec_cnt = obj->efile.ehdr->e_shnum;
obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
if (!obj->efile.secs)
return -ENOMEM;
@@ -3271,8 +3321,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
obj->efile.btf_maps_shndx = idx;
} else if (strcmp(name, BTF_ELF_SEC) == 0) {
+ if (sh->sh_type != SHT_PROGBITS)
+ return -LIBBPF_ERRNO__FORMAT;
btf_data = data;
} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
+ if (sh->sh_type != SHT_PROGBITS)
+ return -LIBBPF_ERRNO__FORMAT;
btf_ext_data = data;
} else if (sh->sh_type == SHT_SYMTAB) {
/* already processed during the first pass above */
@@ -3303,6 +3357,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
} else if (sh->sh_type == SHT_REL) {
int targ_sec_idx = sh->sh_info; /* points to other section */
+ if (sh->sh_entsize != sizeof(Elf64_Rel) ||
+ targ_sec_idx >= obj->efile.sec_cnt)
+ return -LIBBPF_ERRNO__FORMAT;
+
/* Only do relo for section with exec instructions */
if (!section_have_execinstr(obj, targ_sec_idx) &&
strcmp(name, ".rel" STRUCT_OPS_SEC) &&
@@ -3333,7 +3391,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
/* sort BPF programs by section name and in-section instruction offset
* for faster search */
- qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
+ if (obj->nr_programs)
+ qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
return bpf_object__init_btf(obj, btf_data, btf_ext_data);
}
@@ -3555,7 +3614,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
sh = elf_sec_hdr(obj, scn);
- if (!sh)
+ if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
return -LIBBPF_ERRNO__FORMAT;
dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
@@ -4022,7 +4081,7 @@ static int
bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
{
const char *relo_sec_name, *sec_name;
- size_t sec_idx = shdr->sh_info;
+ size_t sec_idx = shdr->sh_info, sym_idx;
struct bpf_program *prog;
struct reloc_desc *relos;
int err, i, nrels;
@@ -4033,6 +4092,9 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat
Elf64_Sym *sym;
Elf64_Rel *rel;
+ if (sec_idx >= obj->efile.sec_cnt)
+ return -EINVAL;
+
scn = elf_sec_by_idx(obj, sec_idx);
scn_data = elf_sec_data(obj, scn);
@@ -4052,16 +4114,23 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat
return -LIBBPF_ERRNO__FORMAT;
}
- sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
+ sym_idx = ELF64_R_SYM(rel->r_info);
+ sym = elf_sym_by_idx(obj, sym_idx);
if (!sym) {
- pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
- relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i);
+ pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
+ relo_sec_name, sym_idx, i);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ if (sym->st_shndx >= obj->efile.sec_cnt) {
+ pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
+ relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
return -LIBBPF_ERRNO__FORMAT;
}
if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
- relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i);
+ relo_sec_name, (size_t)rel->r_offset, i);
return -LIBBPF_ERRNO__FORMAT;
}
@@ -4265,30 +4334,24 @@ int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
static int
bpf_object__probe_loading(struct bpf_object *obj)
{
- struct bpf_load_program_attr attr;
char *cp, errmsg[STRERR_BUFSIZE];
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
- int ret;
+ int ret, insn_cnt = ARRAY_SIZE(insns);
if (obj->gen_loader)
return 0;
- /* make sure basic loading works */
-
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- attr.insns = insns;
- attr.insns_cnt = ARRAY_SIZE(insns);
- attr.license = "GPL";
+ ret = bump_rlimit_memlock();
+ if (ret)
+ pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
- ret = bpf_load_program_xattr(&attr, NULL, 0);
- if (ret < 0) {
- attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
- ret = bpf_load_program_xattr(&attr, NULL, 0);
- }
+ /* make sure basic loading works */
+ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
+ if (ret < 0)
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
if (ret < 0) {
ret = errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
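The new low-level loader takes type, name, license, and instructions as direct arguments, with everything else in opts; a sketch with an explicit verifier log (buffer size illustrative, insns and insn_cnt as in the probes here):

	static char vlog[256 * 1024];
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.log_buf = vlog,
		.log_size = sizeof(vlog),
		.log_level = 1,
	);
	int fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "probe", "GPL",
			       insns, insn_cnt, &opts);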
@@ -4312,29 +4375,19 @@ static int probe_fd(int fd)
static int probe_kern_prog_name(void)
{
- struct bpf_load_program_attr attr;
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
- int ret;
+ int ret, insn_cnt = ARRAY_SIZE(insns);
/* make sure loading with name works */
-
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- attr.insns = insns;
- attr.insns_cnt = ARRAY_SIZE(insns);
- attr.license = "GPL";
- attr.name = "test";
- ret = bpf_load_program_xattr(&attr, NULL, 0);
+ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL);
return probe_fd(ret);
}
static int probe_kern_global_data(void)
{
- struct bpf_load_program_attr prg_attr;
- struct bpf_create_map_attr map_attr;
char *cp, errmsg[STRERR_BUFSIZE];
struct bpf_insn insns[] = {
BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
@@ -4342,15 +4395,9 @@ static int probe_kern_global_data(void)
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
- int ret, map;
-
- memset(&map_attr, 0, sizeof(map_attr));
- map_attr.map_type = BPF_MAP_TYPE_ARRAY;
- map_attr.key_size = sizeof(int);
- map_attr.value_size = 32;
- map_attr.max_entries = 1;
+ int ret, map, insn_cnt = ARRAY_SIZE(insns);
- map = bpf_create_map_xattr(&map_attr);
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);
if (map < 0) {
ret = -errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
@@ -4361,13 +4408,7 @@ static int probe_kern_global_data(void)
insns[0].imm = map;
- memset(&prg_attr, 0, sizeof(prg_attr));
- prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- prg_attr.insns = insns;
- prg_attr.insns_cnt = ARRAY_SIZE(insns);
- prg_attr.license = "GPL";
-
- ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
+ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
close(map);
return probe_fd(ret);
}
@@ -4468,45 +4509,51 @@ static int probe_kern_btf_decl_tag(void)
strs, sizeof(strs)));
}
-static int probe_kern_array_mmap(void)
+static int probe_kern_btf_type_tag(void)
{
- struct bpf_create_map_attr attr = {
- .map_type = BPF_MAP_TYPE_ARRAY,
- .map_flags = BPF_F_MMAPABLE,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 1,
+ static const char strs[] = "\0tag";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* attr */
+ BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */
+ /* ptr */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */
};
- return probe_fd(bpf_create_map_xattr(&attr));
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
+}
+
+static int probe_kern_array_mmap(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
+ int fd;
+
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts);
+ return probe_fd(fd);
}
static int probe_kern_exp_attach_type(void)
{
- struct bpf_load_program_attr attr;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
+ int fd, insn_cnt = ARRAY_SIZE(insns);
- memset(&attr, 0, sizeof(attr));
/* use any valid combination of program type and (optional)
* non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
* to see if kernel supports expected_attach_type field for
* BPF_PROG_LOAD command
*/
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
- attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
- attr.insns = insns;
- attr.insns_cnt = ARRAY_SIZE(insns);
- attr.license = "GPL";
-
- return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
+ fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(fd);
}
static int probe_kern_probe_read_kernel(void)
{
- struct bpf_load_program_attr attr;
struct bpf_insn insns[] = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
@@ -4515,34 +4562,22 @@ static int probe_kern_probe_read_kernel(void)
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
};
+ int fd, insn_cnt = ARRAY_SIZE(insns);
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = BPF_PROG_TYPE_KPROBE;
- attr.insns = insns;
- attr.insns_cnt = ARRAY_SIZE(insns);
- attr.license = "GPL";
-
- return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
+ fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
+ return probe_fd(fd);
}
static int probe_prog_bind_map(void)
{
- struct bpf_load_program_attr prg_attr;
- struct bpf_create_map_attr map_attr;
char *cp, errmsg[STRERR_BUFSIZE];
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
- int ret, map, prog;
-
- memset(&map_attr, 0, sizeof(map_attr));
- map_attr.map_type = BPF_MAP_TYPE_ARRAY;
- map_attr.key_size = sizeof(int);
- map_attr.value_size = 32;
- map_attr.max_entries = 1;
+ int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
- map = bpf_create_map_xattr(&map_attr);
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);
if (map < 0) {
ret = -errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
@@ -4551,13 +4586,7 @@ static int probe_prog_bind_map(void)
return ret;
}
- memset(&prg_attr, 0, sizeof(prg_attr));
- prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- prg_attr.insns = insns;
- prg_attr.insns_cnt = ARRAY_SIZE(insns);
- prg_attr.license = "GPL";
-
- prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
+ prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
if (prog < 0) {
close(map);
return 0;
@@ -4602,19 +4631,14 @@ static int probe_module_btf(void)
static int probe_perf_link(void)
{
- struct bpf_load_program_attr attr;
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
int prog_fd, link_fd, err;
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
- attr.insns = insns;
- attr.insns_cnt = ARRAY_SIZE(insns);
- attr.license = "GPL";
- prog_fd = bpf_load_program_xattr(&attr, NULL, 0);
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
+ insns, ARRAY_SIZE(insns), NULL);
if (prog_fd < 0)
return -errno;
@@ -4687,14 +4711,20 @@ static struct kern_feature_desc {
[FEAT_BTF_DECL_TAG] = {
"BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
},
+ [FEAT_BTF_TYPE_TAG] = {
+ "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
+ },
+ [FEAT_MEMCG_ACCOUNT] = {
+ "memcg-based memory accounting", probe_memcg_account,
+ },
};
-static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
+bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
{
struct kern_feature_desc *feat = &feature_probes[feat_id];
int ret;
- if (obj->gen_loader)
+ if (obj && obj->gen_loader)
/* To generate loader program assume the latest kernel
* to avoid doing extra prog_load, map_create syscalls.
*/
@@ -4821,19 +4851,16 @@ static void bpf_map__destroy(struct bpf_map *map);
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
{
- struct bpf_create_map_params create_attr;
+ LIBBPF_OPTS(bpf_map_create_opts, create_attr);
struct bpf_map_def *def = &map->def;
+ const char *map_name = NULL;
+ __u32 max_entries;
int err = 0;
- memset(&create_attr, 0, sizeof(create_attr));
-
if (kernel_supports(obj, FEAT_PROG_NAME))
- create_attr.name = map->name;
+ map_name = map->name;
create_attr.map_ifindex = map->map_ifindex;
- create_attr.map_type = def->type;
create_attr.map_flags = def->map_flags;
- create_attr.key_size = def->key_size;
- create_attr.value_size = def->value_size;
create_attr.numa_node = map->numa_node;
create_attr.map_extra = map->map_extra;
@@ -4847,18 +4874,14 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
return nr_cpus;
}
pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
- create_attr.max_entries = nr_cpus;
+ max_entries = nr_cpus;
} else {
- create_attr.max_entries = def->max_entries;
+ max_entries = def->max_entries;
}
if (bpf_map__is_struct_ops(map))
- create_attr.btf_vmlinux_value_type_id =
- map->btf_vmlinux_value_type_id;
+ create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
- create_attr.btf_fd = 0;
- create_attr.btf_key_type_id = 0;
- create_attr.btf_value_type_id = 0;
if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
create_attr.btf_fd = btf__fd(obj->btf);
create_attr.btf_key_type_id = map->btf_key_type_id;
@@ -4904,13 +4927,17 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
}
if (obj->gen_loader) {
- bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps);
+ bpf_gen__map_create(obj->gen_loader, def->type, map_name,
+ def->key_size, def->value_size, max_entries,
+ &create_attr, is_inner ? -1 : map - obj->maps);
/* Pretend to have valid FD to pass various fd >= 0 checks.
* This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
*/
map->fd = 0;
} else {
- map->fd = libbpf__bpf_create_map_xattr(&create_attr);
+ map->fd = bpf_map_create(def->type, map_name,
+ def->key_size, def->value_size,
+ max_entries, &create_attr);
}
if (map->fd < 0 && (create_attr.btf_key_type_id ||
create_attr.btf_value_type_id)) {
@@ -4925,7 +4952,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
create_attr.btf_value_type_id = 0;
map->btf_key_type_id = 0;
map->btf_value_type_id = 0;
- map->fd = libbpf__bpf_create_map_xattr(&create_attr);
+ map->fd = bpf_map_create(def->type, map_name,
+ def->key_size, def->value_size,
+ max_entries, &create_attr);
}
err = map->fd < 0 ? -errno : 0;
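/* Usage sketch (illustrative, not from this patch): bpf_map_create()
 * mirrors bpf_prog_load() -- mandatory parameters are plain arguments,
 * while optional attributes travel in bpf_map_create_opts. The map name
 * and sizes below are arbitrary.
 */
static int create_example_map(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts,
		.map_flags = BPF_F_NO_PREALLOC,	/* optional attribute */
	);

	return bpf_map_create(BPF_MAP_TYPE_HASH, "example_map",
			      sizeof(__u32), sizeof(__u64),
			      1024 /* max_entries */, &opts);
}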
@@ -4940,7 +4969,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
return err;
}
-static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
+static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
{
const struct bpf_map *targ_map;
unsigned int i;
@@ -4952,18 +4981,18 @@ static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
targ_map = map->init_slots[i];
fd = bpf_map__fd(targ_map);
+
if (obj->gen_loader) {
- pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n",
- map - obj->maps, i, targ_map - obj->maps);
- return -ENOTSUP;
+ bpf_gen__populate_outer_map(obj->gen_loader,
+ map - obj->maps, i,
+ targ_map - obj->maps);
} else {
err = bpf_map_update_elem(map->fd, &i, &fd, 0);
}
if (err) {
err = -errno;
pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
- map->name, i, targ_map->name,
- fd, err);
+ map->name, i, targ_map->name, fd, err);
return err;
}
pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
@@ -4976,6 +5005,59 @@ static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
return 0;
}
+static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
+{
+ const struct bpf_program *targ_prog;
+ unsigned int i;
+ int fd, err;
+
+ if (obj->gen_loader)
+ return -ENOTSUP;
+
+ for (i = 0; i < map->init_slots_sz; i++) {
+ if (!map->init_slots[i])
+ continue;
+
+ targ_prog = map->init_slots[i];
+ fd = bpf_program__fd(targ_prog);
+
+ err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+ if (err) {
+ err = -errno;
+ pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
+ map->name, i, targ_prog->name, fd, err);
+ return err;
+ }
+ pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
+ map->name, i, targ_prog->name, fd);
+ }
+
+ zfree(&map->init_slots);
+ map->init_slots_sz = 0;
+
+ return 0;
+}
+
+static int bpf_object_init_prog_arrays(struct bpf_object *obj)
+{
+ struct bpf_map *map;
+ int i, err;
+
+ for (i = 0; i < obj->nr_maps; i++) {
+ map = &obj->maps[i];
+
+ if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
+ continue;
+
+ err = init_prog_array_slots(obj, map);
+ if (err < 0) {
+ zclose(map->fd);
+ return err;
+ }
+ }
+ return 0;
+}
+
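/* BPF-side sketch (illustrative, not from this patch): with the
 * init_prog_array_slots() support above, a BPF_MAP_TYPE_PROG_ARRAY can
 * be declaratively initialized with entry-point programs right in BPF C
 * code, and libbpf fills the slots after program load. Assumes
 * <linux/bpf.h> and <bpf/bpf_helpers.h>; names are arbitrary.
 */
SEC("socket")
int tail_target(void *ctx)
{
	return 0;
}

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 2);
	__uint(key_size, sizeof(__u32));
	__array(values, int (void *));
} prog_array_init SEC(".maps") = {
	.values = {
		[1] = (void *)&tail_target,	/* slot [1] -> entry-point prog */
	},
};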
static int
bpf_object__create_maps(struct bpf_object *obj)
{
@@ -4988,6 +5070,26 @@ bpf_object__create_maps(struct bpf_object *obj)
for (i = 0; i < obj->nr_maps; i++) {
map = &obj->maps[i];
+ /* To support old kernels, we skip creating global data maps
+ * (.rodata, .data, .kconfig, etc); later on, during program
+ * loading, if we detect that at least one of the to-be-loaded
+ * programs is referencing any global data map, we'll error
+ * out with program name and relocation index logged.
+ * This approach allows us to accommodate Clang emitting
+ * unnecessary .rodata.str1.1 sections for string literals,
+ * and also allows CO-RE applications to use global variables
+ * in some BPF programs but not others.
+ * If those global variable-using programs are not loaded at
+ * runtime due to bpf_program__set_autoload(prog, false),
+ * bpf_object loading will succeed just fine even on old
+ * kernels.
+ */
+ if (bpf_map__is_internal(map) &&
+ !kernel_supports(obj, FEAT_GLOBAL_DATA)) {
+ map->skipped = true;
+ continue;
+ }
+
retried = false;
retry:
if (map->pin_path) {
@@ -5024,8 +5126,8 @@ retry:
}
}
- if (map->init_slots_sz) {
- err = init_map_slots(obj, map);
+ if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
+ err = init_map_in_map_slots(obj, map);
if (err < 0) {
zclose(map->fd);
goto err_out;
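/* Usage sketch (illustrative, not from this patch): per the global-data
 * skip logic above, an application targeting old kernels can still load
 * the object by disabling autoload for the programs that reference
 * global data maps. "uses_global_data" is a hypothetical program name.
 */
static int load_skipping_global_data(struct bpf_object *obj)
{
	struct bpf_program *prog;

	prog = bpf_object__find_program_by_name(obj, "uses_global_data");
	if (prog)
		bpf_program__set_autoload(prog, false);

	return bpf_object__load(obj);
}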
@@ -5097,15 +5199,18 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand,
struct bpf_core_cand_list *cands)
{
struct bpf_core_cand *new_cands, *cand;
- const struct btf_type *t;
- const char *targ_name;
+ const struct btf_type *t, *local_t;
+ const char *targ_name, *local_name;
size_t targ_essent_len;
int n, i;
+ local_t = btf__type_by_id(local_cand->btf, local_cand->id);
+ local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
+
n = btf__type_cnt(targ_btf);
for (i = targ_start_id; i < n; i++) {
t = btf__type_by_id(targ_btf, i);
- if (btf_kind(t) != btf_kind(local_cand->t))
+ if (btf_kind(t) != btf_kind(local_t))
continue;
targ_name = btf__name_by_offset(targ_btf, t->name_off);
@@ -5116,12 +5221,12 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand,
if (targ_essent_len != local_essent_len)
continue;
- if (strncmp(local_cand->name, targ_name, local_essent_len) != 0)
+ if (strncmp(local_name, targ_name, local_essent_len) != 0)
continue;
pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
- local_cand->id, btf_kind_str(local_cand->t),
- local_cand->name, i, btf_kind_str(t), targ_name,
+ local_cand->id, btf_kind_str(local_t),
+ local_name, i, btf_kind_str(t), targ_name,
targ_btf_name);
new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
sizeof(*cands->cands));
@@ -5130,8 +5235,6 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand,
cand = &new_cands[cands->len];
cand->btf = targ_btf;
- cand->t = t;
- cand->name = targ_name;
cand->id = i;
cands->cands = new_cands;
@@ -5238,18 +5341,21 @@ bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 l
struct bpf_core_cand local_cand = {};
struct bpf_core_cand_list *cands;
const struct btf *main_btf;
+ const struct btf_type *local_t;
+ const char *local_name;
size_t local_essent_len;
int err, i;
local_cand.btf = local_btf;
- local_cand.t = btf__type_by_id(local_btf, local_type_id);
- if (!local_cand.t)
+ local_cand.id = local_type_id;
+ local_t = btf__type_by_id(local_btf, local_type_id);
+ if (!local_t)
return ERR_PTR(-EINVAL);
- local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off);
- if (str_is_empty(local_cand.name))
+ local_name = btf__name_by_offset(local_btf, local_t->name_off);
+ if (str_is_empty(local_name))
return ERR_PTR(-EINVAL);
- local_essent_len = bpf_core_essential_name_len(local_cand.name);
+ local_essent_len = bpf_core_essential_name_len(local_name);
cands = calloc(1, sizeof(*cands));
if (!cands)
@@ -5399,12 +5505,31 @@ static void *u32_as_hash_key(__u32 x)
return (void *)(uintptr_t)x;
}
+static int record_relo_core(struct bpf_program *prog,
+ const struct bpf_core_relo *core_relo, int insn_idx)
+{
+ struct reloc_desc *relos, *relo;
+
+ relos = libbpf_reallocarray(prog->reloc_desc,
+ prog->nr_reloc + 1, sizeof(*relos));
+ if (!relos)
+ return -ENOMEM;
+ relo = &relos[prog->nr_reloc];
+ relo->type = RELO_CORE;
+ relo->insn_idx = insn_idx;
+ relo->core_relo = core_relo;
+ prog->reloc_desc = relos;
+ prog->nr_reloc++;
+ return 0;
+}
+
static int bpf_core_apply_relo(struct bpf_program *prog,
const struct bpf_core_relo *relo,
int relo_idx,
const struct btf *local_btf,
struct hashmap *cand_cache)
{
+ struct bpf_core_spec specs_scratch[3] = {};
const void *type_key = u32_as_hash_key(relo->type_id);
struct bpf_core_cand_list *cands = NULL;
const char *prog_name = prog->name;
@@ -5435,13 +5560,15 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
return -EINVAL;
if (prog->obj->gen_loader) {
- pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n",
+ const char *spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+
+ pr_debug("record_relo_core: prog %td insn[%d] %s %s %s final insn_idx %d\n",
prog - prog->obj->programs, relo->insn_off / 8,
- local_name, relo->kind);
- return -ENOTSUP;
+ btf_kind_str(local_type), local_name, spec_str, insn_idx);
+ return record_relo_core(prog, relo, insn_idx);
}
- if (relo->kind != BPF_TYPE_ID_LOCAL &&
+ if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
!hashmap__find(cand_cache, type_key, (void **)&cands)) {
cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
if (IS_ERR(cands)) {
@@ -5457,7 +5584,8 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
}
}
- return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands);
+ return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo,
+ relo_idx, local_btf, cands, specs_scratch);
}
static int
@@ -5587,6 +5715,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
insn[0].imm = relo->map_idx;
} else {
+ const struct bpf_map *map = &obj->maps[relo->map_idx];
+
+ if (map->skipped) {
+ pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n",
+ prog->name, i);
+ return -ENOTSUP;
+ }
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
insn[0].imm = obj->maps[relo->map_idx].fd;
}
@@ -5635,6 +5770,9 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
case RELO_CALL:
/* handled already */
break;
+ case RELO_CORE:
+ /* will be handled by bpf_program_record_relos() */
+ break;
default:
pr_warn("prog '%s': relo #%d: bad relo type %d\n",
prog->name, i, relo->type);
@@ -5798,6 +5936,8 @@ static int cmp_relo_by_insn_idx(const void *key, const void *elem)
static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
{
+ if (!prog->nr_reloc)
+ return NULL;
return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
}
@@ -5813,8 +5953,9 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra
relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
if (!relos)
return -ENOMEM;
- memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
- sizeof(*relos) * subprog->nr_reloc);
+ if (subprog->nr_reloc)
+ memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
+ sizeof(*relos) * subprog->nr_reloc);
for (i = main_prog->nr_reloc; i < new_cnt; i++)
relos[i].insn_idx += subprog->sub_insn_off;
@@ -6072,6 +6213,35 @@ bpf_object__free_relocs(struct bpf_object *obj)
}
}
+static int cmp_relocs(const void *_a, const void *_b)
+{
+ const struct reloc_desc *a = _a;
+ const struct reloc_desc *b = _b;
+
+ if (a->insn_idx != b->insn_idx)
+ return a->insn_idx < b->insn_idx ? -1 : 1;
+
+ /* no two relocations should have the same insn_idx, but ... */
+ if (a->type != b->type)
+ return a->type < b->type ? -1 : 1;
+
+ return 0;
+}
+
+static void bpf_object__sort_relos(struct bpf_object *obj)
+{
+ int i;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ struct bpf_program *p = &obj->programs[i];
+
+ if (!p->nr_reloc)
+ continue;
+
+ qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
+ }
+}
+
static int
bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
{
@@ -6086,6 +6256,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
err);
return err;
}
+ if (obj->gen_loader)
+ bpf_object__sort_relos(obj);
}
/* Before relocating calls pre-process relocations and mark
@@ -6121,6 +6293,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
*/
if (prog_is_subprog(obj, prog))
continue;
+ if (!prog->load)
+ continue;
err = bpf_object__relocate_calls(obj, prog);
if (err) {
@@ -6134,6 +6308,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
prog = &obj->programs[i];
if (prog_is_subprog(obj, prog))
continue;
+ if (!prog->load)
+ continue;
err = bpf_object__relocate_data(obj, prog);
if (err) {
pr_warn("prog '%s': failed to relocate data references: %d\n",
@@ -6156,9 +6332,11 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
int i, j, nrels, new_sz;
const struct btf_var_secinfo *vi = NULL;
const struct btf_type *sec, *var, *def;
- struct bpf_map *map = NULL, *targ_map;
+ struct bpf_map *map = NULL, *targ_map = NULL;
+ struct bpf_program *targ_prog = NULL;
+ bool is_prog_array, is_map_in_map;
const struct btf_member *member;
- const char *name, *mname;
+ const char *name, *mname, *type;
unsigned int moff;
Elf64_Sym *sym;
Elf64_Rel *rel;
@@ -6185,11 +6363,6 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
return -LIBBPF_ERRNO__FORMAT;
}
name = elf_sym_str(obj, sym->st_name) ?: "<?>";
- if (sym->st_shndx != obj->efile.btf_maps_shndx) {
- pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
- i, name);
- return -LIBBPF_ERRNO__RELOC;
- }
pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
@@ -6211,19 +6384,45 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
return -EINVAL;
}
- if (!bpf_map_type__is_map_in_map(map->def.type))
- return -EINVAL;
- if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
- map->def.key_size != sizeof(int)) {
- pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
- i, map->name, sizeof(int));
+ is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
+ is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
+ type = is_map_in_map ? "map" : "prog";
+ if (is_map_in_map) {
+ if (sym->st_shndx != obj->efile.btf_maps_shndx) {
+ pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
+ i, name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
+ map->def.key_size != sizeof(int)) {
+ pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
+ i, map->name, sizeof(int));
+ return -EINVAL;
+ }
+ targ_map = bpf_object__find_map_by_name(obj, name);
+ if (!targ_map) {
+ pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
+ i, name);
+ return -ESRCH;
+ }
+ } else if (is_prog_array) {
+ targ_prog = bpf_object__find_program_by_name(obj, name);
+ if (!targ_prog) {
+ pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
+ i, name);
+ return -ESRCH;
+ }
+ if (targ_prog->sec_idx != sym->st_shndx ||
+ targ_prog->sec_insn_off * 8 != sym->st_value ||
+ prog_is_subprog(obj, targ_prog)) {
+ pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
+ i, name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ } else {
return -EINVAL;
}
- targ_map = bpf_object__find_map_by_name(obj, name);
- if (!targ_map)
- return -ESRCH;
-
var = btf__type_by_id(obj->btf, vi->type);
def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
if (btf_vlen(def) == 0)
@@ -6254,30 +6453,15 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
(new_sz - map->init_slots_sz) * host_ptr_sz);
map->init_slots_sz = new_sz;
}
- map->init_slots[moff] = targ_map;
+ map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
- pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
- i, map->name, moff, name);
+ pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
+ i, map->name, moff, type, name);
}
return 0;
}
-static int cmp_relocs(const void *_a, const void *_b)
-{
- const struct reloc_desc *a = _a;
- const struct reloc_desc *b = _b;
-
- if (a->insn_idx != b->insn_idx)
- return a->insn_idx < b->insn_idx ? -1 : 1;
-
- /* no two relocations should have the same insn_idx, but ... */
- if (a->type != b->type)
- return a->type < b->type ? -1 : 1;
-
- return 0;
-}
-
static int bpf_object__collect_relos(struct bpf_object *obj)
{
int i, err;
@@ -6310,14 +6494,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
return err;
}
- for (i = 0; i < obj->nr_programs; i++) {
- struct bpf_program *p = &obj->programs[i];
-
- if (!p->nr_reloc)
- continue;
-
- qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
- }
+ bpf_object__sort_relos(obj);
return 0;
}
@@ -6374,16 +6551,16 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
/* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */
static int libbpf_preload_prog(struct bpf_program *prog,
- struct bpf_prog_load_params *attr, long cookie)
+ struct bpf_prog_load_opts *opts, long cookie)
{
enum sec_def_flags def = cookie;
/* old kernels might not support specifying expected_attach_type */
if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
- attr->expected_attach_type = 0;
+ opts->expected_attach_type = 0;
if (def & SEC_SLEEPABLE)
- attr->prog_flags |= BPF_F_SLEEPABLE;
+ opts->prog_flags |= BPF_F_SLEEPABLE;
if ((prog->type == BPF_PROG_TYPE_TRACING ||
prog->type == BPF_PROG_TYPE_LSM ||
@@ -6402,25 +6579,28 @@ static int libbpf_preload_prog(struct bpf_program *prog,
/* but by now libbpf common logic is not utilizing
* prog->attach_btf_obj_fd/prog->attach_btf_id anymore because
- * this callback is called after attrs were populated by
- * libbpf, so this callback has to update attr explicitly here
+ * this callback is called after opts were populated by
+ * libbpf, so this callback has to update opts explicitly here
*/
- attr->attach_btf_obj_fd = btf_obj_fd;
- attr->attach_btf_id = btf_type_id;
+ opts->attach_btf_obj_fd = btf_obj_fd;
+ opts->attach_btf_id = btf_type_id;
}
return 0;
}
-static int
-load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
- char *license, __u32 kern_version, int *pfd)
+static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog,
+ struct bpf_insn *insns, int insns_cnt,
+ const char *license, __u32 kern_version,
+ int *prog_fd)
{
- struct bpf_prog_load_params load_attr = {};
- struct bpf_object *obj = prog->obj;
+ LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
+ const char *prog_name = NULL;
char *cp, errmsg[STRERR_BUFSIZE];
size_t log_buf_size = 0;
- char *log_buf = NULL;
+ char *log_buf = NULL, *tmp;
int btf_fd, ret, err;
+ bool own_log_buf = true;
+ __u32 log_level = prog->log_level;
if (prog->type == BPF_PROG_TYPE_UNSPEC) {
/*
@@ -6435,14 +6615,9 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
if (!insns || !insns_cnt)
return -EINVAL;
- load_attr.prog_type = prog->type;
load_attr.expected_attach_type = prog->expected_attach_type;
if (kernel_supports(obj, FEAT_PROG_NAME))
- load_attr.name = prog->name;
- load_attr.insns = insns;
- load_attr.insn_cnt = insns_cnt;
- load_attr.license = license;
- load_attr.attach_btf_id = prog->attach_btf_id;
+ prog_name = prog->name;
load_attr.attach_prog_fd = prog->attach_prog_fd;
load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
load_attr.attach_btf_id = prog->attach_btf_id;
@@ -6460,7 +6635,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.line_info_rec_size = prog->line_info_rec_size;
load_attr.line_info_cnt = prog->line_info_cnt;
}
- load_attr.log_level = prog->log_level;
+ load_attr.log_level = log_level;
load_attr.prog_flags = prog->prog_flags;
load_attr.fd_array = obj->fd_array;
@@ -6475,27 +6650,51 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
}
if (obj->gen_loader) {
- bpf_gen__prog_load(obj->gen_loader, &load_attr,
+ bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
+ license, insns, insns_cnt, &load_attr,
prog - obj->programs);
- *pfd = -1;
+ *prog_fd = -1;
return 0;
}
-retry_load:
- if (log_buf_size) {
- log_buf = malloc(log_buf_size);
- if (!log_buf)
- return -ENOMEM;
- *log_buf = 0;
+retry_load:
+ /* if log_level is zero, we don't request logs initially even if
+ * custom log_buf is specified; if the program load fails, then we'll
+ * bump log_level to 1 and use either custom log_buf or we'll allocate
+ * our own and retry the load to get details on what failed
+ */
+ if (log_level) {
+ if (prog->log_buf) {
+ log_buf = prog->log_buf;
+ log_buf_size = prog->log_size;
+ own_log_buf = false;
+ } else if (obj->log_buf) {
+ log_buf = obj->log_buf;
+ log_buf_size = obj->log_size;
+ own_log_buf = false;
+ } else {
+ log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
+ tmp = realloc(log_buf, log_buf_size);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ log_buf = tmp;
+ log_buf[0] = '\0';
+ own_log_buf = true;
+ }
}
load_attr.log_buf = log_buf;
- load_attr.log_buf_sz = log_buf_size;
- ret = libbpf__bpf_prog_load(&load_attr);
+ load_attr.log_size = log_buf_size;
+ load_attr.log_level = log_level;
+ ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
if (ret >= 0) {
- if (log_buf && load_attr.log_level)
- pr_debug("verifier log:\n%s", log_buf);
+ if (log_level && own_log_buf) {
+ pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
+ prog->name, log_buf);
+ }
if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
struct bpf_map *map;
@@ -6508,61 +6707,53 @@ retry_load:
if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) {
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
- pr_warn("prog '%s': failed to bind .rodata map: %s\n",
- prog->name, cp);
+ pr_warn("prog '%s': failed to bind map '%s': %s\n",
+ prog->name, map->real_name, cp);
/* Don't fail hard if can't bind rodata. */
}
}
}
- *pfd = ret;
+ *prog_fd = ret;
ret = 0;
goto out;
}
- if (!log_buf || errno == ENOSPC) {
- log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
- log_buf_size << 1);
-
- free(log_buf);
+ if (log_level == 0) {
+ log_level = 1;
goto retry_load;
}
- ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
+ /* On ENOSPC, increase log buffer size and retry, unless custom
+ * log_buf is specified.
+ * Be careful to not overflow u32, though. Kernel's log buf size limit
+ * isn't part of UAPI so it can always be bumped to full 4GB. So don't
+ * multiply by 2 unless we are sure we'll fit within 32 bits.
+ * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
+ */
+ if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
+ goto retry_load;
+
+ ret = -errno;
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
- pr_warn("load bpf program failed: %s\n", cp);
+ pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
pr_perm_msg(ret);
- if (log_buf && log_buf[0] != '\0') {
- ret = -LIBBPF_ERRNO__VERIFY;
- pr_warn("-- BEGIN DUMP LOG ---\n");
- pr_warn("\n%s\n", log_buf);
- pr_warn("-- END LOG --\n");
- } else if (load_attr.insn_cnt >= BPF_MAXINSNS) {
- pr_warn("Program too large (%zu insns), at most %d insns\n",
- load_attr.insn_cnt, BPF_MAXINSNS);
- ret = -LIBBPF_ERRNO__PROG2BIG;
- } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
- /* Wrong program type? */
- int fd;
-
- load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
- load_attr.expected_attach_type = 0;
- load_attr.log_buf = NULL;
- load_attr.log_buf_sz = 0;
- fd = libbpf__bpf_prog_load(&load_attr);
- if (fd >= 0) {
- close(fd);
- ret = -LIBBPF_ERRNO__PROGTYPE;
- goto out;
- }
+ if (own_log_buf && log_buf && log_buf[0] != '\0') {
+ pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
+ prog->name, log_buf);
+ }
+ if (insns_cnt >= BPF_MAXINSNS) {
+ pr_warn("prog '%s': program too large (%d insns), at most %d insns\n",
+ prog->name, insns_cnt, BPF_MAXINSNS);
}
out:
- free(log_buf);
+ if (own_log_buf)
+ free(log_buf);
return ret;
}
-static int bpf_program__record_externs(struct bpf_program *prog)
+static int bpf_program_record_relos(struct bpf_program *prog)
{
struct bpf_object *obj = prog->obj;
int i;
@@ -6584,6 +6775,17 @@ static int bpf_program__record_externs(struct bpf_program *prog)
ext->is_weak, false, BTF_KIND_FUNC,
relo->insn_idx);
break;
+ case RELO_CORE: {
+ struct bpf_core_relo cr = {
+ .insn_off = relo->insn_idx * 8,
+ .type_id = relo->core_relo->type_id,
+ .access_str_off = relo->core_relo->access_str_off,
+ .kind = relo->core_relo->kind,
+ };
+
+ bpf_gen__record_relo_core(obj->gen_loader, &cr);
+ break;
+ }
default:
continue;
}
@@ -6591,11 +6793,12 @@ static int bpf_program__record_externs(struct bpf_program *prog)
return 0;
}
-int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
+static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
+ const char *license, __u32 kern_ver)
{
int err = 0, fd, i;
- if (prog->obj->loaded) {
+ if (obj->loaded) {
pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
return libbpf_err(-EINVAL);
}
@@ -6621,10 +6824,11 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
prog->name, prog->instances.nr);
}
- if (prog->obj->gen_loader)
- bpf_program__record_externs(prog);
- err = load_program(prog, prog->insns, prog->insns_cnt,
- license, kern_ver, &fd);
+ if (obj->gen_loader)
+ bpf_program_record_relos(prog);
+ err = bpf_object_load_prog_instance(obj, prog,
+ prog->insns, prog->insns_cnt,
+ license, kern_ver, &fd);
if (!err)
prog->instances.fds[0] = fd;
goto out;
@@ -6652,8 +6856,9 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
continue;
}
- err = load_program(prog, result.new_insn_ptr,
- result.new_insn_cnt, license, kern_ver, &fd);
+ err = bpf_object_load_prog_instance(obj, prog,
+ result.new_insn_ptr, result.new_insn_cnt,
+ license, kern_ver, &fd);
if (err) {
pr_warn("Loading the %dth instance of program '%s' failed\n",
i, prog->name);
@@ -6670,6 +6875,11 @@ out:
return libbpf_err(err);
}
+int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_ver)
+{
+ return bpf_object_load_prog(prog->obj, prog, license, kern_ver);
+}
+
static int
bpf_object__load_progs(struct bpf_object *obj, int log_level)
{
@@ -6693,7 +6903,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
continue;
}
prog->log_level |= log_level;
- err = bpf_program__load(prog, obj->license, obj->kern_version);
+ err = bpf_object_load_prog(obj, prog, obj->license, obj->kern_version);
if (err)
return err;
}
@@ -6744,14 +6954,16 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object
return 0;
}
-static struct bpf_object *
-__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
- const struct bpf_object_open_opts *opts)
+static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
+ const struct bpf_object_open_opts *opts)
{
const char *obj_name, *kconfig, *btf_tmp_path;
struct bpf_object *obj;
char tmp_name[64];
int err;
+ char *log_buf;
+ size_t log_size;
+ __u32 log_level;
if (elf_version(EV_CURRENT) == EV_NONE) {
pr_warn("failed to init libelf for %s\n",
@@ -6774,10 +6986,22 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
pr_debug("loading object '%s' from buffer\n", obj_name);
}
+ log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
+ log_size = OPTS_GET(opts, kernel_log_size, 0);
+ log_level = OPTS_GET(opts, kernel_log_level, 0);
+ if (log_size > UINT_MAX)
+ return ERR_PTR(-EINVAL);
+ if (log_size && !log_buf)
+ return ERR_PTR(-EINVAL);
+
obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
if (IS_ERR(obj))
return obj;
+ obj->log_buf = log_buf;
+ obj->log_size = log_size;
+ obj->log_level = log_level;
+
btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
if (btf_tmp_path) {
if (strlen(btf_tmp_path) >= PATH_MAX) {
@@ -6831,7 +7055,7 @@ __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
return NULL;
pr_debug("loading %s\n", attr->file);
- return __bpf_object__open(attr->file, NULL, 0, &opts);
+ return bpf_object_open(attr->file, NULL, 0, &opts);
}
struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
@@ -6857,7 +7081,7 @@ bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
pr_debug("loading %s\n", path);
- return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts));
+ return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
}
struct bpf_object *
@@ -6867,7 +7091,7 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
if (!obj_buf || obj_buf_sz == 0)
return libbpf_err_ptr(-EINVAL);
- return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts));
+ return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
}
struct bpf_object *
@@ -6884,7 +7108,7 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
if (!obj_buf || obj_buf_sz == 0)
return errno = EINVAL, NULL;
- return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts));
+ return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, &opts));
}
static int bpf_object_unload(struct bpf_object *obj)
@@ -6915,10 +7139,6 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
bpf_object__for_each_map(m, obj) {
if (!bpf_map__is_internal(m))
continue;
- if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) {
- pr_warn("kernel doesn't support global data\n");
- return -ENOTSUP;
- }
if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
m->def.map_flags ^= BPF_F_MMAPABLE;
}
@@ -7241,14 +7461,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
return 0;
}
-int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
+static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
{
- struct bpf_object *obj;
int err, i;
- if (!attr)
- return libbpf_err(-EINVAL);
- obj = attr->obj;
if (!obj)
return libbpf_err(-EINVAL);
@@ -7258,7 +7474,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
}
if (obj->gen_loader)
- bpf_gen__init(obj->gen_loader, attr->log_level, obj->nr_programs, obj->nr_maps);
+ bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
err = bpf_object__probe_loading(obj);
err = err ? : bpf_object__load_vmlinux_btf(obj, false);
@@ -7267,8 +7483,9 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
err = err ? : bpf_object__sanitize_maps(obj);
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
err = err ? : bpf_object__create_maps(obj);
- err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path);
- err = err ? : bpf_object__load_progs(obj, attr->log_level);
+ err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
+ err = err ? : bpf_object__load_progs(obj, extra_log_level);
+ err = err ? : bpf_object_init_prog_arrays(obj);
if (obj->gen_loader) {
/* reset FDs */
@@ -7312,13 +7529,14 @@ out:
return libbpf_err(err);
}
-int bpf_object__load(struct bpf_object *obj)
+int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
{
- struct bpf_object_load_attr attr = {
- .obj = obj,
- };
+ return bpf_object_load(attr->obj, attr->log_level, attr->target_btf_path);
+}
- return bpf_object__load_xattr(&attr);
+int bpf_object__load(struct bpf_object *obj)
+{
+ return bpf_object_load(obj, 0, NULL);
}
static int make_parent_dir(const char *path)
@@ -7707,6 +7925,9 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
char *pin_path = NULL;
char buf[PATH_MAX];
+ if (map->skipped)
+ continue;
+
if (path) {
int len;
@@ -7733,7 +7954,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
return 0;
err_unpin_maps:
- while ((map = bpf_map__prev(map, obj))) {
+ while ((map = bpf_object__prev_map(obj, map))) {
if (!map->pin_path)
continue;
@@ -7813,7 +8034,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
return 0;
err_unpin_programs:
- while ((prog = bpf_program__prev(prog, obj))) {
+ while ((prog = bpf_object__prev_program(obj, prog))) {
char buf[PATH_MAX];
int len;
@@ -8154,9 +8375,11 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
return 0;
}
+static int bpf_program_nth_fd(const struct bpf_program *prog, int n);
+
int bpf_program__fd(const struct bpf_program *prog)
{
- return bpf_program__nth_fd(prog, 0);
+ return bpf_program_nth_fd(prog, 0);
}
size_t bpf_program__size(const struct bpf_program *prog)
@@ -8202,7 +8425,10 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
return 0;
}
-int bpf_program__nth_fd(const struct bpf_program *prog, int n)
+__attribute__((alias("bpf_program_nth_fd")))
+int bpf_program__nth_fd(const struct bpf_program *prog, int n);
+
+static int bpf_program_nth_fd(const struct bpf_program *prog, int n)
{
int fd;
@@ -8281,6 +8507,54 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
prog->expected_attach_type = type;
}
+__u32 bpf_program__flags(const struct bpf_program *prog)
+{
+ return prog->prog_flags;
+}
+
+int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
+{
+ if (prog->obj->loaded)
+ return libbpf_err(-EBUSY);
+
+ prog->prog_flags = flags;
+ return 0;
+}
+
+__u32 bpf_program__log_level(const struct bpf_program *prog)
+{
+ return prog->log_level;
+}
+
+int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
+{
+ if (prog->obj->loaded)
+ return libbpf_err(-EBUSY);
+
+ prog->log_level = log_level;
+ return 0;
+}
+
+const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
+{
+ *log_size = prog->log_size;
+ return prog->log_buf;
+}
+
+int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
+{
+ if (log_size && !log_buf)
+ return -EINVAL;
+ if (log_size > UINT_MAX)
+ return -EINVAL;
+ if (prog->obj->loaded)
+ return -EBUSY;
+
+ prog->log_buf = log_buf;
+ prog->log_size = log_size;
+ return 0;
+}
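/* Usage sketch (illustrative, not from this patch): the getters/setters
 * above let a caller capture one program's verifier log in its own
 * buffer instead of sharing the object-level kernel_log_buf. The buffer
 * size is an arbitrary choice.
 */
static char verifier_log[64 * 1024];

static int load_with_prog_log(struct bpf_object *obj, struct bpf_program *prog)
{
	int err;

	err = bpf_program__set_log_buf(prog, verifier_log, sizeof(verifier_log));
	if (err)
		return err;

	/* log_level 1 requests the verifier log even on success */
	err = bpf_program__set_log_level(prog, 1);
	if (err)
		return err;

	return bpf_object__load(obj);
}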
+
#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
.sec = sec_pfx, \
.prog_type = BPF_PROG_TYPE_##ptype, \
@@ -9028,7 +9302,10 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
pr_warn("error: inner_map_fd already specified\n");
return libbpf_err(-EINVAL);
}
- zfree(&map->inner_map);
+ if (map->inner_map) {
+ bpf_map__destroy(map->inner_map);
+ zfree(&map->inner_map);
+ }
map->inner_map_fd = fd;
return 0;
}
@@ -9145,21 +9422,12 @@ long libbpf_get_error(const void *ptr)
return -errno;
}
-int bpf_prog_load(const char *file, enum bpf_prog_type type,
- struct bpf_object **pobj, int *prog_fd)
-{
- struct bpf_prog_load_attr attr;
-
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = file;
- attr.prog_type = type;
- attr.expected_attach_type = 0;
-
- return bpf_prog_load_xattr(&attr, pobj, prog_fd);
-}
-
+__attribute__((alias("bpf_prog_load_xattr2")))
int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
- struct bpf_object **pobj, int *prog_fd)
+ struct bpf_object **pobj, int *prog_fd);
+
+static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr,
+ struct bpf_object **pobj, int *prog_fd)
{
struct bpf_object_open_attr open_attr = {};
struct bpf_program *prog, *first_prog = NULL;
@@ -9230,6 +9498,20 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
return 0;
}
+COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1)
+int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd)
+{
+ struct bpf_prog_load_attr attr;
+
+ memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+ attr.file = file;
+ attr.prog_type = type;
+ attr.expected_attach_type = 0;
+
+ return bpf_prog_load_xattr2(&attr, pobj, prog_fd);
+}
+
struct bpf_link {
int (*detach)(struct bpf_link *link);
void (*dealloc)(struct bpf_link *link);
@@ -9634,7 +9916,10 @@ static int append_to_file(const char *file, const char *fmt, ...)
static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
const char *kfunc_name, size_t offset)
{
- snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), kfunc_name, offset);
+ static int index = 0;
+
+ snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
+ __sync_fetch_and_add(&index, 1));
}
static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
@@ -9735,7 +10020,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
func_name, offset);
- legacy_probe = strdup(func_name);
+ legacy_probe = strdup(probe_name);
if (!legacy_probe)
return libbpf_err_ptr(-ENOMEM);
@@ -10394,10 +10679,10 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
return link;
}
-enum bpf_perf_event_ret
-bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
- void **copy_mem, size_t *copy_size,
- bpf_perf_event_print_t fn, void *private_data)
+static enum bpf_perf_event_ret
+perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+ void **copy_mem, size_t *copy_size,
+ bpf_perf_event_print_t fn, void *private_data)
{
struct perf_event_mmap_page *header = mmap_mem;
__u64 data_head = ring_buffer_read_head(header);
@@ -10442,6 +10727,12 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
return libbpf_err(ret);
}
+__attribute__((alias("perf_event_read_simple")))
+enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+ void **copy_mem, size_t *copy_size,
+ bpf_perf_event_print_t fn, void *private_data);
+
struct perf_buffer;
struct perf_buffer_params {
@@ -10575,11 +10866,18 @@ error:
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
struct perf_buffer_params *p);
-struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
- const struct perf_buffer_opts *opts)
+DEFAULT_VERSION(perf_buffer__new_v0_6_0, perf_buffer__new, LIBBPF_0.6.0)
+struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt,
+ perf_buffer_sample_fn sample_cb,
+ perf_buffer_lost_fn lost_cb,
+ void *ctx,
+ const struct perf_buffer_opts *opts)
{
struct perf_buffer_params p = {};
- struct perf_event_attr attr = { 0, };
+ struct perf_event_attr attr = {};
+
+ if (!OPTS_VALID(opts, perf_buffer_opts))
+ return libbpf_err_ptr(-EINVAL);
attr.config = PERF_COUNT_SW_BPF_OUTPUT;
attr.type = PERF_TYPE_SOFTWARE;
@@ -10588,29 +10886,62 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
attr.wakeup_events = 1;
p.attr = &attr;
- p.sample_cb = opts ? opts->sample_cb : NULL;
- p.lost_cb = opts ? opts->lost_cb : NULL;
- p.ctx = opts ? opts->ctx : NULL;
+ p.sample_cb = sample_cb;
+ p.lost_cb = lost_cb;
+ p.ctx = ctx;
return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
-struct perf_buffer *
-perf_buffer__new_raw(int map_fd, size_t page_cnt,
- const struct perf_buffer_raw_opts *opts)
+COMPAT_VERSION(perf_buffer__new_deprecated, perf_buffer__new, LIBBPF_0.0.4)
+struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt,
+ const struct perf_buffer_opts *opts)
+{
+ return perf_buffer__new_v0_6_0(map_fd, page_cnt,
+ opts ? opts->sample_cb : NULL,
+ opts ? opts->lost_cb : NULL,
+ opts ? opts->ctx : NULL,
+ NULL);
+}
+
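/* Usage sketch (illustrative, not from this patch): with the v0.6.0
 * signature, callbacks and ctx are plain arguments and opts can simply
 * be NULL. "handle_event" and the page count are assumptions.
 */
static void handle_event(void *ctx, int cpu, void *data, __u32 size)
{
	/* consume one sample record */
}

static struct perf_buffer *open_perfbuf(int events_map_fd)
{
	return perf_buffer__new(events_map_fd, 8 /* pages per CPU */,
				handle_event, NULL /* lost_cb */,
				NULL /* ctx */, NULL /* opts */);
}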
+DEFAULT_VERSION(perf_buffer__new_raw_v0_6_0, perf_buffer__new_raw, LIBBPF_0.6.0)
+struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt,
+ struct perf_event_attr *attr,
+ perf_buffer_event_fn event_cb, void *ctx,
+ const struct perf_buffer_raw_opts *opts)
{
struct perf_buffer_params p = {};
- p.attr = opts->attr;
- p.event_cb = opts->event_cb;
- p.ctx = opts->ctx;
- p.cpu_cnt = opts->cpu_cnt;
- p.cpus = opts->cpus;
- p.map_keys = opts->map_keys;
+ if (page_cnt == 0 || !attr)
+ return libbpf_err_ptr(-EINVAL);
+
+ if (!OPTS_VALID(opts, perf_buffer_raw_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ p.attr = attr;
+ p.event_cb = event_cb;
+ p.ctx = ctx;
+ p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
+ p.cpus = OPTS_GET(opts, cpus, NULL);
+ p.map_keys = OPTS_GET(opts, map_keys, NULL);
return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
+COMPAT_VERSION(perf_buffer__new_raw_deprecated, perf_buffer__new_raw, LIBBPF_0.0.4)
+struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt,
+ const struct perf_buffer_raw_opts *opts)
+{
+ LIBBPF_OPTS(perf_buffer_raw_opts, inner_opts,
+ .cpu_cnt = opts->cpu_cnt,
+ .cpus = opts->cpus,
+ .map_keys = opts->map_keys,
+ );
+
+ return perf_buffer__new_raw_v0_6_0(map_fd, page_cnt, opts->attr,
+ opts->event_cb, opts->ctx, &inner_opts);
+}
+
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
struct perf_buffer_params *p)
{
@@ -10810,10 +11141,10 @@ static int perf_buffer__process_records(struct perf_buffer *pb,
{
enum bpf_perf_event_ret ret;
- ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
- pb->page_size, &cpu_buf->buf,
- &cpu_buf->buf_size,
- perf_buffer__process_record, cpu_buf);
+ ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
+ pb->page_size, &cpu_buf->buf,
+ &cpu_buf->buf_size,
+ perf_buffer__process_record, cpu_buf);
if (ret != LIBBPF_PERF_EVENT_CONT)
return ret;
return 0;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 9de0f299706b..8b9bc5e90c2b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -24,6 +24,10 @@
extern "C" {
#endif
+LIBBPF_API __u32 libbpf_major_version(void);
+LIBBPF_API __u32 libbpf_minor_version(void);
+LIBBPF_API const char *libbpf_version_string(void);
+
enum libbpf_errno {
__LIBBPF_ERRNO__START = 4000,
@@ -104,12 +108,73 @@ struct bpf_object_open_opts {
* struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux.
*/
const char *btf_custom_path;
+ /* Pointer to a buffer for storing kernel logs for applicable BPF
+ * commands. A valid kernel_log_size has to be specified as well and is
+ * passed through to the bpf() syscall. Keep in mind that the kernel might
+ * fail the operation with -ENOSPC if the provided buffer is too small
+ * to contain the entire log output.
+ * See the comment below for kernel_log_level for interaction between
+ * log_buf and log_level settings.
+ *
+ * If specified, this log buffer will be passed for:
+ * - each BPF program load (BPF_PROG_LOAD) attempt, unless overridden
+ * with bpf_program__set_log_buf() on per-program level, to get
+ * BPF verifier log output.
+ * - during BPF object's BTF load into kernel (BPF_BTF_LOAD) to get
+ * BTF sanity checking log.
+ *
+ * Each BPF command (BPF_BTF_LOAD or BPF_PROG_LOAD) will overwrite the
+ * previous contents, so if you need more fine-grained control, set a
+ * per-program buffer with bpf_program__set_log_buf() to preserve each
+ * individual program's verification log. Keep using kernel_log_buf
+ * for the BTF verification log, if necessary.
+ */
+ char *kernel_log_buf;
+ size_t kernel_log_size;
+ /*
+ * Log level can be set independently from the log buffer. log_level=0
+ * means that libbpf will attempt loading BTF or a program without any
+ * logging requested, but will retry on any error with log_level=1,
+ * using either its own or the custom log buffer, if provided.
+ * And vice versa, setting log_level>0 will request BTF or prog
+ * loading with a verbose log from the first attempt (and as such also
+ * for successfully loaded BTF or programs), and the actual log buffer
+ * will be either libbpf's own auto-allocated buffer, if kernel_log_buf
+ * is NULL, or the user-provided custom kernel_log_buf.
+ * If the user didn't provide a custom log buffer, libbpf will emit
+ * captured logs through its print callback.
+ */
+ __u32 kernel_log_level;
+
+ size_t :0;
};
-#define bpf_object_open_opts__last_field btf_custom_path
+#define bpf_object_open_opts__last_field kernel_log_level
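/* Usage sketch (illustrative, not from this patch): wiring up the new
 * kernel_log_* options at open time. "prog.bpf.o" and the buffer size
 * are assumptions.
 */
static char kernel_log[64 * 1024];

static struct bpf_object *open_with_kernel_log(void)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts,
		.kernel_log_buf = kernel_log,
		.kernel_log_size = sizeof(kernel_log),
		.kernel_log_level = 1,	/* verbose log from the first attempt */
	);

	return bpf_object__open_file("prog.bpf.o", &opts);
}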
LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
+
+/**
+ * @brief **bpf_object__open_file()** creates a bpf_object by opening
+ * the BPF ELF object file pointed to by the passed path and loading it
+ * into memory.
+ * @param path BPF object file path
+ * @param opts options for how to load the bpf object; this parameter is
+ * optional and can be set to NULL
+ * @return pointer to the new bpf_object; NULL is returned on error,
+ * and the error code is stored in errno
+ */
LIBBPF_API struct bpf_object *
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts);
+
+/**
+ * @brief **bpf_object__open_mem()** creates a bpf_object by reading
+ * the BPF object's raw bytes from a memory buffer containing a valid
+ * BPF ELF object file.
+ * @param obj_buf pointer to the buffer containing ELF file bytes
+ * @param obj_buf_sz number of bytes in the buffer
+ * @param opts options for how to load the bpf object
+ * @return pointer to the new bpf_object; NULL is returned on error,
+ * and the error code is stored in errno
+ */
LIBBPF_API struct bpf_object *
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts);
@@ -149,6 +214,7 @@ struct bpf_object_load_attr {
/* Load/unload object into/from kernel */
LIBBPF_API int bpf_object__load(struct bpf_object *obj);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__load() instead")
LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead")
LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
@@ -161,6 +227,7 @@ struct btf;
LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__find_program_by_name() instead")
LIBBPF_API struct bpf_program *
bpf_object__find_program_by_title(const struct bpf_object *obj,
const char *title);
@@ -262,8 +329,8 @@ LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *p
*/
LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog);
-LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,
- __u32 kern_version);
+LIBBPF_DEPRECATED_SINCE(0, 6, "use bpf_object__load() instead")
+LIBBPF_API int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_version);
LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);
LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated")
LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
@@ -273,7 +340,31 @@ LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated"
LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog,
const char *path,
int instance);
+
+/**
+ * @brief **bpf_program__pin()** pins the BPF program to a file
+ * in the BPF FS specified by a path. This increments the program's
+ * reference count, allowing it to stay loaded after the process
+ * which loaded it has exited.
+ *
+ * @param prog BPF program to pin, must already be loaded
+ * @param path file path in a BPF file system
+ * @return 0, on success; negative error code, otherwise
+ */
LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
+
+/**
+ * @brief **bpf_program__unpin()** unpins the BPF program from a file
+ * in the BPF FS specified by a path. This decrements the program's
+ * reference count.
+ *
+ * The file pinning the BPF program can also be unlinked by a different
+ * process in which case this function will return an error.
+ *
+ * @param prog BPF program to unpin
+ * @param path file path to the pin in a BPF file system
+ * @return 0, on success; negative error code, otherwise
+ */
LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);
LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
@@ -344,10 +435,41 @@ struct bpf_uprobe_opts {
};
#define bpf_uprobe_opts__last_field retprobe
+/**
+ * @brief **bpf_program__attach_uprobe()** attaches a BPF program
+ * to the userspace function which is found by binary path and
+ * offset. You can optionally specify a particular process to attach
+ * to. You can also optionally attach the program to the function
+ * exit instead of entry.
+ *
+ * @param prog BPF program to attach
+ * @param retprobe Attach to function exit
+ * @param pid Process ID to attach the uprobe to, 0 for self (own process),
+ * -1 for all processes
+ * @param binary_path Path to binary that contains the function symbol
+ * @param func_offset Offset within the binary of the function symbol
+ * @return Reference to the newly created BPF link; NULL is returned
+ * on error, and the error code is stored in errno
+ */
LIBBPF_API struct bpf_link *
bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe,
pid_t pid, const char *binary_path,
size_t func_offset);
+
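/* Usage sketch (illustrative, not from this patch): attaching at the
 * entry of a user-space function. The libc path and the pre-resolved
 * symbol offset are assumptions of this sketch.
 */
static struct bpf_link *attach_malloc_entry(struct bpf_program *prog,
					    size_t malloc_offset)
{
	/* retprobe=false: fire at entry; pid=-1: trace all processes */
	return bpf_program__attach_uprobe(prog, false, -1,
					  "/lib/x86_64-linux-gnu/libc.so.6",
					  malloc_offset);
}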
+/**
+ * @brief **bpf_program__attach_uprobe_opts()** is just like
+ * bpf_program__attach_uprobe() except with an options struct
+ * for various configurations.
+ *
+ * @param prog BPF program to attach
+ * @param pid Process ID to attach the uprobe to, 0 for self (own process),
+ * -1 for all processes
+ * @param binary_path Path to binary that contains the function symbol
+ * @param func_offset Offset within the binary of the function symbol
+ * @param opts Options for altering program attachment
+ * @return Reference to the newly created BPF link; NULL is returned
+ * on error, and the error code is stored in errno
+ */
LIBBPF_API struct bpf_link *
bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
const char *binary_path, size_t func_offset,
@@ -431,7 +553,6 @@ bpf_program__attach_iter(const struct bpf_program *prog,
* one instance. In this case bpf_program__fd(prog) is equal to
* bpf_program__nth_fd(prog, 0).
*/
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions")
struct bpf_prog_prep_result {
/*
* If not NULL, load new instruction array.
@@ -494,6 +615,18 @@ LIBBPF_API void
bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type);
+LIBBPF_API __u32 bpf_program__flags(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_flags(struct bpf_program *prog, __u32 flags);
+
+/* Per-program log level and log buffer getters/setters.
+ * See bpf_object_open_opts comments regarding log_level and log_buf
+ * interactions.
+ */
+LIBBPF_API __u32 bpf_program__log_level(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level);
+LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size);
+LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size);
+
LIBBPF_API int
bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
const char *attach_func_name);
@@ -544,7 +677,8 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
* Get bpf_map through the offset of corresponding struct bpf_map_def
* in the BPF object file.
*/
-LIBBPF_API struct bpf_map *
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead")
+struct bpf_map *
bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead")
@@ -611,6 +745,7 @@ LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead")
LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
/**
@@ -674,10 +809,12 @@ struct bpf_prog_load_attr {
int prog_flags;
};
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open() and bpf_object__load() instead")
LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
struct bpf_object **pobj, int *prog_fd);
-LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
- struct bpf_object **pobj, int *prog_fd);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open() and bpf_object__load() instead")
+LIBBPF_API int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd);
/* XDP related API */
struct xdp_link_info {
@@ -775,18 +912,52 @@ typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);
/* common use perf buffer options */
struct perf_buffer_opts {
- /* if specified, sample_cb is called for each sample */
- perf_buffer_sample_fn sample_cb;
- /* if specified, lost_cb is called for each batch of lost samples */
- perf_buffer_lost_fn lost_cb;
- /* ctx is provided to sample_cb and lost_cb */
- void *ctx;
+ union {
+ size_t sz;
+ struct { /* DEPRECATED: will be removed in v1.0 */
+ /* if specified, sample_cb is called for each sample */
+ perf_buffer_sample_fn sample_cb;
+ /* if specified, lost_cb is called for each batch of lost samples */
+ perf_buffer_lost_fn lost_cb;
+ /* ctx is provided to sample_cb and lost_cb */
+ void *ctx;
+ };
+ };
};
+#define perf_buffer_opts__last_field sz
+/**
+ * @brief **perf_buffer__new()** creates BPF perfbuf manager for a specified
+ * BPF_PERF_EVENT_ARRAY map
+ * @param map_fd FD of BPF_PERF_EVENT_ARRAY BPF map that will be used by BPF
+ * code to send data over to user-space
+ * @param page_cnt number of memory pages allocated for each per-CPU buffer
+ * @param sample_cb function called on each received data record
+ * @param lost_cb function called when record loss has occurred
+ * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb*
+ * @return a new instance of struct perf_buffer on success, NULL on error with
+ * *errno* containing an error code
+ */
LIBBPF_API struct perf_buffer *
perf_buffer__new(int map_fd, size_t page_cnt,
+ perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx,
const struct perf_buffer_opts *opts);
+LIBBPF_API struct perf_buffer *
+perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt,
+ perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx,
+ const struct perf_buffer_opts *opts);
+
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new() instead")
+struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt,
+ const struct perf_buffer_opts *opts);
+
+#define perf_buffer__new(...) ___libbpf_overload(___perf_buffer_new, __VA_ARGS__)
+#define ___perf_buffer_new6(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) \
+ perf_buffer__new(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts)
+#define ___perf_buffer_new3(map_fd, page_cnt, opts) \
+ perf_buffer__new_deprecated(map_fd, page_cnt, opts)
+
enum bpf_perf_event_ret {
LIBBPF_PERF_EVENT_DONE = 0,
LIBBPF_PERF_EVENT_ERROR = -1,
@@ -800,12 +971,21 @@ typedef enum bpf_perf_event_ret
/* raw perf buffer options, giving most power and control */
struct perf_buffer_raw_opts {
- /* perf event attrs passed directly into perf_event_open() */
- struct perf_event_attr *attr;
- /* raw event callback */
- perf_buffer_event_fn event_cb;
- /* ctx is provided to event_cb */
- void *ctx;
+ union {
+ struct {
+ size_t sz;
+ long :0;
+ long :0;
+ };
+ struct { /* DEPRECATED: will be removed in v1.0 */
+ /* perf event attrs passed directly into perf_event_open() */
+ struct perf_event_attr *attr;
+ /* raw event callback */
+ perf_buffer_event_fn event_cb;
+ /* ctx is provided to event_cb */
+ void *ctx;
+ };
+ };
/* if cpu_cnt == 0, open on all possible CPUs (up to the number of
* max_entries of given PERF_EVENT_ARRAY map)
*/
@@ -815,11 +995,28 @@ struct perf_buffer_raw_opts {
/* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */
int *map_keys;
};
+#define perf_buffer_raw_opts__last_field map_keys
LIBBPF_API struct perf_buffer *
-perf_buffer__new_raw(int map_fd, size_t page_cnt,
+perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr,
+ perf_buffer_event_fn event_cb, void *ctx,
const struct perf_buffer_raw_opts *opts);
+LIBBPF_API struct perf_buffer *
+perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, struct perf_event_attr *attr,
+ perf_buffer_event_fn event_cb, void *ctx,
+ const struct perf_buffer_raw_opts *opts);
+
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new_raw() instead")
+struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt,
+ const struct perf_buffer_raw_opts *opts);
+
+#define perf_buffer__new_raw(...) ___libbpf_overload(___perf_buffer_new_raw, __VA_ARGS__)
+#define ___perf_buffer_new_raw6(map_fd, page_cnt, attr, event_cb, ctx, opts) \
+ perf_buffer__new_raw(map_fd, page_cnt, attr, event_cb, ctx, opts)
+#define ___perf_buffer_new_raw3(map_fd, page_cnt, opts) \
+ perf_buffer__new_raw_deprecated(map_fd, page_cnt, opts)
+
LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb);
LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
@@ -831,6 +1028,7 @@ LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_i
typedef enum bpf_perf_event_ret
(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
void *private_data);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or perf_buffer__consume() instead")
LIBBPF_API enum bpf_perf_event_ret
bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
void **copy_mem, size_t *copy_size,
@@ -857,13 +1055,57 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
* user, causing subsequent probes to fail. In this case, the caller may want
* to adjust that limit with setrlimit().
*/
-LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type,
- __u32 ifindex);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_prog_type() instead")
+LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_map_type() instead")
LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex);
-LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id,
- enum bpf_prog_type prog_type, __u32 ifindex);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_helper() instead")
+LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex);
+LIBBPF_DEPRECATED_SINCE(0, 8, "implement your own or use bpftool for feature detection")
LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex);
+/**
+ * @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports
+ * BPF programs of a given type.
+ * @param prog_type BPF program type to detect kernel support for
+ * @param opts reserved for future extensibility, should be NULL
+ * @return 1, if given program type is supported; 0, if given program type is
+ * not supported; negative error code if feature detection failed or can't be
+ * performed
+ *
+ * Make sure the process has the required set of CAP_* permissions (or runs as
+ * root) when performing feature checking.
+ */
+LIBBPF_API int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts);
+/**
+ * @brief **libbpf_probe_bpf_map_type()** detects if host kernel supports
+ * BPF maps of a given type.
+ * @param map_type BPF map type to detect kernel support for
+ * @param opts reserved for future extensibility, should be NULL
+ * @return 1, if given map type is supported; 0, if given map type is
+ * not supported; negative error code if feature detection failed or can't be
+ * performed
+ *
+ * Make sure the process has the required set of CAP_* permissions (or runs as
+ * root) when performing feature checking.
+ */
+LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts);
+/**
+ * @brief **libbpf_probe_bpf_helper()** detects if host kernel supports the
+ * use of a given BPF helper from specified BPF program type.
+ * @param prog_type BPF program type used to check the support of BPF helper
+ * @param helper_id BPF helper ID (enum bpf_func_id) to check support for
+ * @param opts reserved for future extensibility, should be NULL
+ * @return 1, if given combination of program type and helper is supported; 0,
+ * if the combination is not supported; negative error code if feature
+ * detection for provided input arguments failed or can't be performed
+ *
+ * Make sure the process has the required set of CAP_* permissions (or runs as
+ * root) when performing feature checking.
+ */
+LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type,
+ enum bpf_func_id helper_id, const void *opts);
+
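Together these make run-time feature detection straightforward; for example (error handling elided):

    if (libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_XDP, NULL) == 1)
    	printf("XDP program type supported\n");
    if (libbpf_probe_bpf_map_type(BPF_MAP_TYPE_RINGBUF, NULL) == 1)
    	printf("BPF ring buffer map supported\n");
    if (libbpf_probe_bpf_helper(BPF_PROG_TYPE_XDP, BPF_FUNC_redirect_map, NULL) == 1)
    	printf("bpf_redirect_map() usable from XDP\n");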
/*
* Get bpf_prog_info in continuous memory
*
@@ -918,12 +1160,15 @@ struct bpf_prog_info_linear {
__u8 data[];
};
+LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")
LIBBPF_API struct bpf_prog_info_linear *
bpf_program__get_prog_info_linear(int fd, __u64 arrays);
+LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")
LIBBPF_API void
bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
+LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")
LIBBPF_API void
bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
@@ -965,11 +1210,11 @@ struct bpf_object_skeleton {
struct bpf_object **obj;
int map_cnt;
- int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */
+ int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */
struct bpf_map_skeleton *maps;
int prog_cnt;
- int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */
+ int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */
struct bpf_prog_skeleton *progs;
};
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 43580eb47740..529783967793 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -391,14 +391,44 @@ LIBBPF_0.6.0 {
global:
bpf_map__map_extra;
bpf_map__set_map_extra;
+ bpf_map_create;
bpf_object__next_map;
bpf_object__next_program;
bpf_object__prev_map;
bpf_object__prev_program;
+ bpf_prog_load_deprecated;
+ bpf_prog_load;
+ bpf_program__flags;
bpf_program__insn_cnt;
bpf_program__insns;
+ bpf_program__set_flags;
btf__add_btf;
btf__add_decl_tag;
+ btf__add_type_tag;
+ btf__dedup;
+ btf__dedup_deprecated;
btf__raw_data;
btf__type_cnt;
+ btf_dump__new;
+ btf_dump__new_deprecated;
+ libbpf_major_version;
+ libbpf_minor_version;
+ libbpf_version_string;
+ perf_buffer__new;
+ perf_buffer__new_deprecated;
+ perf_buffer__new_raw;
+ perf_buffer__new_raw_deprecated;
} LIBBPF_0.5.0;
+
+LIBBPF_0.7.0 {
+ global:
+ bpf_btf_load;
+ bpf_program__log_buf;
+ bpf_program__log_level;
+ bpf_program__set_log_buf;
+ bpf_program__set_log_level;
+ libbpf_probe_bpf_helper;
+ libbpf_probe_bpf_map_type;
+ libbpf_probe_bpf_prog_type;
+ libbpf_set_memlock_rlim_max;
+};
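With the version getters exported, callers can check the linked library at run time; a small sketch (per libbpf.h, the numeric getters return __u32 and the string getter a const char *):

    printf("using libbpf %s\n", libbpf_version_string());
    if (libbpf_major_version() == 0 && libbpf_minor_version() < 7)
    	fprintf(stderr, "feature-probing APIs are unavailable\n");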
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
index aaa1efbf6f51..000e37798ff2 100644
--- a/tools/lib/bpf/libbpf_common.h
+++ b/tools/lib/bpf/libbpf_common.h
@@ -40,6 +40,23 @@
#else
#define __LIBBPF_MARK_DEPRECATED_0_7(X)
#endif
+#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8)
+#define __LIBBPF_MARK_DEPRECATED_0_8(X) X
+#else
+#define __LIBBPF_MARK_DEPRECATED_0_8(X)
+#endif
+
+/* This set of internal macros allows "function overloading" based on the
+ * number of arguments provided by the user, in a backwards-compatible way,
+ * during the transition to libbpf 1.0.
+ * It's an ugly but necessary evil that will be cleaned up once we get to 1.0.
+ * See the bpf_prog_load() overload for an example.
+ */
+#define ___libbpf_cat(A, B) A ## B
+#define ___libbpf_select(NAME, NUM) ___libbpf_cat(NAME, NUM)
+#define ___libbpf_nth(_1, _2, _3, _4, _5, _6, N, ...) N
+#define ___libbpf_cnt(...) ___libbpf_nth(__VA_ARGS__, 6, 5, 4, 3, 2, 1)
+#define ___libbpf_overload(NAME, ...) ___libbpf_select(NAME, ___libbpf_cnt(__VA_ARGS__))(__VA_ARGS__)
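To see how the selection works, here is the expansion of a three-argument call, step by step (comments only, no new code):

    /* perf_buffer__new(fd, 8, &opts)
     * -> ___libbpf_overload(___perf_buffer_new, fd, 8, &opts)
     * -> ___libbpf_select(___perf_buffer_new, ___libbpf_cnt(fd, 8, &opts))(fd, 8, &opts)
     * -> ___libbpf_cat(___perf_buffer_new, 3)(fd, 8, &opts)  // ___libbpf_cnt yields 3
     * -> ___perf_buffer_new3(fd, 8, &opts)
     * -> perf_buffer__new_deprecated(fd, 8, &opts)
     */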
/* Helper macro to declare and initialize libbpf options struct
*
@@ -54,7 +71,7 @@
* including any extra padding, it with memset() and then assigns initial
* values provided by users in struct initializer-syntax as varargs.
*/
-#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \
+#define LIBBPF_OPTS(TYPE, NAME, ...) \
struct TYPE NAME = ({ \
memset(&NAME, 0, sizeof(struct TYPE)); \
(struct TYPE) { \
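Usage of the renamed macro stays declarative; a short sketch:

    LIBBPF_OPTS(bpf_prog_load_opts, opts,
    	.log_level = 2,
    );
    /* opts is zeroed (padding included), .sz is filled in by the macro,
     * and only .log_level is overridden
     */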
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index aeb79e3a8ff9..1565679eb432 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -73,6 +73,8 @@
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
#define BTF_TYPE_DECL_TAG_ENC(value, type, component_idx) \
BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx)
+#define BTF_TYPE_TYPE_TAG_ENC(value, type) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type)
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
@@ -167,10 +169,31 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
return realloc(ptr, total);
}
+/* Copy up to sz - 1 bytes from the zero-terminated src string and ensure
+ * that dst is a zero-terminated string no matter what (unless sz == 0, in
+ * which case it's a no-op). It's conceptually close to FreeBSD's strlcpy(),
+ * but differs in what is returned. Given this is an internal helper, it's
+ * trivial to extend when necessary. Use this instead of strncpy inside libbpf.
+ */
+static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz)
+{
+ size_t i;
+
+ if (sz == 0)
+ return;
+
+ sz--;
+ for (i = 0; i < sz && src[i]; i++)
+ dst[i] = src[i];
+ dst[i] = '\0';
+}
+
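Contrast with strncpy(), which does not guarantee NUL termination (and zero-fills the whole buffer); the xsk.c hunks below switch to exactly this helper. An illustrative use:

    char ifname[IFNAMSIZ];

    /* copies at most IFNAMSIZ - 1 bytes and always NUL-terminates */
    libbpf_strlcpy(ifname, "very-long-interface-name", IFNAMSIZ);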
+__u32 get_kernel_version(void);
+
struct btf;
struct btf_type;
-struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id);
+struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id);
const char *btf_kind_str(const struct btf_type *t);
const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
@@ -270,63 +293,51 @@ static inline bool libbpf_validate_opts(const char *opts,
(opts)->sz - __off); \
})
+enum kern_feature_id {
+ /* v4.14: kernel support for program & map names. */
+ FEAT_PROG_NAME,
+ /* v5.2: kernel support for global data sections. */
+ FEAT_GLOBAL_DATA,
+ /* BTF support */
+ FEAT_BTF,
+ /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
+ FEAT_BTF_FUNC,
+ /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
+ FEAT_BTF_DATASEC,
+ /* BTF_FUNC_GLOBAL is supported */
+ FEAT_BTF_GLOBAL_FUNC,
+ /* BPF_F_MMAPABLE is supported for arrays */
+ FEAT_ARRAY_MMAP,
+ /* kernel support for expected_attach_type in BPF_PROG_LOAD */
+ FEAT_EXP_ATTACH_TYPE,
+ /* bpf_probe_read_{kernel,user}[_str] helpers */
+ FEAT_PROBE_READ_KERN,
+ /* BPF_PROG_BIND_MAP is supported */
+ FEAT_PROG_BIND_MAP,
+ /* Kernel support for module BTFs */
+ FEAT_MODULE_BTF,
+ /* BTF_KIND_FLOAT support */
+ FEAT_BTF_FLOAT,
+ /* BPF perf link support */
+ FEAT_PERF_LINK,
+ /* BTF_KIND_DECL_TAG support */
+ FEAT_BTF_DECL_TAG,
+ /* BTF_KIND_TYPE_TAG support */
+ FEAT_BTF_TYPE_TAG,
+ /* memcg-based accounting for BPF maps and progs */
+ FEAT_MEMCG_ACCOUNT,
+ __FEAT_CNT,
+};
+
+int probe_memcg_account(void);
+bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
+int bump_rlimit_memlock(void);
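A hedged sketch of how a load path is expected to use these internal helpers (helper names are from this patch, the surrounding logic is illustrative):

    /* bump RLIMIT_MEMLOCK only on kernels without memcg-based accounting;
     * NULL obj is assumed to be acceptable to kernel_supports() here
     */
    if (!kernel_supports(NULL /* obj */, FEAT_MEMCG_ACCOUNT)) {
    	int err = bump_rlimit_memlock();

    	if (err)
    		pr_warn("failed to bump RLIMIT_MEMLOCK: %d\n", err);
    }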
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
const char *str_sec, size_t str_len);
-
-struct bpf_prog_load_params {
- enum bpf_prog_type prog_type;
- enum bpf_attach_type expected_attach_type;
- const char *name;
- const struct bpf_insn *insns;
- size_t insn_cnt;
- const char *license;
- __u32 kern_version;
- __u32 attach_prog_fd;
- __u32 attach_btf_obj_fd;
- __u32 attach_btf_id;
- __u32 prog_ifindex;
- __u32 prog_btf_fd;
- __u32 prog_flags;
-
- __u32 func_info_rec_size;
- const void *func_info;
- __u32 func_info_cnt;
-
- __u32 line_info_rec_size;
- const void *line_info;
- __u32 line_info_cnt;
-
- __u32 log_level;
- char *log_buf;
- size_t log_buf_sz;
- int *fd_array;
-};
-
-int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr);
-
-struct bpf_create_map_params {
- const char *name;
- enum bpf_map_type map_type;
- __u32 map_flags;
- __u32 key_size;
- __u32 value_size;
- __u32 max_entries;
- __u32 numa_node;
- __u32 btf_fd;
- __u32 btf_key_type_id;
- __u32 btf_value_type_id;
- __u32 map_ifindex;
- union {
- __u32 inner_map_fd;
- __u32 btf_vmlinux_value_type_id;
- };
- __u64 map_extra;
-};
-
-int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr);
+int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level);
struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
index 5ba5c9beccfa..79131f761a27 100644
--- a/tools/lib/bpf/libbpf_legacy.h
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -45,7 +45,6 @@ enum libbpf_strict_mode {
* (positive) error code.
*/
LIBBPF_STRICT_DIRECT_ERRS = 0x02,
-
/*
* Enforce strict BPF program section (SEC()) names.
* E.g., while previously SEC("xdp_whatever") or SEC("perf_event_blah") were
@@ -63,12 +62,24 @@ enum libbpf_strict_mode {
* Clients can maintain it on their own if it is valuable for them.
*/
LIBBPF_STRICT_NO_OBJECT_LIST = 0x08,
+ /*
+ * Automatically bump RLIMIT_MEMLOCK using setrlimit() before the
+ * first BPF program or map creation operation. This is done only if the
+ * kernel is too old to support memcg-based memory accounting for the BPF
+ * subsystem. By default, the RLIMIT_MEMLOCK limit is set to RLIM_INFINITY,
+ * but it can be overridden with the libbpf_set_memlock_rlim_max() API.
+ * Note that libbpf_set_memlock_rlim_max() needs to be called before
+ * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load()
+ * operation.
+ */
+ LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,
__LIBBPF_STRICT_LAST,
};
LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
+#define DECLARE_LIBBPF_OPTS LIBBPF_OPTS
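Callers opting into the 1.0 semantics can enable the new behavior explicitly; for example:

    /* opt into automatic RLIMIT_MEMLOCK bumping on pre-memcg kernels */
    libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK);

    /* optionally cap the bump; must precede the first load operation */
    libbpf_set_memlock_rlim_max(128UL * 1024 * 1024);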
#ifdef __cplusplus
} /* extern "C" */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 68f2dbf364aa..97b06cede56f 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -48,41 +48,65 @@ static int get_vendor_id(int ifindex)
return strtol(buf, NULL, 0);
}
-static int get_kernel_version(void)
+static int probe_prog_load(enum bpf_prog_type prog_type,
+ const struct bpf_insn *insns, size_t insns_cnt,
+ char *log_buf, size_t log_buf_sz,
+ __u32 ifindex)
{
- int version, subversion, patchlevel;
- struct utsname utsn;
-
- /* Return 0 on failure, and attempt to probe with empty kversion */
- if (uname(&utsn))
- return 0;
-
- if (sscanf(utsn.release, "%d.%d.%d",
- &version, &subversion, &patchlevel) != 3)
- return 0;
-
- return (version << 16) + (subversion << 8) + patchlevel;
-}
-
-static void
-probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
- size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex)
-{
- struct bpf_load_program_attr xattr = {};
- int fd;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_buf = log_buf,
+ .log_size = log_buf_sz,
+ .log_level = log_buf ? 1 : 0,
+ .prog_ifindex = ifindex,
+ );
+ int fd, err, exp_err = 0;
+ const char *exp_msg = NULL;
+ char buf[4096];
switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
- xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
+ opts.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
break;
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
- xattr.expected_attach_type = BPF_CGROUP_GETSOCKOPT;
+ opts.expected_attach_type = BPF_CGROUP_GETSOCKOPT;
break;
case BPF_PROG_TYPE_SK_LOOKUP:
- xattr.expected_attach_type = BPF_SK_LOOKUP;
+ opts.expected_attach_type = BPF_SK_LOOKUP;
break;
case BPF_PROG_TYPE_KPROBE:
- xattr.kern_version = get_kernel_version();
+ opts.kern_version = get_kernel_version();
+ break;
+ case BPF_PROG_TYPE_LIRC_MODE2:
+ opts.expected_attach_type = BPF_LIRC_MODE2;
+ break;
+ case BPF_PROG_TYPE_TRACING:
+ case BPF_PROG_TYPE_LSM:
+ opts.log_buf = buf;
+ opts.log_size = sizeof(buf);
+ opts.log_level = 1;
+ if (prog_type == BPF_PROG_TYPE_TRACING)
+ opts.expected_attach_type = BPF_TRACE_FENTRY;
+ else
+ opts.expected_attach_type = BPF_MODIFY_RETURN;
+ opts.attach_btf_id = 1;
+
+ exp_err = -EINVAL;
+ exp_msg = "attach_btf_id 1 is not a function";
+ break;
+ case BPF_PROG_TYPE_EXT:
+ opts.log_buf = buf;
+ opts.log_size = sizeof(buf);
+ opts.log_level = 1;
+ opts.attach_btf_id = 1;
+
+ exp_err = -EINVAL;
+ exp_msg = "Cannot replace kernel functions";
+ break;
+ case BPF_PROG_TYPE_SYSCALL:
+ opts.prog_flags = BPF_F_SLEEPABLE;
+ break;
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ exp_err = -524; /* -ENOTSUPP */
break;
case BPF_PROG_TYPE_UNSPEC:
case BPF_PROG_TYPE_SOCKET_FILTER:
@@ -103,27 +127,42 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_RAW_TRACEPOINT:
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
- case BPF_PROG_TYPE_LIRC_MODE2:
case BPF_PROG_TYPE_SK_REUSEPORT:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
- case BPF_PROG_TYPE_TRACING:
- case BPF_PROG_TYPE_STRUCT_OPS:
- case BPF_PROG_TYPE_EXT:
- case BPF_PROG_TYPE_LSM:
- default:
break;
+ default:
+ return -EOPNOTSUPP;
}
- xattr.prog_type = prog_type;
- xattr.insns = insns;
- xattr.insns_cnt = insns_cnt;
- xattr.license = "GPL";
- xattr.prog_ifindex = ifindex;
-
- fd = bpf_load_program_xattr(&xattr, buf, buf_len);
+ fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts);
+ err = -errno;
if (fd >= 0)
close(fd);
+ if (exp_err) {
+ if (fd >= 0 || err != exp_err)
+ return 0;
+ if (exp_msg && !strstr(buf, exp_msg))
+ return 0;
+ return 1;
+ }
+ return fd >= 0 ? 1 : 0;
+}
+
+int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN()
+ };
+ const size_t insn_cnt = ARRAY_SIZE(insns);
+ int ret;
+
+ if (opts)
+ return libbpf_err(-EINVAL);
+
+ ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0);
+ return libbpf_err(ret);
}
bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
@@ -133,12 +172,16 @@ bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
BPF_EXIT_INSN()
};
+ /* prefer libbpf_probe_bpf_prog_type() unless offload is requested */
+ if (ifindex == 0)
+ return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1;
+
if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS)
/* nfp returns -EINVAL on exit(0) with TC offload */
insns[0].imm = 2;
errno = 0;
- probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);
+ probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);
return errno != EINVAL && errno != EOPNOTSUPP;
}
@@ -166,7 +209,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);
memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len);
- btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false);
+ btf_fd = bpf_btf_load(raw_btf, btf_len, NULL);
free(raw_btf);
return btf_fd;
@@ -199,17 +242,18 @@ static int load_local_storage_btf(void)
strs, sizeof(strs));
}
-bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
+static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)
{
- int key_size, value_size, max_entries, map_flags;
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
+ int key_size, value_size, max_entries;
__u32 btf_key_type_id = 0, btf_value_type_id = 0;
- struct bpf_create_map_attr attr = {};
- int fd = -1, btf_fd = -1, fd_inner;
+ int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err;
+
+ opts.map_ifindex = ifindex;
key_size = sizeof(__u32);
value_size = sizeof(__u32);
max_entries = 1;
- map_flags = 0;
switch (map_type) {
case BPF_MAP_TYPE_STACK_TRACE:
@@ -218,7 +262,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
case BPF_MAP_TYPE_LPM_TRIE:
key_size = sizeof(__u64);
value_size = sizeof(__u64);
- map_flags = BPF_F_NO_PREALLOC;
+ opts.map_flags = BPF_F_NO_PREALLOC;
break;
case BPF_MAP_TYPE_CGROUP_STORAGE:
case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
@@ -237,17 +281,25 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
btf_value_type_id = 3;
value_size = 8;
max_entries = 0;
- map_flags = BPF_F_NO_PREALLOC;
+ opts.map_flags = BPF_F_NO_PREALLOC;
btf_fd = load_local_storage_btf();
if (btf_fd < 0)
- return false;
+ return btf_fd;
break;
case BPF_MAP_TYPE_RINGBUF:
key_size = 0;
value_size = 0;
max_entries = 4096;
break;
- case BPF_MAP_TYPE_UNSPEC:
+ case BPF_MAP_TYPE_STRUCT_OPS:
+ /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */
+ opts.btf_vmlinux_value_type_id = 1;
+ exp_err = -524; /* -ENOTSUPP */
+ break;
+ case BPF_MAP_TYPE_BLOOM_FILTER:
+ key_size = 0;
+ max_entries = 1;
+ break;
case BPF_MAP_TYPE_HASH:
case BPF_MAP_TYPE_ARRAY:
case BPF_MAP_TYPE_PROG_ARRAY:
@@ -266,9 +318,10 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
case BPF_MAP_TYPE_XSKMAP:
case BPF_MAP_TYPE_SOCKHASH:
case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
- case BPF_MAP_TYPE_STRUCT_OPS:
- default:
break;
+ case BPF_MAP_TYPE_UNSPEC:
+ default:
+ return -EOPNOTSUPP;
}
if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
@@ -277,37 +330,102 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
* map-in-map for offload
*/
if (ifindex)
- return false;
+ goto cleanup;
- fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH,
- sizeof(__u32), sizeof(__u32), 1, 0);
+ fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
+ sizeof(__u32), sizeof(__u32), 1, NULL);
if (fd_inner < 0)
- return false;
- fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32),
- fd_inner, 1, 0);
- close(fd_inner);
- } else {
- /* Note: No other restriction on map type probes for offload */
- attr.map_type = map_type;
- attr.key_size = key_size;
- attr.value_size = value_size;
- attr.max_entries = max_entries;
- attr.map_flags = map_flags;
- attr.map_ifindex = ifindex;
- if (btf_fd >= 0) {
- attr.btf_fd = btf_fd;
- attr.btf_key_type_id = btf_key_type_id;
- attr.btf_value_type_id = btf_value_type_id;
- }
+ goto cleanup;
+
+ opts.inner_map_fd = fd_inner;
+ }
- fd = bpf_create_map_xattr(&attr);
+ if (btf_fd >= 0) {
+ opts.btf_fd = btf_fd;
+ opts.btf_key_type_id = btf_key_type_id;
+ opts.btf_value_type_id = btf_value_type_id;
}
+
+ fd = bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts);
+ err = -errno;
+
+cleanup:
if (fd >= 0)
close(fd);
+ if (fd_inner >= 0)
+ close(fd_inner);
if (btf_fd >= 0)
close(btf_fd);
- return fd >= 0;
+ if (exp_err)
+ return fd < 0 && err == exp_err ? 1 : 0;
+ else
+ return fd >= 0 ? 1 : 0;
+}
+
+int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts)
+{
+ int ret;
+
+ if (opts)
+ return libbpf_err(-EINVAL);
+
+ ret = probe_map_create(map_type, 0);
+ return libbpf_err(ret);
+}
+
+bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
+{
+ return probe_map_create(map_type, ifindex) == 1;
+}
+
+int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id,
+ const void *opts)
+{
+ struct bpf_insn insns[] = {
+ BPF_EMIT_CALL((__u32)helper_id),
+ BPF_EXIT_INSN(),
+ };
+ const size_t insn_cnt = ARRAY_SIZE(insns);
+ char buf[4096];
+ int ret;
+
+ if (opts)
+ return libbpf_err(-EINVAL);
+
+ /* we can't successfully load all prog types to check for BPF helper
+ * support, so bail out with an -EOPNOTSUPP error
+ */
+ switch (prog_type) {
+ case BPF_PROG_TYPE_TRACING:
+ case BPF_PROG_TYPE_EXT:
+ case BPF_PROG_TYPE_LSM:
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ return -EOPNOTSUPP;
+ default:
+ break;
+ }
+
+ buf[0] = '\0';
+ ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0);
+ if (ret < 0)
+ return libbpf_err(ret);
+
+ /* If BPF verifier doesn't recognize BPF helper ID (enum bpf_func_id)
+ * at all, it will emit something like "invalid func unknown#181".
+ * If BPF verifier recognizes BPF helper but it's not supported for
+ * given BPF program type, it will emit "unknown func bpf_sys_bpf#166".
+ * In both cases, provided combination of BPF program type and BPF
+ * helper is not supported by the kernel.
+ * In all other cases, probe_prog_load() above will either succeed (e.g.,
+ * because BPF helper happens to accept no input arguments or it
+ * accepts one input argument and initial PTR_TO_CTX is fine for
+ * that), or we'll get some more specific BPF verifier error about
+ * some unsatisfied conditions.
+ */
+ if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func ")))
+ return 0;
+ return 1; /* assume supported */
}
bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
@@ -320,8 +438,7 @@ bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
char buf[4096] = {};
bool res;
- probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf),
- ifindex);
+ probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex);
res = !grep(buf, "invalid func ") && !grep(buf, "unknown func ");
if (ifindex) {
@@ -353,8 +470,8 @@ bool bpf_probe_large_insn_limit(__u32 ifindex)
insns[BPF_MAXINSNS] = BPF_EXIT_INSN();
errno = 0;
- probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
- ifindex);
+ probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
+ ifindex);
return errno != E2BIG && errno != EINVAL;
}
diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h
index dd56d76f291c..0fefefc3500b 100644
--- a/tools/lib/bpf/libbpf_version.h
+++ b/tools/lib/bpf/libbpf_version.h
@@ -4,6 +4,6 @@
#define __LIBBPF_VERSION_H
#define LIBBPF_MAJOR_VERSION 0
-#define LIBBPF_MINOR_VERSION 6
+#define LIBBPF_MINOR_VERSION 7
#endif /* __LIBBPF_VERSION_H */
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index f677dccdeae4..9aa016fb55aa 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -210,6 +210,7 @@ void bpf_linker__free(struct bpf_linker *linker)
}
free(linker->secs);
+ free(linker->glob_syms);
free(linker);
}
@@ -1999,7 +2000,7 @@ add_sym:
static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj)
{
struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx];
- struct dst_sec *dst_symtab = &linker->secs[linker->symtab_sec_idx];
+ struct dst_sec *dst_symtab;
int i, err;
for (i = 1; i < obj->sec_cnt; i++) {
@@ -2032,6 +2033,9 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
return -1;
}
+ /* add_dst_sec() above could have invalidated linker->secs */
+ dst_symtab = &linker->secs[linker->symtab_sec_idx];
+
/* shdr->sh_link points to SYMTAB */
dst_sec->shdr->sh_link = linker->symtab_sec_idx;
@@ -2650,6 +2654,7 @@ static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name,
static int finalize_btf(struct bpf_linker *linker)
{
+ LIBBPF_OPTS(btf_dedup_opts, opts);
struct btf *btf = linker->btf;
const void *raw_data;
int i, j, id, err;
@@ -2686,7 +2691,8 @@ static int finalize_btf(struct bpf_linker *linker)
return err;
}
- err = btf__dedup(linker->btf, linker->btf_ext, NULL);
+ opts.btf_ext = linker->btf_ext;
+ err = btf__dedup(linker->btf, &opts);
if (err) {
pr_warn("BTF dedup failed: %d\n", err);
return err;
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index b5b8956a1be8..910865e29edc 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -1,6 +1,60 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2019 Facebook */
+#ifdef __KERNEL__
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/string.h>
+#include <linux/bpf_verifier.h>
+#include "relo_core.h"
+
+static const char *btf_kind_str(const struct btf_type *t)
+{
+ return btf_type_str(t);
+}
+
+static bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
+static const struct btf_type *
+skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id)
+{
+ return btf_type_skip_modifiers(btf, id, res_id);
+}
+
+static const char *btf__name_by_offset(const struct btf *btf, u32 offset)
+{
+ return btf_name_by_offset(btf, offset);
+}
+
+static s64 btf__resolve_size(const struct btf *btf, u32 type_id)
+{
+ const struct btf_type *t;
+ int size;
+
+ t = btf_type_by_id(btf, type_id);
+ t = btf_resolve_size(btf, t, &size);
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+ return size;
+}
+
+enum libbpf_print_level {
+ LIBBPF_WARN,
+ LIBBPF_INFO,
+ LIBBPF_DEBUG,
+};
+
+#undef pr_warn
+#undef pr_info
+#undef pr_debug
+#define pr_warn(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
+#define pr_info(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
+#define pr_debug(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
+#define libbpf_print(level, fmt, ...) bpf_log((void *)prog_name, fmt, ##__VA_ARGS__)
+#else
#include <stdio.h>
#include <string.h>
#include <errno.h>
@@ -12,33 +66,7 @@
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
-
-#define BPF_CORE_SPEC_MAX_LEN 64
-
-/* represents BPF CO-RE field or array element accessor */
-struct bpf_core_accessor {
- __u32 type_id; /* struct/union type or array element type */
- __u32 idx; /* field index or array index */
- const char *name; /* field name or NULL for array accessor */
-};
-
-struct bpf_core_spec {
- const struct btf *btf;
- /* high-level spec: named fields and array indices only */
- struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
- /* original unresolved (no skip_mods_or_typedefs) root type ID */
- __u32 root_type_id;
- /* CO-RE relocation kind */
- enum bpf_core_relo_kind relo_kind;
- /* high-level spec length */
- int len;
- /* raw, low-level spec: 1-to-1 with accessor spec string */
- int raw_spec[BPF_CORE_SPEC_MAX_LEN];
- /* raw spec length */
- int raw_len;
- /* field bit offset represented by spec */
- __u32 bit_offset;
-};
+#endif
static bool is_flex_arr(const struct btf *btf,
const struct bpf_core_accessor *acc,
@@ -51,25 +79,25 @@ static bool is_flex_arr(const struct btf *btf,
return false;
/* has to be the last member of enclosing struct */
- t = btf__type_by_id(btf, acc->type_id);
+ t = btf_type_by_id(btf, acc->type_id);
return acc->idx == btf_vlen(t) - 1;
}
static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
{
switch (kind) {
- case BPF_FIELD_BYTE_OFFSET: return "byte_off";
- case BPF_FIELD_BYTE_SIZE: return "byte_sz";
- case BPF_FIELD_EXISTS: return "field_exists";
- case BPF_FIELD_SIGNED: return "signed";
- case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
- case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
- case BPF_TYPE_ID_LOCAL: return "local_type_id";
- case BPF_TYPE_ID_TARGET: return "target_type_id";
- case BPF_TYPE_EXISTS: return "type_exists";
- case BPF_TYPE_SIZE: return "type_size";
- case BPF_ENUMVAL_EXISTS: return "enumval_exists";
- case BPF_ENUMVAL_VALUE: return "enumval_value";
+ case BPF_CORE_FIELD_BYTE_OFFSET: return "byte_off";
+ case BPF_CORE_FIELD_BYTE_SIZE: return "byte_sz";
+ case BPF_CORE_FIELD_EXISTS: return "field_exists";
+ case BPF_CORE_FIELD_SIGNED: return "signed";
+ case BPF_CORE_FIELD_LSHIFT_U64: return "lshift_u64";
+ case BPF_CORE_FIELD_RSHIFT_U64: return "rshift_u64";
+ case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id";
+ case BPF_CORE_TYPE_ID_TARGET: return "target_type_id";
+ case BPF_CORE_TYPE_EXISTS: return "type_exists";
+ case BPF_CORE_TYPE_SIZE: return "type_size";
+ case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists";
+ case BPF_CORE_ENUMVAL_VALUE: return "enumval_value";
default: return "unknown";
}
}
@@ -77,12 +105,12 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
{
switch (kind) {
- case BPF_FIELD_BYTE_OFFSET:
- case BPF_FIELD_BYTE_SIZE:
- case BPF_FIELD_EXISTS:
- case BPF_FIELD_SIGNED:
- case BPF_FIELD_LSHIFT_U64:
- case BPF_FIELD_RSHIFT_U64:
+ case BPF_CORE_FIELD_BYTE_OFFSET:
+ case BPF_CORE_FIELD_BYTE_SIZE:
+ case BPF_CORE_FIELD_EXISTS:
+ case BPF_CORE_FIELD_SIGNED:
+ case BPF_CORE_FIELD_LSHIFT_U64:
+ case BPF_CORE_FIELD_RSHIFT_U64:
return true;
default:
return false;
@@ -92,10 +120,10 @@ static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
{
switch (kind) {
- case BPF_TYPE_ID_LOCAL:
- case BPF_TYPE_ID_TARGET:
- case BPF_TYPE_EXISTS:
- case BPF_TYPE_SIZE:
+ case BPF_CORE_TYPE_ID_LOCAL:
+ case BPF_CORE_TYPE_ID_TARGET:
+ case BPF_CORE_TYPE_EXISTS:
+ case BPF_CORE_TYPE_SIZE:
return true;
default:
return false;
@@ -105,8 +133,8 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
{
switch (kind) {
- case BPF_ENUMVAL_EXISTS:
- case BPF_ENUMVAL_VALUE:
+ case BPF_CORE_ENUMVAL_EXISTS:
+ case BPF_CORE_ENUMVAL_VALUE:
return true;
default:
return false;
@@ -150,7 +178,7 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
* Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use the access
* string to specify the enumerator's value index that needs to be relocated.
*/
-static int bpf_core_parse_spec(const struct btf *btf,
+static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
__u32 type_id,
const char *spec_str,
enum bpf_core_relo_kind relo_kind,
@@ -272,8 +300,8 @@ static int bpf_core_parse_spec(const struct btf *btf,
return sz;
spec->bit_offset += access_idx * sz * 8;
} else {
- pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
- type_id, spec_str, i, id, btf_kind_str(t));
+ pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+ prog_name, type_id, spec_str, i, id, btf_kind_str(t));
return -EINVAL;
}
}
@@ -346,8 +374,6 @@ recur:
targ_id = btf_array(targ_type)->type;
goto recur;
default:
- pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
- btf_kind(local_type), local_id, targ_id);
return 0;
}
}
@@ -388,7 +414,7 @@ static int bpf_core_match_member(const struct btf *local_btf,
return 0;
local_id = local_acc->type_id;
- local_type = btf__type_by_id(local_btf, local_id);
+ local_type = btf_type_by_id(local_btf, local_id);
local_member = btf_members(local_type) + local_acc->idx;
local_name = btf__name_by_offset(local_btf, local_member->name_off);
@@ -571,7 +597,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
*field_sz = 0;
- if (relo->kind == BPF_FIELD_EXISTS) {
+ if (relo->kind == BPF_CORE_FIELD_EXISTS) {
*val = spec ? 1 : 0;
return 0;
}
@@ -580,11 +606,11 @@ static int bpf_core_calc_field_relo(const char *prog_name,
return -EUCLEAN; /* request instruction poisoning */
acc = &spec->spec[spec->len - 1];
- t = btf__type_by_id(spec->btf, acc->type_id);
+ t = btf_type_by_id(spec->btf, acc->type_id);
/* a[n] accessor needs special handling */
if (!acc->name) {
- if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
+ if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) {
*val = spec->bit_offset / 8;
/* remember field size for load/store mem size */
sz = btf__resolve_size(spec->btf, acc->type_id);
@@ -592,7 +618,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
return -EINVAL;
*field_sz = sz;
*type_id = acc->type_id;
- } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
+ } else if (relo->kind == BPF_CORE_FIELD_BYTE_SIZE) {
sz = btf__resolve_size(spec->btf, acc->type_id);
if (sz < 0)
return -EINVAL;
@@ -644,36 +670,36 @@ static int bpf_core_calc_field_relo(const char *prog_name,
*validate = !bitfield;
switch (relo->kind) {
- case BPF_FIELD_BYTE_OFFSET:
+ case BPF_CORE_FIELD_BYTE_OFFSET:
*val = byte_off;
if (!bitfield) {
*field_sz = byte_sz;
*type_id = field_type_id;
}
break;
- case BPF_FIELD_BYTE_SIZE:
+ case BPF_CORE_FIELD_BYTE_SIZE:
*val = byte_sz;
break;
- case BPF_FIELD_SIGNED:
+ case BPF_CORE_FIELD_SIGNED:
/* enums will be assumed unsigned */
*val = btf_is_enum(mt) ||
(btf_int_encoding(mt) & BTF_INT_SIGNED);
if (validate)
*validate = true; /* signedness is never ambiguous */
break;
- case BPF_FIELD_LSHIFT_U64:
+ case BPF_CORE_FIELD_LSHIFT_U64:
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
*val = 64 - (bit_off + bit_sz - byte_off * 8);
#else
*val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
#endif
break;
- case BPF_FIELD_RSHIFT_U64:
+ case BPF_CORE_FIELD_RSHIFT_U64:
*val = 64 - bit_sz;
if (validate)
*validate = true; /* right shift is never ambiguous */
break;
- case BPF_FIELD_EXISTS:
+ case BPF_CORE_FIELD_EXISTS:
default:
return -EOPNOTSUPP;
}
@@ -683,10 +709,14 @@ static int bpf_core_calc_field_relo(const char *prog_name,
static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
const struct bpf_core_spec *spec,
- __u32 *val)
+ __u32 *val, bool *validate)
{
__s64 sz;
+ /* by default, always check expected value in bpf_insn */
+ if (validate)
+ *validate = true;
+
/* type-based relos return zero when target type is not found */
if (!spec) {
*val = 0;
@@ -694,20 +724,25 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
}
switch (relo->kind) {
- case BPF_TYPE_ID_TARGET:
+ case BPF_CORE_TYPE_ID_TARGET:
*val = spec->root_type_id;
+ /* type ID, embedded in bpf_insn, might change during linking,
+ * so enforcing it is pointless
+ */
+ if (validate)
+ *validate = false;
break;
- case BPF_TYPE_EXISTS:
+ case BPF_CORE_TYPE_EXISTS:
*val = 1;
break;
- case BPF_TYPE_SIZE:
+ case BPF_CORE_TYPE_SIZE:
sz = btf__resolve_size(spec->btf, spec->root_type_id);
if (sz < 0)
return -EINVAL;
*val = sz;
break;
- case BPF_TYPE_ID_LOCAL:
- /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+ case BPF_CORE_TYPE_ID_LOCAL:
+ /* BPF_CORE_TYPE_ID_LOCAL is handled specially and shouldn't get here */
default:
return -EOPNOTSUPP;
}
@@ -723,13 +758,13 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
const struct btf_enum *e;
switch (relo->kind) {
- case BPF_ENUMVAL_EXISTS:
+ case BPF_CORE_ENUMVAL_EXISTS:
*val = spec ? 1 : 0;
break;
- case BPF_ENUMVAL_VALUE:
+ case BPF_CORE_ENUMVAL_VALUE:
if (!spec)
return -EUCLEAN; /* request instruction poisoning */
- t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+ t = btf_type_by_id(spec->btf, spec->spec[0].type_id);
e = btf_enum(t) + spec->spec[0].idx;
*val = e->val;
break;
@@ -805,8 +840,8 @@ static int bpf_core_calc_relo(const char *prog_name,
if (res->orig_sz != res->new_sz) {
const struct btf_type *orig_t, *new_t;
- orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
- new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+ orig_t = btf_type_by_id(local_spec->btf, res->orig_type_id);
+ new_t = btf_type_by_id(targ_spec->btf, res->new_type_id);
/* There are two use cases in which it's safe to
* adjust load/store's mem size:
@@ -835,8 +870,8 @@ static int bpf_core_calc_relo(const char *prog_name,
res->fail_memsz_adjust = true;
}
} else if (core_relo_is_type_based(relo->kind)) {
- err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
- err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
+ err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val, &res->validate);
+ err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val, NULL);
} else if (core_relo_is_enumval_based(relo->kind)) {
err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
@@ -1045,7 +1080,7 @@ poison:
* [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
* where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
*/
-static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec)
{
const struct btf_type *t;
const struct btf_enum *e;
@@ -1054,7 +1089,7 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
int i;
type_id = spec->root_type_id;
- t = btf__type_by_id(spec->btf, type_id);
+ t = btf_type_by_id(spec->btf, type_id);
s = btf__name_by_offset(spec->btf, t->name_off);
libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
@@ -1147,9 +1182,12 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
const struct bpf_core_relo *relo,
int relo_idx,
const struct btf *local_btf,
- struct bpf_core_cand_list *cands)
+ struct bpf_core_cand_list *cands,
+ struct bpf_core_spec *specs_scratch)
{
- struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
+ struct bpf_core_spec *local_spec = &specs_scratch[0];
+ struct bpf_core_spec *cand_spec = &specs_scratch[1];
+ struct bpf_core_spec *targ_spec = &specs_scratch[2];
struct bpf_core_relo_res cand_res, targ_res;
const struct btf_type *local_type;
const char *local_name;
@@ -1158,10 +1196,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
int i, j, err;
local_id = relo->type_id;
- local_type = btf__type_by_id(local_btf, local_id);
- if (!local_type)
- return -EINVAL;
-
+ local_type = btf_type_by_id(local_btf, local_id);
local_name = btf__name_by_offset(local_btf, local_type->name_off);
if (!local_name)
return -EINVAL;
@@ -1170,7 +1205,8 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
if (str_is_empty(spec_str))
return -EINVAL;
- err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
+ err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str,
+ relo->kind, local_spec);
if (err) {
pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
prog_name, relo_idx, local_id, btf_kind_str(local_type),
@@ -1181,15 +1217,17 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
relo_idx, core_relo_kind_str(relo->kind), relo->kind);
- bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+ bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec);
libbpf_print(LIBBPF_DEBUG, "\n");
/* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
- if (relo->kind == BPF_TYPE_ID_LOCAL) {
- targ_res.validate = true;
+ if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) {
+ /* bpf_insn's imm value could get out of sync during linking */
+ memset(&targ_res, 0, sizeof(targ_res));
+ targ_res.validate = false;
targ_res.poison = false;
- targ_res.orig_val = local_spec.root_type_id;
- targ_res.new_val = local_spec.root_type_id;
+ targ_res.orig_val = local_spec->root_type_id;
+ targ_res.new_val = local_spec->root_type_id;
goto patch_insn;
}
@@ -1200,40 +1238,39 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
return -EOPNOTSUPP;
}
-
for (i = 0, j = 0; i < cands->len; i++) {
- err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
- cands->cands[i].id, &cand_spec);
+ err = bpf_core_spec_match(local_spec, cands->cands[i].btf,
+ cands->cands[i].id, cand_spec);
if (err < 0) {
pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
prog_name, relo_idx, i);
- bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+ bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec);
libbpf_print(LIBBPF_WARN, ": %d\n", err);
return err;
}
pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
relo_idx, err == 0 ? "non-matching" : "matching", i);
- bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+ bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec);
libbpf_print(LIBBPF_DEBUG, "\n");
if (err == 0)
continue;
- err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
+ err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, cand_spec, &cand_res);
if (err)
return err;
if (j == 0) {
targ_res = cand_res;
- targ_spec = cand_spec;
- } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
+ *targ_spec = *cand_spec;
+ } else if (cand_spec->bit_offset != targ_spec->bit_offset) {
/* if there are many field relo candidates, they
* should all resolve to the same bit offset
*/
pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
- prog_name, relo_idx, cand_spec.bit_offset,
- targ_spec.bit_offset);
+ prog_name, relo_idx, cand_spec->bit_offset,
+ targ_spec->bit_offset);
return -EINVAL;
} else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
/* all candidates should result in the same relocation
@@ -1251,7 +1288,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
}
/*
- * For BPF_FIELD_EXISTS relo or when used BPF program has field
+ * For BPF_CORE_FIELD_EXISTS relo, or when the BPF program in use has field
* existence checks or kernel version/config checks, it's expected
* that we might not find any candidates. In this case, if field
* wasn't found in any candidate, the list of candidates shouldn't
@@ -1277,7 +1314,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
prog_name, relo_idx);
/* calculate single target relo result explicitly */
- err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res);
+ err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, &targ_res);
if (err)
return err;
}
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
index 3b9f8f18346c..17799819ad7c 100644
--- a/tools/lib/bpf/relo_core.h
+++ b/tools/lib/bpf/relo_core.h
@@ -4,81 +4,10 @@
#ifndef __RELO_CORE_H
#define __RELO_CORE_H
-/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
- * has to be adjusted by relocations.
- */
-enum bpf_core_relo_kind {
- BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
- BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
- BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
- BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
- BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
- BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
- BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
- BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */
- BPF_TYPE_EXISTS = 8, /* type existence in target kernel */
- BPF_TYPE_SIZE = 9, /* type size in bytes */
- BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
- BPF_ENUMVAL_VALUE = 11, /* enum value integer value */
-};
-
-/* The minimum bpf_core_relo checked by the loader
- *
- * CO-RE relocation captures the following data:
- * - insn_off - instruction offset (in bytes) within a BPF program that needs
- * its insn->imm field to be relocated with actual field info;
- * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- * type or field;
- * - access_str_off - offset into corresponding .BTF string section. String
- * interpretation depends on specific relocation kind:
- * - for field-based relocations, string encodes an accessed field using
- * a sequence of field and array indices, separated by colon (:). It's
- * conceptually very close to LLVM's getelementptr ([0]) instruction's
- * arguments for identifying offset to a field.
- * - for type-based relocations, strings is expected to be just "0";
- * - for enum value-based relocations, string contains an index of enum
- * value within its enum type;
- *
- * Example to provide a better feel.
- *
- * struct sample {
- * int a;
- * struct {
- * int b[10];
- * };
- * };
- *
- * struct sample *s = ...;
- * int x = &s->a; // encoded as "0:0" (a is field #0)
- * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
- * // b is field #0 inside anon struct, accessing elem #5)
- * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
- *
- * type_id for all relocs in this example will capture BTF type id of
- * `struct sample`.
- *
- * Such relocation is emitted when using __builtin_preserve_access_index()
- * Clang built-in, passing expression that captures field address, e.g.:
- *
- * bpf_probe_read(&dst, sizeof(dst),
- * __builtin_preserve_access_index(&src->a.b.c));
- *
- * In this case Clang will emit field relocation recording necessary data to
- * be able to find offset of embedded `a.b.c` field within `src` struct.
- *
- * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
- */
-struct bpf_core_relo {
- __u32 insn_off;
- __u32 type_id;
- __u32 access_str_off;
- enum bpf_core_relo_kind kind;
-};
+#include <linux/bpf.h>
struct bpf_core_cand {
const struct btf *btf;
- const struct btf_type *t;
- const char *name;
__u32 id;
};
@@ -88,11 +17,39 @@ struct bpf_core_cand_list {
int len;
};
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+ __u32 type_id; /* struct/union type or array element type */
+ __u32 idx; /* field index or array index */
+ const char *name; /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+ const struct btf *btf;
+ /* high-level spec: named fields and array indices only */
+ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* original unresolved (no skip_mods_or_typedefs) root type ID */
+ __u32 root_type_id;
+ /* CO-RE relocation kind */
+ enum bpf_core_relo_kind relo_kind;
+ /* high-level spec length */
+ int len;
+ /* raw, low-level spec: 1-to-1 with accessor spec string */
+ int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+ /* raw spec length */
+ int raw_len;
+ /* field bit offset represented by spec */
+ __u32 bit_offset;
+};
+
int bpf_core_apply_relo_insn(const char *prog_name,
struct bpf_insn *insn, int insn_idx,
const struct bpf_core_relo *relo, int relo_idx,
const struct btf *local_btf,
- struct bpf_core_cand_list *cands);
+ struct bpf_core_cand_list *cands,
+ struct bpf_core_spec *specs_scratch);
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
const struct btf *targ_btf, __u32 targ_id);
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index 9cf66702fa8d..0b84d8e6b72a 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -7,6 +7,16 @@
#include <sys/syscall.h>
#include <sys/mman.h>
+#ifndef __NR_bpf
+# if defined(__mips__) && defined(_ABIO32)
+# define __NR_bpf 4355
+# elif defined(__mips__) && defined(_ABIN32)
+# define __NR_bpf 6319
+# elif defined(__mips__) && defined(_ABI64)
+# define __NR_bpf 5315
+# endif
+#endif
+
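These syscall numbers let the generated light skeletons invoke bpf(2) directly; conceptually the wrapper in this header boils down to the following (a sketch, not the verbatim helper):

    static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
    			       unsigned int size)
    {
    	return syscall(__NR_bpf, cmd, attr, size);
    }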
/* This file is a base header for auto-generated *.lskel.h files.
* Its contents will change and may become part of auto-generation in the future.
*
@@ -65,8 +75,7 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
int map_fd = -1, prog_fd = -1, key = 0, err;
union bpf_attr attr;
- map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "__loader.map", 4,
- opts->data_sz, 1, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, NULL);
if (map_fd < 0) {
opts->errstr = "failed to create loader map";
err = -errno;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 81f8fbc85e70..edafe56664f3 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -35,6 +35,11 @@
#include "libbpf_internal.h"
#include "xsk.h"
+/* the entire xsk.h and xsk.c are going away in libbpf 1.0, so ignore all internal
+ * uses of deprecated APIs
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
#ifndef SOL_XDP
#define SOL_XDP 283
#endif
@@ -364,8 +369,6 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
static enum xsk_prog get_xsk_prog(void)
{
enum xsk_prog detected = XSK_PROG_FALLBACK;
- struct bpf_load_program_attr prog_attr;
- struct bpf_create_map_attr map_attr;
__u32 size_out, retval, duration;
char data_in = 0, data_out;
struct bpf_insn insns[] = {
@@ -375,27 +378,15 @@ static enum xsk_prog get_xsk_prog(void)
BPF_EMIT_CALL(BPF_FUNC_redirect_map),
BPF_EXIT_INSN(),
};
- int prog_fd, map_fd, ret;
-
- memset(&map_attr, 0, sizeof(map_attr));
- map_attr.map_type = BPF_MAP_TYPE_XSKMAP;
- map_attr.key_size = sizeof(int);
- map_attr.value_size = sizeof(int);
- map_attr.max_entries = 1;
+ int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns);
- map_fd = bpf_create_map_xattr(&map_attr);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL);
if (map_fd < 0)
return detected;
insns[0].imm = map_fd;
- memset(&prog_attr, 0, sizeof(prog_attr));
- prog_attr.prog_type = BPF_PROG_TYPE_XDP;
- prog_attr.insns = insns;
- prog_attr.insns_cnt = ARRAY_SIZE(insns);
- prog_attr.license = "GPL";
-
- prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0);
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
if (prog_fd < 0) {
close(map_fd);
return detected;
@@ -495,10 +486,13 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
};
struct bpf_insn *progs[] = {prog, prog_redirect_flags};
enum xsk_prog option = get_xsk_prog();
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_buf = log_buf,
+ .log_size = log_buf_size,
+ );
- prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, progs[option], insns_cnt[option],
- "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
- log_buf_size);
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause",
+ progs[option], insns_cnt[option], &opts);
if (prog_fd < 0) {
pr_warn("BPF log buffer:\n%s", log_buf);
return prog_fd;
@@ -554,8 +548,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
return -errno;
ifr.ifr_data = (void *)&channels;
- memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1);
- ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+ libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ);
err = ioctl(fd, SIOCETHTOOL, &ifr);
if (err && errno != EOPNOTSUPP) {
ret = -errno;
@@ -590,8 +583,8 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
if (max_queues < 0)
return max_queues;
- fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
- sizeof(int), sizeof(int), max_queues, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map",
+ sizeof(int), sizeof(int), max_queues, NULL);
if (fd < 0)
return fd;
@@ -725,14 +718,12 @@ static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
static bool xsk_probe_bpf_link(void)
{
- DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
- .flags = XDP_FLAGS_SKB_MODE);
- struct bpf_load_program_attr prog_attr;
+ LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE);
struct bpf_insn insns[2] = {
BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
BPF_EXIT_INSN()
};
- int prog_fd, link_fd = -1;
+ int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns);
int ifindex_lo = 1;
bool ret = false;
int err;
@@ -744,13 +735,7 @@ static bool xsk_probe_bpf_link(void)
if (link_fd >= 0)
return true;
- memset(&prog_attr, 0, sizeof(prog_attr));
- prog_attr.prog_type = BPF_PROG_TYPE_XDP;
- prog_attr.insns = insns;
- prog_attr.insns_cnt = ARRAY_SIZE(insns);
- prog_attr.license = "GPL";
-
- prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0);
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
if (prog_fd < 0)
return ret;
@@ -782,8 +767,7 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
}
ctx->ifindex = ifindex;
- memcpy(ctx->ifname, ifname, IFNAMSIZ -1);
- ctx->ifname[IFNAMSIZ - 1] = 0;
+ libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
xsk->ctx = ctx;
xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
@@ -965,8 +949,7 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
ctx->refcount = 1;
ctx->umem = umem;
ctx->queue_id = queue_id;
- memcpy(ctx->ifname, ifname, IFNAMSIZ - 1);
- ctx->ifname[IFNAMSIZ - 1] = '\0';
+ libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
ctx->fill = fill;
ctx->comp = comp;
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
index 109aa7ffcf97..ba4b8d94e004 100644
--- a/tools/lib/find_bit.c
+++ b/tools/lib/find_bit.c
@@ -96,6 +96,26 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
}
#endif
+#ifndef find_first_and_bit
+/*
+ * Find the first bit that is set in both memory regions (their bitwise AND).
+ */
+unsigned long _find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ unsigned long idx, val;
+
+ for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+ val = addr1[idx] & addr2[idx];
+ if (val)
+ return min(idx * BITS_PER_LONG + __ffs(val), size);
+ }
+
+ return size;
+}
+#endif
+
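A typical caller scans for the first index set in both bitmaps; a short sketch (map_a, map_b and nbits are assumed to be defined, and the find_first_and_bit() wrapper comes from linux/find.h):

    unsigned long first = find_first_and_bit(map_a, map_b, nbits);

    if (first == nbits)
    	return -ENOENT;	/* the two bitmaps share no set bits */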
#ifndef find_first_zero_bit
/*
* Find the first cleared bit in a memory region.
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index 63ae5e0195ce..32c5051c24eb 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -48,6 +48,7 @@ SYNOPSIS
int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
bool perf_cpu_map__empty(const struct perf_cpu_map *map);
int perf_cpu_map__max(struct perf_cpu_map *map);
+ bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus)
--
@@ -135,16 +136,16 @@ SYNOPSIS
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
void perf_evsel__close(struct perf_evsel *evsel);
- void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+ void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);
int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
void perf_evsel__munmap(struct perf_evsel *evsel);
- void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
- int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+ void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread);
+ int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count);
int perf_evsel__enable(struct perf_evsel *evsel);
- int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
+ int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
int perf_evsel__disable(struct perf_evsel *evsel);
- int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
+ int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index adaad3dddf6e..ee66760f1e63 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -10,15 +10,24 @@
#include <ctype.h>
#include <limits.h>
-struct perf_cpu_map *perf_cpu_map__dummy_new(void)
+static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
{
- struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
+ struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
if (cpus != NULL) {
- cpus->nr = 1;
- cpus->map[0] = -1;
+ cpus->nr = nr_cpus;
refcount_set(&cpus->refcnt, 1);
+
}
+ return cpus;
+}
+
+struct perf_cpu_map *perf_cpu_map__dummy_new(void)
+{
+ struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);
+
+ if (cpus)
+ cpus->map[0].cpu = -1;
return cpus;
}
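
perf_cpu_map__alloc() centralizes the over-allocation that the open-coded malloc() calls used to repeat: one block holds the header plus nr trailing struct perf_cpu entries. A minimal sketch of the flexible-array-member pattern, with illustrative names:

    #include <stdlib.h>

    struct cpu { int cpu; };

    struct cpu_map {
            int nr;
            struct cpu map[];       /* flexible array member */
    };

    static struct cpu_map *cpu_map_alloc(int nr)
    {
            /* One allocation covers the header and all nr entries. */
            struct cpu_map *m = malloc(sizeof(*m) + sizeof(struct cpu) * nr);

            if (m)
                    m->nr = nr;
            return m;
    }
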
@@ -54,15 +63,12 @@ static struct perf_cpu_map *cpu_map__default_new(void)
if (nr_cpus < 0)
return NULL;
- cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int));
+ cpus = perf_cpu_map__alloc(nr_cpus);
if (cpus != NULL) {
int i;
for (i = 0; i < nr_cpus; ++i)
- cpus->map[i] = i;
-
- cpus->nr = nr_cpus;
- refcount_set(&cpus->refcnt, 1);
+ cpus->map[i].cpu = i;
}
return cpus;
@@ -73,31 +79,32 @@ struct perf_cpu_map *perf_cpu_map__default_new(void)
return cpu_map__default_new();
}
-static int cmp_int(const void *a, const void *b)
+
+static int cmp_cpu(const void *a, const void *b)
{
- return *(const int *)a - *(const int*)b;
+ const struct perf_cpu *cpu_a = a, *cpu_b = b;
+
+ return cpu_a->cpu - cpu_b->cpu;
}
-static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
+static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)
{
- size_t payload_size = nr_cpus * sizeof(int);
- struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
+ size_t payload_size = nr_cpus * sizeof(struct perf_cpu);
+ struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus);
int i, j;
if (cpus != NULL) {
memcpy(cpus->map, tmp_cpus, payload_size);
- qsort(cpus->map, nr_cpus, sizeof(int), cmp_int);
+ qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);
/* Remove dups */
j = 0;
for (i = 0; i < nr_cpus; i++) {
- if (i == 0 || cpus->map[i] != cpus->map[i - 1])
- cpus->map[j++] = cpus->map[i];
+ if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu)
+ cpus->map[j++].cpu = cpus->map[i].cpu;
}
cpus->nr = j;
assert(j <= nr_cpus);
- refcount_set(&cpus->refcnt, 1);
}
-
return cpus;
}
@@ -105,7 +112,7 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
{
struct perf_cpu_map *cpus = NULL;
int nr_cpus = 0;
- int *tmp_cpus = NULL, *tmp;
+ struct perf_cpu *tmp_cpus = NULL, *tmp;
int max_entries = 0;
int n, cpu, prev;
char sep;
@@ -124,24 +131,24 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
if (new_max >= max_entries) {
max_entries = new_max + MAX_NR_CPUS / 2;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto out_free_tmp;
tmp_cpus = tmp;
}
while (++prev < cpu)
- tmp_cpus[nr_cpus++] = prev;
+ tmp_cpus[nr_cpus++].cpu = prev;
}
if (nr_cpus == max_entries) {
max_entries += MAX_NR_CPUS;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto out_free_tmp;
tmp_cpus = tmp;
}
- tmp_cpus[nr_cpus++] = cpu;
+ tmp_cpus[nr_cpus++].cpu = cpu;
if (n == 2 && sep == '-')
prev = cpu;
else
@@ -179,7 +186,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
unsigned long start_cpu, end_cpu = 0;
char *p = NULL;
int i, nr_cpus = 0;
- int *tmp_cpus = NULL, *tmp;
+ struct perf_cpu *tmp_cpus = NULL, *tmp;
int max_entries = 0;
if (!cpu_list)
@@ -220,17 +227,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
for (; start_cpu <= end_cpu; start_cpu++) {
/* check for duplicates */
for (i = 0; i < nr_cpus; i++)
- if (tmp_cpus[i] == (int)start_cpu)
+ if (tmp_cpus[i].cpu == (int)start_cpu)
goto invalid;
if (nr_cpus == max_entries) {
max_entries += MAX_NR_CPUS;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto invalid;
tmp_cpus = tmp;
}
- tmp_cpus[nr_cpus++] = (int)start_cpu;
+ tmp_cpus[nr_cpus++].cpu = (int)start_cpu;
}
if (*p)
++p;
@@ -250,12 +257,16 @@ out:
return cpus;
}
-int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
+struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
+ struct perf_cpu result = {
+ .cpu = -1
+ };
+
if (cpus && idx < cpus->nr)
return cpus->map[idx];
- return -1;
+ return result;
}
int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
@@ -265,21 +276,26 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
bool perf_cpu_map__empty(const struct perf_cpu_map *map)
{
- return map ? map->map[0] == -1 : true;
+ return map ? map->map[0].cpu == -1 : true;
}
-int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
+int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
- int low = 0, high = cpus->nr;
+ int low, high;
+ if (!cpus)
+ return -1;
+
+ low = 0;
+ high = cpus->nr;
while (low < high) {
- int idx = (low + high) / 2,
- cpu_at_idx = cpus->map[idx];
+ int idx = (low + high) / 2;
+ struct perf_cpu cpu_at_idx = cpus->map[idx];
- if (cpu_at_idx == cpu)
+ if (cpu_at_idx.cpu == cpu.cpu)
return idx;
- if (cpu_at_idx > cpu)
+ if (cpu_at_idx.cpu > cpu.cpu)
high = idx;
else
low = idx + 1;
@@ -288,10 +304,19 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
return -1;
}
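
The index lookup is a plain binary search, valid because cpu_map__trim_new() and cpu_map__default_new() both produce sorted maps. The same logic over bare ints, for reference:

    /* Return the index of cpu in the sorted array map[0..nr), or -1. */
    static int cpu_map_idx(const int *map, int nr, int cpu)
    {
            int low = 0, high = nr;

            while (low < high) {
                    int mid = (low + high) / 2;

                    if (map[mid] == cpu)
                            return mid;
                    if (map[mid] > cpu)
                            high = mid;
                    else
                            low = mid + 1;
            }
            return -1;
    }
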
-int perf_cpu_map__max(struct perf_cpu_map *map)
+bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
+ return perf_cpu_map__idx(cpus, cpu) != -1;
+}
+
+struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map)
+{
+ struct perf_cpu result = {
+ .cpu = -1
+ };
+
// cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
- return map->nr > 0 ? map->map[map->nr - 1] : -1;
+ return map->nr > 0 ? map->map[map->nr - 1] : result;
}
/*
@@ -305,7 +330,7 @@ int perf_cpu_map__max(struct perf_cpu_map *map)
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other)
{
- int *tmp_cpus;
+ struct perf_cpu *tmp_cpus;
int tmp_len;
int i, j, k;
struct perf_cpu_map *merged;
@@ -319,19 +344,19 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
if (!other)
return orig;
if (orig->nr == other->nr &&
- !memcmp(orig->map, other->map, orig->nr * sizeof(int)))
+ !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu)))
return orig;
tmp_len = orig->nr + other->nr;
- tmp_cpus = malloc(tmp_len * sizeof(int));
+ tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
if (!tmp_cpus)
return NULL;
/* Standard merge algorithm from wikipedia */
i = j = k = 0;
while (i < orig->nr && j < other->nr) {
- if (orig->map[i] <= other->map[j]) {
- if (orig->map[i] == other->map[j])
+ if (orig->map[i].cpu <= other->map[j].cpu) {
+ if (orig->map[i].cpu == other->map[j].cpu)
j++;
tmp_cpus[k++] = orig->map[i++];
} else
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index e37dfad31383..9a770bfdc804 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -407,7 +407,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int output, int cpu)
+ int output, struct perf_cpu cpu)
{
return perf_mmap__mmap(map, mp, output, cpu);
}
@@ -426,7 +426,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int idx, struct perf_mmap_param *mp, int cpu_idx,
int thread, int *_output, int *_output_overwrite)
{
- int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
+ struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
struct perf_evsel *evsel;
int revent;
@@ -643,14 +643,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
}
-void __perf_evlist__set_leader(struct list_head *list)
+void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader)
{
- struct perf_evsel *evsel, *leader;
+ struct perf_evsel *first, *last, *evsel;
- leader = list_entry(list->next, struct perf_evsel, node);
- evsel = list_entry(list->prev, struct perf_evsel, node);
+ first = list_first_entry(list, struct perf_evsel, node);
+ last = list_last_entry(list, struct perf_evsel, node);
- leader->nr_members = evsel->idx - leader->idx + 1;
+ leader->nr_members = last->idx - first->idx + 1;
__perf_evlist__for_each_entry(list, evsel)
evsel->leader = leader;
@@ -659,7 +659,10 @@ void __perf_evlist__set_leader(struct list_head *list)
void perf_evlist__set_leader(struct perf_evlist *evlist)
{
if (evlist->nr_entries) {
+ struct perf_evsel *first = list_entry(evlist->entries.next,
+ struct perf_evsel, node);
+
evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
- __perf_evlist__set_leader(&evlist->entries);
+ __perf_evlist__set_leader(&evlist->entries, first);
}
}
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 8441e3e1aaac..210ea7c06ce8 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -43,18 +43,22 @@ void perf_evsel__delete(struct perf_evsel *evsel)
free(evsel);
}
-#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y))
-#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL)
+#define FD(_evsel, _cpu_map_idx, _thread) \
+ ((int *)xyarray__entry(_evsel->fd, _cpu_map_idx, _thread))
+#define MMAP(_evsel, _cpu_map_idx, _thread) \
+ (_evsel->mmap ? ((struct perf_mmap *) xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread)) \
+ : NULL)
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
if (evsel->fd) {
- int cpu, thread;
- for (cpu = 0; cpu < ncpus; cpu++) {
+ int idx, thread;
+
+ for (idx = 0; idx < ncpus; idx++) {
for (thread = 0; thread < nthreads; thread++) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, idx, thread);
if (fd)
*fd = -1;
@@ -74,13 +78,13 @@ static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthre
static int
sys_perf_event_open(struct perf_event_attr *attr,
- pid_t pid, int cpu, int group_fd,
+ pid_t pid, struct perf_cpu cpu, int group_fd,
unsigned long flags)
{
- return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+ return syscall(__NR_perf_event_open, attr, pid, cpu.cpu, group_fd, flags);
}
-static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd)
+static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd)
{
struct perf_evsel *leader = evsel->leader;
int *fd;
@@ -97,7 +101,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou
if (!leader->fd)
return -ENOTCONN;
- fd = FD(leader, cpu, thread);
+ fd = FD(leader, cpu_map_idx, thread);
if (fd == NULL || *fd == -1)
return -EBADF;
@@ -109,7 +113,8 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
- int cpu, thread, err = 0;
+ struct perf_cpu cpu;
+ int idx, thread, err = 0;
if (cpus == NULL) {
static struct perf_cpu_map *empty_cpu_map;
@@ -136,24 +141,24 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
}
if (evsel->fd == NULL &&
- perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
+ perf_evsel__alloc_fd(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
return -ENOMEM;
- for (cpu = 0; cpu < cpus->nr; cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
for (thread = 0; thread < threads->nr; thread++) {
int fd, group_fd, *evsel_fd;
- evsel_fd = FD(evsel, cpu, thread);
+ evsel_fd = FD(evsel, idx, thread);
if (evsel_fd == NULL)
return -EINVAL;
- err = get_group_fd(evsel, cpu, thread, &group_fd);
+ err = get_group_fd(evsel, idx, thread, &group_fd);
if (err < 0)
return err;
fd = sys_perf_event_open(&evsel->attr,
threads->map[thread].pid,
- cpus->map[cpu], group_fd, 0);
+ cpu, group_fd, 0);
if (fd < 0)
return -errno;
@@ -165,12 +170,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
return err;
}
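
perf_cpu_map__for_each_cpu() hands the loop body both the map index (used to address per-CPU rows in the fd xyarray) and the CPU value (passed on to the syscall). A sketch of what such an index-plus-value iterator macro can look like, assuming the struct layout shown further below; the real macro is defined in perf/cpumap.h:

    /* Illustrative iterator: idx walks the array, cpu is the value at idx. */
    #define cpu_map_for_each(cpu, idx, map)                                 \
            for ((idx) = 0;                                                 \
                 (idx) < (map)->nr && (((cpu) = (map)->map[idx]), 1);       \
                 (idx)++)
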
-static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
+static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
if (fd && *fd >= 0) {
close(*fd);
@@ -181,10 +186,8 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
void perf_evsel__close_fd(struct perf_evsel *evsel)
{
- int cpu;
-
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
- perf_evsel__close_fd_cpu(evsel, cpu);
+ for (int idx = 0; idx < xyarray__max_x(evsel->fd); idx++)
+ perf_evsel__close_fd_cpu(evsel, idx);
}
void perf_evsel__free_fd(struct perf_evsel *evsel)
@@ -202,29 +205,29 @@ void perf_evsel__close(struct perf_evsel *evsel)
perf_evsel__free_fd(evsel);
}
-void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
+void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
if (evsel->fd == NULL)
return;
- perf_evsel__close_fd_cpu(evsel, cpu);
+ perf_evsel__close_fd_cpu(evsel, cpu_map_idx);
}
void perf_evsel__munmap(struct perf_evsel *evsel)
{
- int cpu, thread;
+ int idx, thread;
if (evsel->fd == NULL || evsel->mmap == NULL)
return;
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+ for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, idx, thread);
if (fd == NULL || *fd < 0)
continue;
- perf_mmap__munmap(MMAP(evsel, cpu, thread));
+ perf_mmap__munmap(MMAP(evsel, idx, thread));
}
}
@@ -234,7 +237,7 @@ void perf_evsel__munmap(struct perf_evsel *evsel)
int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
{
- int ret, cpu, thread;
+ int ret, idx, thread;
struct perf_mmap_param mp = {
.prot = PROT_READ | PROT_WRITE,
.mask = (pages * page_size) - 1,
@@ -246,15 +249,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0)
return -ENOMEM;
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+ for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, idx, thread);
struct perf_mmap *map;
+ struct perf_cpu cpu = perf_cpu_map__cpu(evsel->cpus, idx);
if (fd == NULL || *fd < 0)
continue;
- map = MMAP(evsel, cpu, thread);
+ map = MMAP(evsel, idx, thread);
perf_mmap__init(map, NULL, false, NULL);
ret = perf_mmap__mmap(map, &mp, *fd, cpu);
@@ -268,14 +272,14 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
return 0;
}
-void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread)
+void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread)
{
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
- if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL)
+ if (fd == NULL || *fd < 0 || MMAP(evsel, cpu_map_idx, thread) == NULL)
return NULL;
- return MMAP(evsel, cpu, thread)->base;
+ return MMAP(evsel, cpu_map_idx, thread)->base;
}
int perf_evsel__read_size(struct perf_evsel *evsel)
@@ -303,19 +307,19 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
return size;
}
-int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
size_t size = perf_evsel__read_size(evsel);
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
memset(count, 0, sizeof(*count));
if (fd == NULL || *fd < 0)
return -EINVAL;
- if (MMAP(evsel, cpu, thread) &&
- !perf_mmap__read_self(MMAP(evsel, cpu, thread), count))
+ if (MMAP(evsel, cpu_map_idx, thread) &&
+ !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count))
return 0;
if (readn(*fd, count->values, size) <= 0)
@@ -326,13 +330,13 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
int ioc, void *arg,
- int cpu)
+ int cpu_map_idx)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
int err;
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
if (fd == NULL || *fd < 0)
return -1;
@@ -346,9 +350,9 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
return 0;
}
-int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
+int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu);
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx);
}
int perf_evsel__enable(struct perf_evsel *evsel)
@@ -361,9 +365,9 @@ int perf_evsel__enable(struct perf_evsel *evsel)
return err;
}
-int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
+int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu);
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu_map_idx);
}
int perf_evsel__disable(struct perf_evsel *evsel)
@@ -380,7 +384,7 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
{
int err = 0, i;
- for (i = 0; i < evsel->cpus->nr && !err; i++)
+ for (i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++)
err = perf_evsel__run_ioctl(evsel,
PERF_EVENT_IOC_SET_FILTER,
(void *)filter, i);
@@ -431,3 +435,22 @@ void perf_evsel__free_id(struct perf_evsel *evsel)
zfree(&evsel->id);
evsel->ids = 0;
}
+
+void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, __s8 *pscaled)
+{
+ s8 scaled = 0;
+
+ if (scale) {
+ if (count->run == 0) {
+ scaled = -1;
+ count->val = 0;
+ } else if (count->run < count->ena) {
+ scaled = 1;
+ count->val = (u64)((double)count->val * count->ena / count->run);
+ }
+ }
+
+ if (pscaled)
+ *pscaled = scaled;
+}
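
When an event was multiplexed, run < ena because the kernel only counted for a fraction of the enabled window, so the raw value is extrapolated by ena/run. A worked example with made-up numbers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Counter scheduled for 25ms out of a 100ms enabled window. */
            uint64_t val = 1000000, ena = 100000000, run = 25000000;

            if (run != 0 && run < ena)
                    val = (uint64_t)((double)val * ena / run);

            printf("%llu\n", (unsigned long long)val); /* prints 4000000 */
            return 0;
    }
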
diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h
index 840d4032587b..581f9ffb4237 100644
--- a/tools/lib/perf/include/internal/cpumap.h
+++ b/tools/lib/perf/include/internal/cpumap.h
@@ -4,16 +4,30 @@
#include <linux/refcount.h>
+/** A wrapper around a CPU number to avoid confusion with indices into the perf_cpu_map's map array. */
+struct perf_cpu {
+ int cpu;
+};
+
+/**
+ * A sized, reference counted, sorted array of integers representing CPU
+ * numbers. This is commonly used to capture which CPUs a PMU is associated
+ * with. The indices into the cpumap are frequently used as they avoid having
+ * gaps if CPU numbers were used. For events associated with a pid, rather than
+ * a CPU, a single dummy map with an entry of -1 is used.
+ */
struct perf_cpu_map {
refcount_t refcnt;
+ /** Length of the map array. */
int nr;
- int map[];
+ /** The CPU values. */
+ struct perf_cpu map[];
};
#ifndef MAX_NR_CPUS
#define MAX_NR_CPUS 2048
#endif
-int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu);
+int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index f366dbad6a88..4cefade540bd 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <api/fd/array.h>
+#include <internal/cpumap.h>
#include <internal/evsel.h>
#define PERF_EVLIST__HLIST_BITS 8
@@ -36,7 +37,7 @@ typedef void
typedef struct perf_mmap*
(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);
typedef int
-(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int);
+(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, struct perf_cpu);
struct perf_evlist_mmap_ops {
perf_evlist_mmap__cb_idx_t idx;
@@ -127,5 +128,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
-void __perf_evlist__set_leader(struct list_head *list);
+void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader);
#endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
index 1f3eacbad2e8..cfc9ebd7968e 100644
--- a/tools/lib/perf/include/internal/evsel.h
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -6,8 +6,8 @@
#include <linux/perf_event.h>
#include <stdbool.h>
#include <sys/types.h>
+#include <internal/cpumap.h>
-struct perf_cpu_map;
struct perf_thread_map;
struct xyarray;
@@ -27,7 +27,7 @@ struct perf_sample_id {
* queue number.
*/
int idx;
- int cpu;
+ struct perf_cpu cpu;
pid_t tid;
/* Holds total ID period value for PERF_SAMPLE_READ processing. */
diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h
index 5e3422f40ed5..5a062af8e9d8 100644
--- a/tools/lib/perf/include/internal/mmap.h
+++ b/tools/lib/perf/include/internal/mmap.h
@@ -6,6 +6,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <stdbool.h>
+#include <internal/cpumap.h>
/* perf sample has 16 bits size limit */
#define PERF_SAMPLE_MAX_SIZE (1 << 16)
@@ -24,7 +25,7 @@ struct perf_mmap {
void *base;
int mask;
int fd;
- int cpu;
+ struct perf_cpu cpu;
refcount_t refcnt;
u64 prev;
u64 start;
@@ -46,7 +47,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map);
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
bool overwrite, libperf_unmap_cb_t unmap_cb);
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int fd, int cpu);
+ int fd, struct perf_cpu cpu);
void perf_mmap__munmap(struct perf_mmap *map);
void perf_mmap__get(struct perf_mmap *map);
void perf_mmap__put(struct perf_mmap *map);
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 7c27766ea0bf..15b8faafd615 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -3,11 +3,10 @@
#define __LIBPERF_CPUMAP_H
#include <perf/core.h>
+#include <perf/cpumap.h>
#include <stdio.h>
#include <stdbool.h>
-struct perf_cpu_map;
-
LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
@@ -16,10 +15,11 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
-LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
-LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
+LIBPERF_API struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map);
+LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \
for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \
diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h
index 60eae25076d3..2a9516b42d15 100644
--- a/tools/lib/perf/include/perf/evsel.h
+++ b/tools/lib/perf/include/perf/evsel.h
@@ -4,6 +4,8 @@
#include <stdint.h>
#include <perf/core.h>
+#include <stdbool.h>
+#include <linux/types.h>
struct perf_evsel;
struct perf_event_attr;
@@ -26,18 +28,20 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
-LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel);
-LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
-LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread);
+LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count);
LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
+LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, __s8 *pscaled);
#endif /* __LIBPERF_EVSEL_H */
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 71468606e8a7..93696affda2e 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -10,6 +10,7 @@ LIBPERF_0.0.1 {
perf_cpu_map__cpu;
perf_cpu_map__empty;
perf_cpu_map__max;
+ perf_cpu_map__has;
perf_thread_map__new_dummy;
perf_thread_map__set_pid;
perf_thread_map__comm;
@@ -50,6 +51,7 @@ LIBPERF_0.0.1 {
perf_mmap__read_init;
perf_mmap__read_done;
perf_mmap__read_event;
+ perf_counts_values__scale;
local:
*;
};
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index c89dfa5f67b3..f7ee07cb5818 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -32,7 +32,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
}
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int fd, int cpu)
+ int fd, struct perf_cpu cpu)
{
map->prev = 0;
map->mask = mp->mask;
@@ -353,8 +353,6 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count
count->ena += delta;
if (idx)
count->run += delta;
-
- cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
}
count->val = cnt;
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index ce91a582f0e4..b3479dfa9a1c 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -21,6 +21,9 @@
#include "tests.h"
#include <internal/evsel.h>
+#define EVENT_NUM 15
+#define WAIT_COUNT 100000000UL
+
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
{
@@ -331,7 +334,8 @@ static int test_mmap_cpus(void)
};
cpu_set_t saved_mask;
char path[PATH_MAX];
- int id, err, cpu, tmp;
+ int id, err, tmp;
+ struct perf_cpu cpu;
union perf_event *event;
int count = 0;
@@ -374,7 +378,7 @@ static int test_mmap_cpus(void)
cpu_set_t mask;
CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
+ CPU_SET(cpu.cpu, &mask);
err = sched_setaffinity(0, sizeof(mask), &mask);
__T("sched_setaffinity failed", err == 0);
@@ -413,6 +417,159 @@ static int test_mmap_cpus(void)
return 0;
}
+static double display_error(long long average,
+ long long high,
+ long long low,
+ long long expected)
+{
+ double error;
+
+ error = (((double)average - expected) / expected) * 100.0;
+
+ __T_VERBOSE(" Expected: %lld\n", expected);
+ __T_VERBOSE(" High: %lld Low: %lld Average: %lld\n",
+ high, low, average);
+
+ __T_VERBOSE(" Average Error = %.2f%%\n", error);
+
+ return error;
+}
+
+static int test_stat_multiplexing(void)
+{
+ struct perf_counts_values expected_counts = { .val = 0 };
+ struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },};
+ struct perf_thread_map *threads;
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING,
+ .disabled = 1,
+ };
+ int err, i, nonzero = 0;
+ unsigned long count;
+ long long max = 0, min = 0, avg = 0;
+ double error = 0.0;
+ s8 scaled = 0;
+
+ /* read for non-multiplexing event count */
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ err = perf_evsel__open(evsel, NULL, threads);
+ __T("failed to open evsel", err == 0);
+
+ err = perf_evsel__enable(evsel);
+ __T("failed to enable evsel", err == 0);
+
+ /* wait loop */
+ count = WAIT_COUNT;
+ while (count--)
+ ;
+
+ perf_evsel__read(evsel, 0, 0, &expected_counts);
+ __T("failed to read value for evsel", expected_counts.val != 0);
+ __T("failed to read non-multiplexing event count",
+ expected_counts.ena == expected_counts.run);
+
+ err = perf_evsel__disable(evsel);
+ __T("failed to enable evsel", err == 0);
+
+ perf_evsel__close(evsel);
+ perf_evsel__delete(evsel);
+
+ perf_thread_map__put(threads);
+
+ /* read for multiplexing event count */
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evlist = perf_evlist__new();
+ __T("failed to create evlist", evlist);
+
+ for (i = 0; i < EVENT_NUM; i++) {
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ perf_evlist__add(evlist, evsel);
+ }
+ perf_evlist__set_maps(evlist, NULL, threads);
+
+ err = perf_evlist__open(evlist);
+ __T("failed to open evsel", err == 0);
+
+ perf_evlist__enable(evlist);
+
+ /* wait loop */
+ count = WAIT_COUNT;
+ while (count--)
+ ;
+
+ i = 0;
+ perf_evlist__for_each_evsel(evlist, evsel) {
+ perf_evsel__read(evsel, 0, 0, &counts[i]);
+ __T("failed to read value for evsel", counts[i].val != 0);
+ i++;
+ }
+
+ perf_evlist__disable(evlist);
+
+ min = counts[0].val;
+ for (i = 0; i < EVENT_NUM; i++) {
+ __T_VERBOSE("Event %2d -- Raw count = %lu, run = %lu, enable = %lu\n",
+ i, counts[i].val, counts[i].run, counts[i].ena);
+
+ perf_counts_values__scale(&counts[i], true, &scaled);
+ if (scaled == 1) {
+ __T_VERBOSE("\t Scaled count = %lu (%.2lf%%, %lu/%lu)\n",
+ counts[i].val,
+ (double)counts[i].run / (double)counts[i].ena * 100.0,
+ counts[i].run, counts[i].ena);
+ } else if (scaled == -1) {
+ __T_VERBOSE("\t Not Running\n");
+ } else {
+ __T_VERBOSE("\t Not Scaling\n");
+ }
+
+ if (counts[i].val > max)
+ max = counts[i].val;
+
+ if (counts[i].val < min)
+ min = counts[i].val;
+
+ avg += counts[i].val;
+
+ if (counts[i].val != 0)
+ nonzero++;
+ }
+
+ if (nonzero != 0)
+ avg = avg / nonzero;
+ else
+ avg = 0;
+
+ error = display_error(avg, max, min, expected_counts.val);
+
+ __T("Error out of range!", ((error <= 1.0) && (error >= -1.0)));
+
+ perf_evlist__close(evlist);
+ perf_evlist__delete(evlist);
+
+ perf_thread_map__put(threads);
+
+ return 0;
+}
+
int test_evlist(int argc, char **argv)
{
__T_START;
@@ -424,6 +581,7 @@ int test_evlist(int argc, char **argv)
test_stat_thread_enable();
test_mmap_thread();
test_mmap_cpus();
+ test_stat_multiplexing();
__T_END;
return tests_failed == 0 ? 0 : -1;
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index fe58843d047c..8e24c4c78c7f 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -1367,6 +1367,14 @@ static int field_is_dynamic(struct tep_format_field *field)
return 0;
}
+static int field_is_relative_dynamic(struct tep_format_field *field)
+{
+ if (strncmp(field->type, "__rel_loc", 9) == 0)
+ return 1;
+
+ return 0;
+}
+
static int field_is_long(struct tep_format_field *field)
{
/* includes long long */
@@ -1622,6 +1630,8 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
field->flags |= TEP_FIELD_IS_STRING;
if (field_is_dynamic(field))
field->flags |= TEP_FIELD_IS_DYNAMIC;
+ if (field_is_relative_dynamic(field))
+ field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE;
if (field_is_long(field))
field->flags |= TEP_FIELD_IS_LONG;
@@ -2928,7 +2938,7 @@ process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
arg->type = TEP_PRINT_STRING;
arg->string.string = token;
- arg->string.offset = -1;
+ arg->string.field = NULL;
if (read_expected(TEP_EVENT_DELIM, ")") < 0)
goto out_err;
@@ -2957,7 +2967,7 @@ process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar
arg->type = TEP_PRINT_BITMASK;
arg->bitmask.bitmask = token;
- arg->bitmask.offset = -1;
+ arg->bitmask.field = NULL;
if (read_expected(TEP_EVENT_DELIM, ")") < 0)
goto out_err;
@@ -3123,19 +3133,23 @@ process_function(struct tep_event *event, struct tep_print_arg *arg,
free_token(token);
return process_int_array(event, arg, tok);
}
- if (strcmp(token, "__get_str") == 0) {
+ if (strcmp(token, "__get_str") == 0 ||
+ strcmp(token, "__get_rel_str") == 0) {
free_token(token);
return process_str(event, arg, tok);
}
- if (strcmp(token, "__get_bitmask") == 0) {
+ if (strcmp(token, "__get_bitmask") == 0 ||
+ strcmp(token, "__get_rel_bitmask") == 0) {
free_token(token);
return process_bitmask(event, arg, tok);
}
- if (strcmp(token, "__get_dynamic_array") == 0) {
+ if (strcmp(token, "__get_dynamic_array") == 0 ||
+ strcmp(token, "__get_rel_dynamic_array") == 0) {
free_token(token);
return process_dynamic_array(event, arg, tok);
}
- if (strcmp(token, "__get_dynamic_array_len") == 0) {
+ if (strcmp(token, "__get_dynamic_array_len") == 0 ||
+ strcmp(token, "__get_rel_dynamic_array_len") == 0) {
free_token(token);
return process_dynamic_array_len(event, arg, tok);
}
@@ -4163,14 +4177,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
case TEP_PRINT_STRING: {
int str_offset;
- if (arg->string.offset == -1) {
- struct tep_format_field *f;
+ if (!arg->string.field)
+ arg->string.field = tep_find_any_field(event, arg->string.string);
+ if (!arg->string.field)
+ break;
- f = tep_find_any_field(event, arg->string.string);
- arg->string.offset = f->offset;
- }
- str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset));
+ str_offset = data2host4(tep,
+ *(unsigned int *)(data + arg->string.field->offset));
str_offset &= 0xffff;
+ if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE)
+ str_offset += arg->string.field->offset + arg->string.field->size;
print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
break;
}
@@ -4181,15 +4197,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
int bitmask_offset;
int bitmask_size;
- if (arg->bitmask.offset == -1) {
- struct tep_format_field *f;
-
- f = tep_find_any_field(event, arg->bitmask.bitmask);
- arg->bitmask.offset = f->offset;
- }
- bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset));
+ if (!arg->bitmask.field)
+ arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask);
+ if (!arg->bitmask.field)
+ break;
+ bitmask_offset = data2host4(tep,
+ *(unsigned int *)(data + arg->bitmask.field->offset));
bitmask_size = bitmask_offset >> 16;
bitmask_offset &= 0xffff;
+ if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE)
+ bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size;
print_bitmask_to_seq(tep, s, format, len_arg,
data + bitmask_offset, bitmask_size);
break;
@@ -5109,6 +5126,8 @@ void tep_print_field(struct trace_seq *s, void *data,
offset = val;
len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
@@ -6987,6 +7006,8 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event,
data + offset, field->size);
*len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
} else
*len = field->size;
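
In a __rel_loc field the 32-bit word packs the length in the high 16 bits and, in the low 16 bits, an offset relative to the end of the word itself, which is why each branch above adds field->offset + field->size. A decoding sketch with hypothetical values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t loc = (6u << 16) | 4;        /* len 6, relative offset 4 */
            int field_offset = 8, field_size = 4; /* where loc sits in the record */
            int len = loc >> 16;
            /* TEP_FIELD_IS_RELATIVE: rebase onto the end of the loc word */
            int offset = (loc & 0xffff) + field_offset + field_size;

            printf("payload at %d, len %d\n", offset, len); /* 16, 6 */
            return 0;
    }
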
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index a67ad9a5b835..41d4f9f6a843 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -125,6 +125,7 @@ enum tep_format_flags {
TEP_FIELD_IS_LONG = 32,
TEP_FIELD_IS_FLAG = 64,
TEP_FIELD_IS_SYMBOLIC = 128,
+ TEP_FIELD_IS_RELATIVE = 256,
};
struct tep_format_field {
@@ -153,12 +154,12 @@ struct tep_print_arg_atom {
struct tep_print_arg_string {
char *string;
- int offset;
+ struct tep_format_field *field;
};
struct tep_print_arg_bitmask {
char *bitmask;
- int offset;
+ struct tep_format_field *field;
};
struct tep_print_arg_field {
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 368826bb5a57..5df177070d53 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -1712,8 +1712,11 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *
if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) {
addr = *(unsigned int *)val;
- val = record->data + (addr & 0xffff);
size = addr >> 16;
+ addr &= 0xffff;
+ if (arg->str.field->flags & TEP_FIELD_IS_RELATIVE)
+ addr += arg->str.field->offset + arg->str.field->size;
+ val = record->data + addr;
}
/*
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 5d72f3112e56..394ee57d58f2 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -1813,15 +1813,16 @@ spin_trylock() -- we can call these things lock-releases and
lock-acquires -- have two properties beyond those of ordinary releases
and acquires.
-First, when a lock-acquire reads from a lock-release, the LKMM
-requires that every instruction po-before the lock-release must
-execute before any instruction po-after the lock-acquire. This would
-naturally hold if the release and acquire operations were on different
-CPUs, but the LKMM says it holds even when they are on the same CPU.
-For example:
+First, when a lock-acquire reads from or is po-after a lock-release,
+the LKMM requires that every instruction po-before the lock-release
+must execute before any instruction po-after the lock-acquire. This
+would naturally hold if the release and acquire operations were on
+different CPUs and accessed the same lock variable, but the LKMM says
+it also holds when they are on the same CPU, even if they access
+different lock variables. For example:
int x, y;
- spinlock_t s;
+ spinlock_t s, t;
P0()
{
@@ -1830,9 +1831,9 @@ For example:
spin_lock(&s);
r1 = READ_ONCE(x);
spin_unlock(&s);
- spin_lock(&s);
+ spin_lock(&t);
r2 = READ_ONCE(y);
- spin_unlock(&s);
+ spin_unlock(&t);
}
P1()
@@ -1842,10 +1843,10 @@ For example:
WRITE_ONCE(x, 1);
}
-Here the second spin_lock() reads from the first spin_unlock(), and
-therefore the load of x must execute before the load of y. Thus we
-cannot have r1 = 1 and r2 = 0 at the end (this is an instance of the
-MP pattern).
+Here the second spin_lock() is po-after the first spin_unlock(), and
+therefore the load of x must execute before the load of y, even though
+the two locking operations use different locks. Thus we cannot have
+r1 = 1 and r2 = 0 at the end (this is an instance of the MP pattern).
This requirement does not apply to ordinary release and acquire
fences, only to lock-related operations. For instance, suppose P0()
@@ -1872,13 +1873,13 @@ instructions in the following order:
and thus it could load y before x, obtaining r2 = 0 and r1 = 1.
-Second, when a lock-acquire reads from a lock-release, and some other
-stores W and W' occur po-before the lock-release and po-after the
-lock-acquire respectively, the LKMM requires that W must propagate to
-each CPU before W' does. For example, consider:
+Second, when a lock-acquire reads from or is po-after a lock-release,
+and some other stores W and W' occur po-before the lock-release and
+po-after the lock-acquire respectively, the LKMM requires that W must
+propagate to each CPU before W' does. For example, consider:
int x, y;
- spinlock_t x;
+ spinlock_t s;
P0()
{
@@ -1908,7 +1909,12 @@ each CPU before W' does. For example, consider:
If r1 = 1 at the end then the spin_lock() in P1 must have read from
the spin_unlock() in P0. Hence the store to x must propagate to P2
-before the store to y does, so we cannot have r2 = 1 and r3 = 0.
+before the store to y does, so we cannot have r2 = 1 and r3 = 0. But
+if P1 had used a lock variable different from s, the writes could have
+propagated in either order. (On the other hand, if the code in P0 and
+P1 had all executed on a single CPU, as in the example before this
+one, then the writes would have propagated in order even if the two
+critical sections used different lock variables.)
These two special requirements for lock-release and lock-acquire do
not arise from the operational model. Nevertheless, kernel developers
diff --git a/tools/memory-model/README b/tools/memory-model/README
index 9a84c45504ab..9edd402704c4 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -195,6 +195,18 @@ litmus-tests
are listed in litmus-tests/README. A great deal more litmus
tests are available at https://github.com/paulmckrcu/litmus.
+ By "representative", it means the one in the litmus-tests
+ directory is:
+
+ 1) simple, the number of threads should be relatively
+ small and each thread function should be relatively
+ simple.
+ 2) orthogonal, there should be no two litmus tests
+ describing the same aspect of the memory model.
+ 3) textbook, developers can easily copy-paste-modify
+ the litmus tests to use the patterns on their own
+ code.
+
lock.cat
Provides a front-end analysis of lock acquisition and release,
for example, associating a lock acquisition with the preceding
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index 2a9b4fe4a84e..d70315fddef6 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -27,7 +27,7 @@ include "lock.cat"
(* Release Acquire *)
let acq-po = [Acquire] ; po ; [M]
let po-rel = [M] ; po ; [Release]
-let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po
+let po-unlock-lock-po = po ; [UL] ; (po|rf) ; [LKR] ; po
(* Fences *)
let R4rmb = R \ Noreturn (* Reads for which rmb works *)
@@ -70,12 +70,12 @@ let rwdep = (dep | ctrl) ; [W]
let overwrite = co | fr
let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb)
let to-r = addr | (dep ; [Marked] ; rfi)
-let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int)
+let ppo = to-r | to-w | fence | (po-unlock-lock-po & int)
(* Propagation: Ordering from release operations and strong fences. *)
let A-cumul(r) = (rfe ; [Marked])? ; r
let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb |
- po-unlock-rf-lock-po) ; [Marked]
+ po-unlock-lock-po) ; [Marked]
let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ;
[Marked] ; rfe? ; [Marked]
diff --git a/tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus b/tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus
new file mode 100644
index 000000000000..eb34123a6ffe
--- /dev/null
+++ b/tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus
@@ -0,0 +1,35 @@
+C LB+unlocklockonceonce+poacquireonce
+
+(*
+ * Result: Never
+ *
+ * If two locked critical sections execute on the same CPU, all accesses
+ * in the first must execute before any accesses in the second, even if the
+ * critical sections are protected by different locks. Note: Even when a
+ * write executes before a read, their memory effects can be reordered from
+ * the viewpoint of another CPU (the kind of reordering allowed by TSO).
+ *)
+
+{}
+
+P0(spinlock_t *s, spinlock_t *t, int *x, int *y)
+{
+ int r1;
+
+ spin_lock(s);
+ r1 = READ_ONCE(*x);
+ spin_unlock(s);
+ spin_lock(t);
+ WRITE_ONCE(*y, 1);
+ spin_unlock(t);
+}
+
+P1(int *x, int *y)
+{
+ int r2;
+
+ r2 = smp_load_acquire(y);
+ WRITE_ONCE(*x, 1);
+}
+
+exists (0:r1=1 /\ 1:r2=1)
diff --git a/tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus
new file mode 100644
index 000000000000..2feb1398be71
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus
@@ -0,0 +1,33 @@
+C MP+unlocklockonceonce+fencermbonceonce
+
+(*
+ * Result: Never
+ *
+ * If two locked critical sections execute on the same CPU, stores in the
+ * first must propagate to each CPU before stores in the second do, even if
+ * the critical sections are protected by different locks.
+ *)
+
+{}
+
+P0(spinlock_t *s, spinlock_t *t, int *x, int *y)
+{
+ spin_lock(s);
+ WRITE_ONCE(*x, 1);
+ spin_unlock(s);
+ spin_lock(t);
+ WRITE_ONCE(*y, 1);
+ spin_unlock(t);
+}
+
+P1(int *x, int *y)
+{
+ int r1;
+ int r2;
+
+ r1 = READ_ONCE(*y);
+ smp_rmb();
+ r2 = READ_ONCE(*x);
+}
+
+exists (1:r1=1 /\ 1:r2=0)
diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README
index 681f9067fa9e..d311a0ff1ae6 100644
--- a/tools/memory-model/litmus-tests/README
+++ b/tools/memory-model/litmus-tests/README
@@ -63,6 +63,10 @@ LB+poonceonces.litmus
As above, but with store-release replaced with WRITE_ONCE()
and load-acquire replaced with READ_ONCE().
+LB+unlocklockonceonce+poacquireonce.litmus
+ Does an unlock+lock pair provide an ordering guarantee between a
+ load and a store?
+
MP+onceassign+derefonce.litmus
As below, but with rcu_assign_pointer() and an rcu_dereference().
@@ -90,6 +94,10 @@ MP+porevlocks.litmus
As below, but with the first access of the writer process
and the second access of reader process protected by a lock.
+MP+unlocklockonceonce+fencermbonceonce.litmus
+ Does an unlock+lock pair provide an ordering guarantee between a
+ store and another store?
+
MP+fencewmbonceonce+fencermbonceonce.litmus
Does a smp_wmb() (between the stores) and an smp_rmb() (between
the loads) suffice for the message-passing litmus test, where one
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 4d6d7fc13255..c10ef78df050 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -531,6 +531,11 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
}
break;
+ case 0xcc:
+ /* int3 */
+ *type = INSN_TRAP;
+ break;
+
case 0xe3:
/* jecxz/jrcxz */
*type = INSN_JUMP_CONDITIONAL;
@@ -697,10 +702,10 @@ const char *arch_ret_insn(int len)
{
static const char ret[5][5] = {
{ BYTE_RET },
- { BYTE_RET, BYTES_NOP1 },
- { BYTE_RET, BYTES_NOP2 },
- { BYTE_RET, BYTES_NOP3 },
- { BYTE_RET, BYTES_NOP4 },
+ { BYTE_RET, 0xcc },
+ { BYTE_RET, 0xcc, BYTES_NOP1 },
+ { BYTE_RET, 0xcc, BYTES_NOP2 },
+ { BYTE_RET, 0xcc, BYTES_NOP3 },
};
if (len < 1 || len > 5) {
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 8b38b5d6fec7..38070f26105b 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -20,7 +20,7 @@
#include <objtool/objtool.h>
bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
- validate_dup, vmlinux, mcount, noinstr, backup;
+ validate_dup, vmlinux, mcount, noinstr, backup, sls;
static const char * const check_usage[] = {
"objtool check [<options>] file.o",
@@ -45,6 +45,7 @@ const struct option check_options[] = {
OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"),
+ OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"),
OPT_END(),
};
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 21735829b860..c2d2ab9a2861 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -168,14 +168,16 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
"panic",
"do_exit",
"do_task_dead",
- "__module_put_and_exit",
- "complete_and_exit",
+ "kthread_exit",
+ "make_task_dead",
+ "__module_put_and_kthread_exit",
+ "kthread_complete_and_exit",
"__reiserfs_panic",
"lbug_with_loc",
"fortify_panic",
"usercopy_abort",
"machine_real_restart",
- "rewind_stack_do_exit",
+ "rewind_stack_and_make_dead",
"kunit_try_catch_throw",
"xen_start_kernel",
"cpu_bringup_and_idle",
@@ -849,6 +851,10 @@ static const char *uaccess_safe_builtin[] = {
"__asan_report_store16_noabort",
/* KCSAN */
"__kcsan_check_access",
+ "__kcsan_mb",
+ "__kcsan_wmb",
+ "__kcsan_rmb",
+ "__kcsan_release",
"kcsan_found_watchpoint",
"kcsan_setup_watchpoint",
"kcsan_check_scoped_accesses",
@@ -1068,11 +1074,11 @@ static void annotate_call_site(struct objtool_file *file,
}
/*
- * Many compilers cannot disable KCOV with a function attribute
- * so they need a little help, NOP out any KCOV calls from noinstr
- * text.
+ * Many compilers cannot disable KCOV or sanitizer calls with a function
+ * attribute so they need a little help, NOP out any such calls from
+ * noinstr text.
*/
- if (insn->sec->noinstr && sym->kcov) {
+ if (insn->sec->noinstr && sym->profiling_func) {
if (reloc) {
reloc->type = R_NONE;
elf_write_reloc(file->elf, reloc);
@@ -1987,6 +1993,31 @@ static int read_intra_function_calls(struct objtool_file *file)
return 0;
}
+/*
+ * Return true if name matches an instrumentation function, where calls to that
+ * function from noinstr code can safely be removed, but compilers won't do so.
+ */
+static bool is_profiling_func(const char *name)
+{
+ /*
+ * Many compilers cannot disable KCOV with a function attribute.
+ */
+ if (!strncmp(name, "__sanitizer_cov_", 16))
+ return true;
+
+ /*
+ * Some compilers currently do not remove __tsan_func_entry/exit nor
+ * __tsan_atomic_signal_fence (used for barrier instrumentation) with
+ * the __no_sanitize_thread attribute, remove them. Once the kernel's
+ * minimum Clang version is 14.0, this can be removed.
+ */
+ if (!strncmp(name, "__tsan_func_", 12) ||
+ !strcmp(name, "__tsan_atomic_signal_fence"))
+ return true;
+
+ return false;
+}
+
static int classify_symbols(struct objtool_file *file)
{
struct section *sec;
@@ -2007,8 +2038,8 @@ static int classify_symbols(struct objtool_file *file)
if (!strcmp(func->name, "__fentry__"))
func->fentry = true;
- if (!strncmp(func->name, "__sanitizer_cov_", 16))
- func->kcov = true;
+ if (is_profiling_func(func->name))
+ func->profiling_func = true;
}
}
@@ -3084,6 +3115,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
switch (insn->type) {
case INSN_RETURN:
+ if (next_insn && next_insn->type == INSN_TRAP) {
+ next_insn->ignore = true;
+ } else if (sls && !insn->retpoline_safe) {
+ WARN_FUNC("missing int3 after ret",
+ insn->sec, insn->offset);
+ }
return validate_return(func, insn, &state);
case INSN_CALL:
@@ -3127,6 +3164,14 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
break;
case INSN_JUMP_DYNAMIC:
+ if (next_insn && next_insn->type == INSN_TRAP) {
+ next_insn->ignore = true;
+ } else if (sls && !insn->retpoline_safe) {
+ WARN_FUNC("missing int3 after indirect jump",
+ insn->sec, insn->offset);
+ }
+
+ /* fallthrough */
case INSN_JUMP_DYNAMIC_CONDITIONAL:
if (is_sibling_call(insn)) {
ret = validate_sibling_call(file, insn, &state);
@@ -3296,14 +3341,10 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
return true;
/*
- * Ignore any unused exceptions. This can happen when a whitelisted
- * function has an exception table entry.
- *
- * Also ignore alternative replacement instructions. This can happen
+ * Ignore alternative replacement instructions. This can happen
* when a whitelisted function uses one of the ALTERNATIVE macros.
*/
- if (!strcmp(insn->sec->name, ".fixup") ||
- !strcmp(insn->sec->name, ".altinstr_replacement") ||
+ if (!strcmp(insn->sec->name, ".altinstr_replacement") ||
!strcmp(insn->sec->name, ".altinstr_aux"))
return true;
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 81a4c543ff7e..4b384c907027 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -375,6 +375,7 @@ static int read_symbols(struct elf *elf)
return -1;
}
memset(sym, 0, sizeof(*sym));
+ INIT_LIST_HEAD(&sym->pv_target);
sym->alias = sym;
sym->idx = i;
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 589ff58426ab..76bae3078286 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -26,6 +26,7 @@ enum insn_type {
INSN_CLAC,
INSN_STD,
INSN_CLD,
+ INSN_TRAP,
INSN_OTHER,
};
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 15ac0b7d3d6a..89ba869ed08f 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -9,7 +9,7 @@
extern const struct option check_options[];
extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
- validate_dup, vmlinux, mcount, noinstr, backup;
+ validate_dup, vmlinux, mcount, noinstr, backup, sls;
extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index cdc739fa9a6f..d22336781401 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -58,7 +58,7 @@ struct symbol {
u8 static_call_tramp : 1;
u8 retpoline_thunk : 1;
u8 fentry : 1;
- u8 kcov : 1;
+ u8 profiling_func : 1;
struct list_head pv_target;
};
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index c90c7084e45a..bdf699f6552b 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -153,6 +153,10 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
!strcmp(func->name, "_paravirt_ident_64"))
return;
+ /* already added this function */
+ if (!list_empty(&func->pv_target))
+ return;
+
list_add(&func->pv_target, &f->pv_ops[idx].targets);
f->pv_ops[idx].clean = false;
}
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index cd8ce6e8ec12..7e44b419d301 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -74,12 +74,15 @@ OPTIONS
used when creating a uprobe for a process that resides in a
different mount namespace from the perf(1) utility.
---debuginfod=URLs::
+--debuginfod[=URLs]::
Specify debuginfod URL to be used when retrieving perf.data binaries,
it follows the same syntax as the DEBUGINFOD_URLS variable, like:
buildid-cache.debuginfod=http://192.168.122.174:8002
+ If URLs is not specified, the value of the DEBUGINFOD_URLS
+ system environment variable is used.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1]
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 3bb75c1f25e8..0420e71698ee 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -587,6 +587,15 @@ record.*::
Use 'n' control blocks in asynchronous (Posix AIO) trace writing
mode ('n' default: 1, max: 4).
+ record.debuginfod::
+ Specify debuginfod URL to be used when caching perf.data binaries,
+ it follows the same syntax as the DEBUGINFOD_URLS variable, like:
+
+ http://192.168.122.174:8002
+
+ If URLs is set to 'system', the value of the DEBUGINFOD_URLS system environment
+ variable is used.
+
diff.*::
diff.order::
This option sets the number of columns to sort the result.
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 4dc8d0af19df..57384a97c04f 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -81,7 +81,11 @@ On AMD systems it is implemented using IBS (up to precise-level 2).
The precise modifier works with event types 0x76 (cpu-cycles, CPU
clocks not halted) and 0xC1 (micro-ops retired). Both events map to
IBS execution sampling (IBS op) with the IBS Op Counter Control bit
-(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s
+(IbsOpCntCtl) set respectively (see the
+Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS)
+section of the [AMD Processor Programming Reference (PPR)] relevant to the
+family, model and stepping of the processor being used).
+
-Manual Volume 2: System Programming, 13.3 Instruction-Based
-Sampling). Examples to use IBS:
+Examples to use IBS:
@@ -94,10 +98,12 @@ RAW HARDWARE EVENT DESCRIPTOR
Even when an event is not available in a symbolic form within perf right now,
it can be encoded in a per processor specific way.
-For instance For x86 CPUs NNN represents the raw register encoding with the
+For instance on x86 CPUs, N is a hexadecimal value that represents the raw register encoding with the
layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout
-of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344,
-Figure 13-7 Performance Event-Select Register (PerfEvtSeln)).
+of IA32_PERFEVTSELx MSRs) or AMD's PERF_CTL MSRs (see the
+Core Complex (CCX) -> Processor x86 Core -> MSR Registers section of the
+[AMD Processor Programming Reference (PPR)] relevant to the family, model
+and stepping of the processor being used).
Note: Only the following bit fields can be set in x86 counter
registers: event, umask, edge, inv, cmask. Esp. guest/host only and
@@ -126,6 +132,38 @@ It's also possible to use pmu syntax:
perf record -e cpu/r1a8/ ...
perf record -e cpu/r0x1a8/ ...
+Some processors, like those from AMD, support event codes and unit masks
+larger than a byte. In such cases, the bits corresponding to the event
+configuration parameters can be seen with:
+
+ cat /sys/bus/event_source/devices/<pmu>/format/<config>
+
+Example:
+
+If the AMD docs for an EPYC 7713 processor describe an event as:
+
+ Event Umask Event Mask
+ Num. Value Mnemonic Description
+
+ 28FH 03H op_cache_hit_miss.op_cache_hit Counts Op Cache micro-tag
+ hit events.
+
+The raw encoding 0x0328F cannot be used, since the upper nibble of the
+EventSelect bits has to be specified via bits 32-35, as can be seen with:
+
+ cat /sys/bus/event_source/devices/cpu/format/event
+
+The raw encoding 0x20000038F should be used instead:
+
+ perf stat -e r20000038f -a sleep 1
+ perf record -e r20000038f ...
+
+It's also possible to use pmu syntax:
+
+ perf record -e r20000038f -a sleep 1
+ perf record -e cpu/r20000038f/ ...
+ perf record -e cpu/r0x20000038f/ ...
+
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.
@@ -316,4 +354,4 @@ SEE ALSO
linkperf:perf-stat[1], linkperf:perf-top[1],
linkperf:perf-record[1],
http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
-http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming]
+https://bugzilla.kernel.org/show_bug.cgi?id=206537[AMD Processor Programming Reference (PPR)]
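
As a cross-check of the encoding walked through above, r20000038f can be reconstructed from the published fields (EventSelect 0x28F, unit mask 0x03): the low byte of EventSelect goes in bits 0-7, the unit mask in bits 8-15, and EventSelect[11:8] in bits 32-35. A small C sketch of that arithmetic (field layout taken from the text above, not read from sysfs):

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
  	uint64_t event = 0x28F;	/* op_cache_hit_miss.op_cache_hit */
  	uint64_t umask = 0x03;
  	uint64_t raw;

  	raw = (event & 0xFF)		/* EventSelect[7:0]  -> bits 0-7   */
  	    | (umask << 8)		/* unit mask         -> bits 8-15  */
  	    | ((event >> 8) << 32);	/* EventSelect[11:8] -> bits 32-35 */

  	printf("r%llx\n", (unsigned long long)raw);	/* r20000038f */
  	return 0;
  }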
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 3cf7bac67239..9ccc75935bc5 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -30,8 +30,10 @@ OPTIONS
- a symbolic event name (use 'perf list' to list all events)
- - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
- hexadecimal event descriptor.
+ - a raw PMU event in the form of rN where N is a hexadecimal value
+ that represents the raw register encoding with the layout of the
+ event control registers as described by entries in
+ /sys/bus/event_source/devices/cpu/format/*.
- a symbolic or raw PMU event followed by an optional colon
and a list of event modifiers, e.g., cpu-cycles:p. See the
@@ -713,6 +715,15 @@ measurements:
include::intel-hybrid.txt[]
+--debuginfod[=URLs]::
+ Specify a debuginfod URL to be used when caching perf.data binaries.
+ It follows the same syntax as the DEBUGINFOD_URLS variable, like:
+
+ http://192.168.122.174:8002
+
+ If no URLs are specified, the value of the DEBUGINFOD_URLS
+ environment variable is used.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 7e6fb7cbc0f4..c06c341e72b9 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -36,8 +36,10 @@ report::
- a symbolic event name (use 'perf list' to list all events)
- - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
- hexadecimal event descriptor.
+ - a raw PMU event in the form of rN where N is a hexadecimal value
+ that represents the raw register encoding with the layout of the
+ event control registers as described by entries in
+ /sys/bus/event_source/devices/cpu/format/*.
- a symbolic or raw PMU event followed by an optional colon
and a list of event modifiers, e.g., cpu-cycles:p. See the
@@ -493,6 +495,10 @@ This option can be enabled in perf config by setting the variable
$ perf config stat.no-csv-summary=true
+--cputype::
+Only enable events on CPUs of the given type on hybrid platforms
+(e.g. core or atom).
+
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 9898a32b8d9c..cac3dfbee7d8 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -38,9 +38,10 @@ Default is to monitor all CPUS.
-e <event>::
--event=<event>::
Select the PMU event. Selection can be a symbolic event name
- (use 'perf list' to list all events) or a raw PMU
- event (eventsel+umask) in the form of rNNN where NNN is a
- hexadecimal event descriptor.
+ (use 'perf list' to list all events) or a raw PMU event in the form
+ of rN where N is a hexadecimal value that represents the raw register
+ encoding with the layout of the event control registers as described
+ by entries in /sys/bus/event_source/devices/cpu/format/*.
-E <entries>::
--entries=<entries>::
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index afd144725a0b..96ad944ca6a8 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -17,6 +17,7 @@ detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected)
detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
+HOSTCFLAGS := $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
include $(srctree)/tools/scripts/Makefile.arch
@@ -143,7 +144,10 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
ifdef CSINCLUDES
LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
endif
-OPENCSDLIBS := -lopencsd_c_api -lopencsd -lstdc++
+OPENCSDLIBS := -lopencsd_c_api
+ifeq ($(findstring -static,${LDFLAGS}),-static)
+ OPENCSDLIBS += -lopencsd -lstdc++
+endif
ifdef CSLIBS
LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
endif
@@ -211,6 +215,7 @@ endif
ifneq ($(WERROR),0)
CORE_CFLAGS += -Werror
CXXFLAGS += -Werror
+ HOSTCFLAGS += -Werror
endif
ifndef DEBUG
@@ -271,8 +276,6 @@ endif
FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
-FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
-FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_LDFLAGS-libaio = -lrt
@@ -292,6 +295,9 @@ CXXFLAGS += -ggdb3
CXXFLAGS += -funwind-tables
CXXFLAGS += -Wno-strict-aliasing
+HOSTCFLAGS += -Wall
+HOSTCFLAGS += -Wextra
+
# Enforce a non-executable stack, as we may regress (again) in the future by
# adding assembler files missing the .GNU-stack linker note.
LDFLAGS += -Wl,-z,noexecstack
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 80522bcfafe0..ac861e42c8f7 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -226,7 +226,7 @@ else
endif
export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
-export HOSTCC HOSTLD HOSTAR
+export HOSTCC HOSTLD HOSTAR HOSTCFLAGS
include $(srctree)/tools/build/Makefile.include
@@ -1041,7 +1041,7 @@ SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
-SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h
+SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
$(Q)$(MKDIR) -p $@
diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h
index 4085419283d0..99a06550e25d 100644
--- a/tools/perf/arch/arm/include/perf_regs.h
+++ b/tools/perf/arch/arm/include/perf_regs.h
@@ -15,46 +15,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM_PC
#define PERF_REG_SP PERF_REG_ARM_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_ARM_R0:
- return "r0";
- case PERF_REG_ARM_R1:
- return "r1";
- case PERF_REG_ARM_R2:
- return "r2";
- case PERF_REG_ARM_R3:
- return "r3";
- case PERF_REG_ARM_R4:
- return "r4";
- case PERF_REG_ARM_R5:
- return "r5";
- case PERF_REG_ARM_R6:
- return "r6";
- case PERF_REG_ARM_R7:
- return "r7";
- case PERF_REG_ARM_R8:
- return "r8";
- case PERF_REG_ARM_R9:
- return "r9";
- case PERF_REG_ARM_R10:
- return "r10";
- case PERF_REG_ARM_FP:
- return "fp";
- case PERF_REG_ARM_IP:
- return "ip";
- case PERF_REG_ARM_SP:
- return "sp";
- case PERF_REG_ARM_LR:
- return "lr";
- case PERF_REG_ARM_PC:
- return "pc";
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 293a23bf8be3..2e8b2c4365a0 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -203,9 +203,11 @@ static int cs_etm_set_option(struct auxtrace_record *itr,
struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
/* Set option of each CPU we have */
- for (i = 0; i < cpu__max_cpu(); i++) {
- if (!cpu_map__has(event_cpus, i) ||
- !cpu_map__has(online_cpus, i))
+ for (i = 0; i < cpu__max_cpu().cpu; i++) {
+ struct perf_cpu cpu = { .cpu = i, };
+
+ if (!perf_cpu_map__has(event_cpus, cpu) ||
+ !perf_cpu_map__has(online_cpus, cpu))
continue;
if (option & BIT(ETM_OPT_CTXTID)) {
@@ -407,25 +409,6 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
}
- /* Validate auxtrace_mmap_pages provided by user */
- if (opts->auxtrace_mmap_pages) {
- unsigned int max_page = (KiB(128) / page_size);
- size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
-
- if (!privileged &&
- opts->auxtrace_mmap_pages > max_page) {
- opts->auxtrace_mmap_pages = max_page;
- pr_err("auxtrace too big, truncating to %d\n",
- max_page);
- }
-
- if (!is_power_of_2(sz)) {
- pr_err("Invalid mmap size for %s: must be a power of 2\n",
- CORESIGHT_ETM_PMU_NAME);
- return -EINVAL;
- }
- }
-
if (opts->auxtrace_snapshot_mode)
pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME,
opts->auxtrace_snapshot_size);
@@ -541,9 +524,11 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
/* cpu map is not empty, we have specific CPUs to work with */
if (!perf_cpu_map__empty(event_cpus)) {
- for (i = 0; i < cpu__max_cpu(); i++) {
- if (!cpu_map__has(event_cpus, i) ||
- !cpu_map__has(online_cpus, i))
+ for (i = 0; i < cpu__max_cpu().cpu; i++) {
+ struct perf_cpu cpu = { .cpu = i, };
+
+ if (!perf_cpu_map__has(event_cpus, cpu) ||
+ !perf_cpu_map__has(online_cpus, cpu))
continue;
if (cs_etm_is_ete(itr, i))
@@ -555,8 +540,10 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
}
} else {
/* get configuration for all CPUs in the system */
- for (i = 0; i < cpu__max_cpu(); i++) {
- if (!cpu_map__has(online_cpus, i))
+ for (i = 0; i < cpu__max_cpu().cpu; i++) {
+ struct perf_cpu cpu = { .cpu = i, };
+
+ if (!perf_cpu_map__has(online_cpus, cpu))
continue;
if (cs_etm_is_ete(itr, i))
@@ -741,8 +728,10 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
} else {
/* Make sure all specified CPUs are online */
for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
- if (cpu_map__has(event_cpus, i) &&
- !cpu_map__has(online_cpus, i))
+ struct perf_cpu cpu = { .cpu = i, };
+
+ if (perf_cpu_map__has(event_cpus, cpu) &&
+ !perf_cpu_map__has(online_cpus, cpu))
return -EINVAL;
}
@@ -762,9 +751,12 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
offset = CS_ETM_SNAPSHOT + 1;
- for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++)
- if (cpu_map__has(cpu_map, i))
+ for (i = 0; i < cpu__max_cpu().cpu && offset < priv_size; i++) {
+ struct perf_cpu cpu = { .cpu = i, };
+
+ if (perf_cpu_map__has(cpu_map, cpu))
cs_etm_get_metadata(i, &offset, itr, info);
+ }
perf_cpu_map__put(online_cpus);
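
Every hunk in this file follows the same conversion: the loop index stays an int, but it is wrapped in a struct perf_cpu before reaching the map helpers. A minimal sketch of why the one-member wrapper is useful (the struct layout matches the diff; the helper is a stand-in, not the real libperf API):

  struct perf_cpu { int cpu; };

  /* Accepts only a logical CPU number, never a map index. */
  static int cpu_is_valid(struct perf_cpu cpu)
  {
  	return cpu.cpu >= 0;
  }

  int example(int max_cpu)
  {
  	int i, valid = 0;

  	for (i = 0; i < max_cpu; i++) {
  		struct perf_cpu cpu = { .cpu = i, };

  		/* cpu_is_valid(i) would no longer compile, so an index
  		 * can't silently be passed where a CPU is expected. */
  		valid += cpu_is_valid(cpu);
  	}
  	return valid;
  }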
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
index fa3e07459f76..35a3cc775b39 100644
--- a/tools/perf/arch/arm64/include/perf_regs.h
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -4,7 +4,9 @@
#include <stdlib.h>
#include <linux/types.h>
+#define perf_event_arm_regs perf_event_arm64_regs
#include <asm/perf_regs.h>
+#undef perf_event_arm_regs
void perf_regs_load(u64 *regs);
@@ -15,80 +17,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM64_PC
#define PERF_REG_SP PERF_REG_ARM64_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_ARM64_X0:
- return "x0";
- case PERF_REG_ARM64_X1:
- return "x1";
- case PERF_REG_ARM64_X2:
- return "x2";
- case PERF_REG_ARM64_X3:
- return "x3";
- case PERF_REG_ARM64_X4:
- return "x4";
- case PERF_REG_ARM64_X5:
- return "x5";
- case PERF_REG_ARM64_X6:
- return "x6";
- case PERF_REG_ARM64_X7:
- return "x7";
- case PERF_REG_ARM64_X8:
- return "x8";
- case PERF_REG_ARM64_X9:
- return "x9";
- case PERF_REG_ARM64_X10:
- return "x10";
- case PERF_REG_ARM64_X11:
- return "x11";
- case PERF_REG_ARM64_X12:
- return "x12";
- case PERF_REG_ARM64_X13:
- return "x13";
- case PERF_REG_ARM64_X14:
- return "x14";
- case PERF_REG_ARM64_X15:
- return "x15";
- case PERF_REG_ARM64_X16:
- return "x16";
- case PERF_REG_ARM64_X17:
- return "x17";
- case PERF_REG_ARM64_X18:
- return "x18";
- case PERF_REG_ARM64_X19:
- return "x19";
- case PERF_REG_ARM64_X20:
- return "x20";
- case PERF_REG_ARM64_X21:
- return "x21";
- case PERF_REG_ARM64_X22:
- return "x22";
- case PERF_REG_ARM64_X23:
- return "x23";
- case PERF_REG_ARM64_X24:
- return "x24";
- case PERF_REG_ARM64_X25:
- return "x25";
- case PERF_REG_ARM64_X26:
- return "x26";
- case PERF_REG_ARM64_X27:
- return "x27";
- case PERF_REG_ARM64_X28:
- return "x28";
- case PERF_REG_ARM64_X29:
- return "x29";
- case PERF_REG_ARM64_SP:
- return "sp";
- case PERF_REG_ARM64_LR:
- return "lr";
- case PERF_REG_ARM64_PC:
- return "pc";
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index 7e7714290a87..d2ce31e28cd7 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -5,6 +5,8 @@
#include <string.h>
#include "debug.h"
#include "symbol.h"
+#include "callchain.h"
+#include "record.h"
/* On arm64, kernel text segment starts at high memory address,
* for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
@@ -26,3 +28,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
p->end = c->start;
pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
}
+
+void arch__add_leaf_frame_record_opts(struct record_opts *opts)
+{
+ opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;
+}
diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
index d3a18f9c85f6..79124bba713e 100644
--- a/tools/perf/arch/arm64/util/pmu.c
+++ b/tools/perf/arch/arm64/util/pmu.c
@@ -15,7 +15,7 @@ const struct pmu_events_map *pmu_events_map__find(void)
* The cpumap should cover all CPUs. Otherwise, some CPUs may
* not support some events or have different event IDs.
*/
- if (pmu->cpus->nr != cpu__max_cpu())
+ if (pmu->cpus->nr != cpu__max_cpu().cpu)
return NULL;
return perf_pmu__find_map(pmu);
diff --git a/tools/perf/arch/csky/include/perf_regs.h b/tools/perf/arch/csky/include/perf_regs.h
index 25ac3bdcb9d1..1afcc0e916c2 100644
--- a/tools/perf/arch/csky/include/perf_regs.h
+++ b/tools/perf/arch/csky/include/perf_regs.h
@@ -15,86 +15,4 @@
#define PERF_REG_IP PERF_REG_CSKY_PC
#define PERF_REG_SP PERF_REG_CSKY_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_CSKY_A0:
- return "a0";
- case PERF_REG_CSKY_A1:
- return "a1";
- case PERF_REG_CSKY_A2:
- return "a2";
- case PERF_REG_CSKY_A3:
- return "a3";
- case PERF_REG_CSKY_REGS0:
- return "regs0";
- case PERF_REG_CSKY_REGS1:
- return "regs1";
- case PERF_REG_CSKY_REGS2:
- return "regs2";
- case PERF_REG_CSKY_REGS3:
- return "regs3";
- case PERF_REG_CSKY_REGS4:
- return "regs4";
- case PERF_REG_CSKY_REGS5:
- return "regs5";
- case PERF_REG_CSKY_REGS6:
- return "regs6";
- case PERF_REG_CSKY_REGS7:
- return "regs7";
- case PERF_REG_CSKY_REGS8:
- return "regs8";
- case PERF_REG_CSKY_REGS9:
- return "regs9";
- case PERF_REG_CSKY_SP:
- return "sp";
- case PERF_REG_CSKY_LR:
- return "lr";
- case PERF_REG_CSKY_PC:
- return "pc";
-#if defined(__CSKYABIV2__)
- case PERF_REG_CSKY_EXREGS0:
- return "exregs0";
- case PERF_REG_CSKY_EXREGS1:
- return "exregs1";
- case PERF_REG_CSKY_EXREGS2:
- return "exregs2";
- case PERF_REG_CSKY_EXREGS3:
- return "exregs3";
- case PERF_REG_CSKY_EXREGS4:
- return "exregs4";
- case PERF_REG_CSKY_EXREGS5:
- return "exregs5";
- case PERF_REG_CSKY_EXREGS6:
- return "exregs6";
- case PERF_REG_CSKY_EXREGS7:
- return "exregs7";
- case PERF_REG_CSKY_EXREGS8:
- return "exregs8";
- case PERF_REG_CSKY_EXREGS9:
- return "exregs9";
- case PERF_REG_CSKY_EXREGS10:
- return "exregs10";
- case PERF_REG_CSKY_EXREGS11:
- return "exregs11";
- case PERF_REG_CSKY_EXREGS12:
- return "exregs12";
- case PERF_REG_CSKY_EXREGS13:
- return "exregs13";
- case PERF_REG_CSKY_EXREGS14:
- return "exregs14";
- case PERF_REG_CSKY_TLS:
- return "tls";
- case PERF_REG_CSKY_HI:
- return "hi";
- case PERF_REG_CSKY_LO:
- return "lo";
-#endif
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index e2c481fcede6..3f1886ad9d80 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -364,3 +364,4 @@
# 447 reserved for memfd_secret
448 n64 process_mrelease sys_process_mrelease
449 n64 futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/tools/perf/arch/mips/include/perf_regs.h b/tools/perf/arch/mips/include/perf_regs.h
index ee73b36a14d1..b8cd8bbb37ba 100644
--- a/tools/perf/arch/mips/include/perf_regs.h
+++ b/tools/perf/arch/mips/include/perf_regs.h
@@ -12,73 +12,4 @@
#define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1)
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_MIPS_PC:
- return "PC";
- case PERF_REG_MIPS_R1:
- return "$1";
- case PERF_REG_MIPS_R2:
- return "$2";
- case PERF_REG_MIPS_R3:
- return "$3";
- case PERF_REG_MIPS_R4:
- return "$4";
- case PERF_REG_MIPS_R5:
- return "$5";
- case PERF_REG_MIPS_R6:
- return "$6";
- case PERF_REG_MIPS_R7:
- return "$7";
- case PERF_REG_MIPS_R8:
- return "$8";
- case PERF_REG_MIPS_R9:
- return "$9";
- case PERF_REG_MIPS_R10:
- return "$10";
- case PERF_REG_MIPS_R11:
- return "$11";
- case PERF_REG_MIPS_R12:
- return "$12";
- case PERF_REG_MIPS_R13:
- return "$13";
- case PERF_REG_MIPS_R14:
- return "$14";
- case PERF_REG_MIPS_R15:
- return "$15";
- case PERF_REG_MIPS_R16:
- return "$16";
- case PERF_REG_MIPS_R17:
- return "$17";
- case PERF_REG_MIPS_R18:
- return "$18";
- case PERF_REG_MIPS_R19:
- return "$19";
- case PERF_REG_MIPS_R20:
- return "$20";
- case PERF_REG_MIPS_R21:
- return "$21";
- case PERF_REG_MIPS_R22:
- return "$22";
- case PERF_REG_MIPS_R23:
- return "$23";
- case PERF_REG_MIPS_R24:
- return "$24";
- case PERF_REG_MIPS_R25:
- return "$25";
- case PERF_REG_MIPS_R28:
- return "$28";
- case PERF_REG_MIPS_R29:
- return "$29";
- case PERF_REG_MIPS_R30:
- return "$30";
- case PERF_REG_MIPS_R31:
- return "$31";
- default:
- break;
- }
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 7bef917cc84e..2600b4237292 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -528,3 +528,5 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
+450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index 93339d17acc4..9bb17c3f370b 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -19,70 +19,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_POWERPC_NIP
#define PERF_REG_SP PERF_REG_POWERPC_R1
-static const char *reg_names[] = {
- [PERF_REG_POWERPC_R0] = "r0",
- [PERF_REG_POWERPC_R1] = "r1",
- [PERF_REG_POWERPC_R2] = "r2",
- [PERF_REG_POWERPC_R3] = "r3",
- [PERF_REG_POWERPC_R4] = "r4",
- [PERF_REG_POWERPC_R5] = "r5",
- [PERF_REG_POWERPC_R6] = "r6",
- [PERF_REG_POWERPC_R7] = "r7",
- [PERF_REG_POWERPC_R8] = "r8",
- [PERF_REG_POWERPC_R9] = "r9",
- [PERF_REG_POWERPC_R10] = "r10",
- [PERF_REG_POWERPC_R11] = "r11",
- [PERF_REG_POWERPC_R12] = "r12",
- [PERF_REG_POWERPC_R13] = "r13",
- [PERF_REG_POWERPC_R14] = "r14",
- [PERF_REG_POWERPC_R15] = "r15",
- [PERF_REG_POWERPC_R16] = "r16",
- [PERF_REG_POWERPC_R17] = "r17",
- [PERF_REG_POWERPC_R18] = "r18",
- [PERF_REG_POWERPC_R19] = "r19",
- [PERF_REG_POWERPC_R20] = "r20",
- [PERF_REG_POWERPC_R21] = "r21",
- [PERF_REG_POWERPC_R22] = "r22",
- [PERF_REG_POWERPC_R23] = "r23",
- [PERF_REG_POWERPC_R24] = "r24",
- [PERF_REG_POWERPC_R25] = "r25",
- [PERF_REG_POWERPC_R26] = "r26",
- [PERF_REG_POWERPC_R27] = "r27",
- [PERF_REG_POWERPC_R28] = "r28",
- [PERF_REG_POWERPC_R29] = "r29",
- [PERF_REG_POWERPC_R30] = "r30",
- [PERF_REG_POWERPC_R31] = "r31",
- [PERF_REG_POWERPC_NIP] = "nip",
- [PERF_REG_POWERPC_MSR] = "msr",
- [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
- [PERF_REG_POWERPC_CTR] = "ctr",
- [PERF_REG_POWERPC_LINK] = "link",
- [PERF_REG_POWERPC_XER] = "xer",
- [PERF_REG_POWERPC_CCR] = "ccr",
- [PERF_REG_POWERPC_SOFTE] = "softe",
- [PERF_REG_POWERPC_TRAP] = "trap",
- [PERF_REG_POWERPC_DAR] = "dar",
- [PERF_REG_POWERPC_DSISR] = "dsisr",
- [PERF_REG_POWERPC_SIER] = "sier",
- [PERF_REG_POWERPC_MMCRA] = "mmcra",
- [PERF_REG_POWERPC_MMCR0] = "mmcr0",
- [PERF_REG_POWERPC_MMCR1] = "mmcr1",
- [PERF_REG_POWERPC_MMCR2] = "mmcr2",
- [PERF_REG_POWERPC_MMCR3] = "mmcr3",
- [PERF_REG_POWERPC_SIER2] = "sier2",
- [PERF_REG_POWERPC_SIER3] = "sier3",
- [PERF_REG_POWERPC_PMC1] = "pmc1",
- [PERF_REG_POWERPC_PMC2] = "pmc2",
- [PERF_REG_POWERPC_PMC3] = "pmc3",
- [PERF_REG_POWERPC_PMC4] = "pmc4",
- [PERF_REG_POWERPC_PMC5] = "pmc5",
- [PERF_REG_POWERPC_PMC6] = "pmc6",
- [PERF_REG_POWERPC_SDAR] = "sdar",
- [PERF_REG_POWERPC_SIAR] = "siar",
-};
-
-static inline const char *__perf_reg_name(int id)
-{
- return reg_names[id];
-}
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
index 3bf441257466..cf430a4c55b9 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -40,8 +40,12 @@ const char *arch_perf_header_entry(const char *se_header)
{
if (!strcmp(se_header, "Local INSTR Latency"))
return "Finish Cyc";
- else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+ else if (!strcmp(se_header, "INSTR Latency"))
+ return "Global Finish_cyc";
+ else if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
return "Dispatch Cyc";
+ else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+ return "Global Dispatch_cyc";
return se_header;
}
@@ -49,5 +53,7 @@ int arch_support_sort_key(const char *sort_key)
{
if (!strcmp(sort_key, "p_stage_cyc"))
return 1;
+ if (!strcmp(sort_key, "local_p_stage_cyc"))
+ return 1;
return 0;
}
diff --git a/tools/perf/arch/riscv/include/perf_regs.h b/tools/perf/arch/riscv/include/perf_regs.h
index 6b02a767c918..6944bf0de53e 100644
--- a/tools/perf/arch/riscv/include/perf_regs.h
+++ b/tools/perf/arch/riscv/include/perf_regs.h
@@ -19,78 +19,4 @@
#define PERF_REG_IP PERF_REG_RISCV_PC
#define PERF_REG_SP PERF_REG_RISCV_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_RISCV_PC:
- return "pc";
- case PERF_REG_RISCV_RA:
- return "ra";
- case PERF_REG_RISCV_SP:
- return "sp";
- case PERF_REG_RISCV_GP:
- return "gp";
- case PERF_REG_RISCV_TP:
- return "tp";
- case PERF_REG_RISCV_T0:
- return "t0";
- case PERF_REG_RISCV_T1:
- return "t1";
- case PERF_REG_RISCV_T2:
- return "t2";
- case PERF_REG_RISCV_S0:
- return "s0";
- case PERF_REG_RISCV_S1:
- return "s1";
- case PERF_REG_RISCV_A0:
- return "a0";
- case PERF_REG_RISCV_A1:
- return "a1";
- case PERF_REG_RISCV_A2:
- return "a2";
- case PERF_REG_RISCV_A3:
- return "a3";
- case PERF_REG_RISCV_A4:
- return "a4";
- case PERF_REG_RISCV_A5:
- return "a5";
- case PERF_REG_RISCV_A6:
- return "a6";
- case PERF_REG_RISCV_A7:
- return "a7";
- case PERF_REG_RISCV_S2:
- return "s2";
- case PERF_REG_RISCV_S3:
- return "s3";
- case PERF_REG_RISCV_S4:
- return "s4";
- case PERF_REG_RISCV_S5:
- return "s5";
- case PERF_REG_RISCV_S6:
- return "s6";
- case PERF_REG_RISCV_S7:
- return "s7";
- case PERF_REG_RISCV_S8:
- return "s8";
- case PERF_REG_RISCV_S9:
- return "s9";
- case PERF_REG_RISCV_S10:
- return "s10";
- case PERF_REG_RISCV_S11:
- return "s11";
- case PERF_REG_RISCV_T3:
- return "t3";
- case PERF_REG_RISCV_T4:
- return "t4";
- case PERF_REG_RISCV_T5:
- return "t5";
- case PERF_REG_RISCV_T6:
- return "t6";
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index df5261e5cfe1..799147658dee 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -451,3 +451,5 @@
446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h
index ce3031526623..52fcc0891da6 100644
--- a/tools/perf/arch/s390/include/perf_regs.h
+++ b/tools/perf/arch/s390/include/perf_regs.h
@@ -14,82 +14,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_S390_PC
#define PERF_REG_SP PERF_REG_S390_R15
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_S390_R0:
- return "R0";
- case PERF_REG_S390_R1:
- return "R1";
- case PERF_REG_S390_R2:
- return "R2";
- case PERF_REG_S390_R3:
- return "R3";
- case PERF_REG_S390_R4:
- return "R4";
- case PERF_REG_S390_R5:
- return "R5";
- case PERF_REG_S390_R6:
- return "R6";
- case PERF_REG_S390_R7:
- return "R7";
- case PERF_REG_S390_R8:
- return "R8";
- case PERF_REG_S390_R9:
- return "R9";
- case PERF_REG_S390_R10:
- return "R10";
- case PERF_REG_S390_R11:
- return "R11";
- case PERF_REG_S390_R12:
- return "R12";
- case PERF_REG_S390_R13:
- return "R13";
- case PERF_REG_S390_R14:
- return "R14";
- case PERF_REG_S390_R15:
- return "R15";
- case PERF_REG_S390_FP0:
- return "FP0";
- case PERF_REG_S390_FP1:
- return "FP1";
- case PERF_REG_S390_FP2:
- return "FP2";
- case PERF_REG_S390_FP3:
- return "FP3";
- case PERF_REG_S390_FP4:
- return "FP4";
- case PERF_REG_S390_FP5:
- return "FP5";
- case PERF_REG_S390_FP6:
- return "FP6";
- case PERF_REG_S390_FP7:
- return "FP7";
- case PERF_REG_S390_FP8:
- return "FP8";
- case PERF_REG_S390_FP9:
- return "FP9";
- case PERF_REG_S390_FP10:
- return "FP10";
- case PERF_REG_S390_FP11:
- return "FP11";
- case PERF_REG_S390_FP12:
- return "FP12";
- case PERF_REG_S390_FP13:
- return "FP13";
- case PERF_REG_S390_FP14:
- return "FP14";
- case PERF_REG_S390_FP15:
- return "FP15";
- case PERF_REG_S390_MASK:
- return "MASK";
- case PERF_REG_S390_PC:
- return "PC";
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index fe8f8dd157b4..c84d12608cd2 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,6 +371,7 @@
447 common memfd_secret sys_memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
index cddc4cdc0d9b..16e23b722042 100644
--- a/tools/perf/arch/x86/include/perf_regs.h
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -23,86 +23,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_X86_IP
#define PERF_REG_SP PERF_REG_X86_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_X86_AX:
- return "AX";
- case PERF_REG_X86_BX:
- return "BX";
- case PERF_REG_X86_CX:
- return "CX";
- case PERF_REG_X86_DX:
- return "DX";
- case PERF_REG_X86_SI:
- return "SI";
- case PERF_REG_X86_DI:
- return "DI";
- case PERF_REG_X86_BP:
- return "BP";
- case PERF_REG_X86_SP:
- return "SP";
- case PERF_REG_X86_IP:
- return "IP";
- case PERF_REG_X86_FLAGS:
- return "FLAGS";
- case PERF_REG_X86_CS:
- return "CS";
- case PERF_REG_X86_SS:
- return "SS";
- case PERF_REG_X86_DS:
- return "DS";
- case PERF_REG_X86_ES:
- return "ES";
- case PERF_REG_X86_FS:
- return "FS";
- case PERF_REG_X86_GS:
- return "GS";
-#ifdef HAVE_ARCH_X86_64_SUPPORT
- case PERF_REG_X86_R8:
- return "R8";
- case PERF_REG_X86_R9:
- return "R9";
- case PERF_REG_X86_R10:
- return "R10";
- case PERF_REG_X86_R11:
- return "R11";
- case PERF_REG_X86_R12:
- return "R12";
- case PERF_REG_X86_R13:
- return "R13";
- case PERF_REG_X86_R14:
- return "R14";
- case PERF_REG_X86_R15:
- return "R15";
-#endif /* HAVE_ARCH_X86_64_SUPPORT */
-
-#define XMM(x) \
- case PERF_REG_X86_XMM ## x: \
- case PERF_REG_X86_XMM ## x + 1: \
- return "XMM" #x;
- XMM(0)
- XMM(1)
- XMM(2)
- XMM(3)
- XMM(4)
- XMM(5)
- XMM(6)
- XMM(7)
- XMM(8)
- XMM(9)
- XMM(10)
- XMM(11)
- XMM(12)
- XMM(13)
- XMM(14)
- XMM(15)
-#undef XMM
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index 0b0951030a2f..f924246eff78 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -17,3 +17,20 @@ int arch_evlist__add_default_attrs(struct evlist *evlist)
else
return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL);
}
+
+struct evsel *arch_evlist__leader(struct list_head *list)
+{
+ struct evsel *evsel, *first;
+
+ first = list_first_entry(list, struct evsel, core.node);
+
+ if (!pmu_have_event("cpu", "slots"))
+ return first;
+
+ __evlist__for_each_entry(list, evsel) {
+ if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") &&
+ evsel->name && strstr(evsel->name, "slots"))
+ return evsel;
+ }
+ return first;
+}
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index ddaca75c3bc0..740ae764537e 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -253,7 +253,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
if (!noaffinity) {
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)
@@ -333,7 +333,7 @@ int bench_epoll_ctl(int argc, const char **argv)
/* default to the number of CPUs */
if (!nthreads)
- nthreads = cpu->nr;
+ nthreads = perf_cpu_map__nr(cpu);
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index 79d13dbc0a47..37de970c9743 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -342,7 +342,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
if (!noaffinity) {
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)
@@ -452,7 +452,7 @@ int bench_epoll_wait(int argc, const char **argv)
/* default to the number of CPUs and leave one for the writer pthread */
if (!nthreads)
- nthreads = cpu->nr - 1;
+ nthreads = perf_cpu_map__nr(cpu) - 1;
worker = calloc(nthreads, sizeof(*worker));
if (!worker) {
diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c
index 482738e9bdad..de56601f69ee 100644
--- a/tools/perf/bench/evlist-open-close.c
+++ b/tools/perf/bench/evlist-open-close.c
@@ -71,7 +71,7 @@ static int evlist__count_evsel_fds(struct evlist *evlist)
int cnt = 0;
evlist__for_each_entry(evlist, evsel)
- cnt += evsel->core.threads->nr * evsel->core.cpus->nr;
+ cnt += evsel->core.threads->nr * perf_cpu_map__nr(evsel->core.cpus);
return cnt;
}
@@ -151,7 +151,7 @@ static int bench_evlist_open_close__run(char *evstr)
init_stats(&time_stats);
- printf(" Number of cpus:\t%d\n", evlist->core.cpus->nr);
+ printf(" Number of cpus:\t%d\n", perf_cpu_map__nr(evlist->core.cpus));
printf(" Number of threads:\t%d\n", evlist->core.threads->nr);
printf(" Number of events:\t%d (%d fds)\n",
evlist->core.nr_entries, evlist__count_evsel_fds(evlist));
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index fcdea3e44937..dbcecec4eeda 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -150,7 +150,7 @@ int bench_futex_hash(int argc, const char **argv)
}
if (!params.nthreads) /* default to the number of CPUs */
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
@@ -177,7 +177,7 @@ int bench_futex_hash(int argc, const char **argv)
goto errmem;
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 137890f78e17..6fc9a3d55c1f 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -136,7 +136,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
worker[i].futex = &global_futex;
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -173,7 +173,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
}
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index f7a5ffebb940..2f59d5d1c509 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -131,7 +131,7 @@ static void block_threads(pthread_t *w,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -175,7 +175,7 @@ int bench_futex_requeue(int argc, const char **argv)
}
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 0983f40b4b40..861deb934745 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -152,7 +152,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -252,7 +252,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
err(EXIT_FAILURE, "calloc");
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
/* some sanity checks */
if (params.nwakes > params.nthreads ||
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 2226a475e782..cfda48bef1d7 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -105,7 +105,7 @@ static void block_threads(pthread_t *w,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -151,7 +151,7 @@ int bench_futex_wake(int argc, const char **argv)
}
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
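
All of the bench conversions above share one idiom: worker i is pinned to the (i mod nr)-th CPU of the map, distributing threads round-robin across the available CPUs. A self-contained sketch with a plain array standing in for the perf_cpu_map:

  #define _GNU_SOURCE
  #include <pthread.h>
  #include <sched.h>

  static int pin_worker(pthread_attr_t *attr, const int *cpus,
  		      int ncpus, int i)
  {
  	cpu_set_t cpuset;

  	CPU_ZERO(&cpuset);
  	CPU_SET(cpus[i % ncpus], &cpuset);	/* round-robin pinning */
  	return pthread_attr_setaffinity_np(attr, sizeof(cpu_set_t),
  					   &cpuset);
  }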
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index fa0ff4ce2b74..488f6e6ba1a5 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -223,8 +223,6 @@ static unsigned int group(pthread_t *pth,
snd_ctx->out_fds[i] = fds[1];
if (!thread_mode)
close(fds[0]);
-
- free(ctx);
}
/* Now we have all the fds, fork the senders */
@@ -241,8 +239,6 @@ static unsigned int group(pthread_t *pth,
for (i = 0; i < num_fds; i++)
close(snd_ctx->out_fds[i]);
- free(snd_ctx);
-
/* Return number of children to reap */
return num_fds * 2;
}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index d0895162c2ba..d291f3a8af5f 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -226,7 +226,6 @@ static void run_collection(struct collection *coll)
if (!bench->fn)
break;
printf("# Running %s/%s benchmark...\n", coll->name, bench->name);
- fflush(stdout);
argv[1] = bench->name;
run_bench(coll->name, bench->name, bench->fn, 1, argv);
@@ -247,6 +246,9 @@ int cmd_bench(int argc, const char **argv)
struct collection *coll;
int ret = 0;
+ /* Unbuffered output */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
if (argc < 2) {
/* No collection specified. */
print_usage();
@@ -300,7 +302,6 @@ int cmd_bench(int argc, const char **argv)
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name);
- fflush(stdout);
ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1);
goto end;
}
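
Switching stdout to unbuffered once at startup covers every later printf(), including those in the individual benchmarks, which the removed per-call fflush() lines could not. A minimal sketch of the idiom:

  #include <stdio.h>

  int main(void)
  {
  	/* _IONBF: every write reaches the terminal or pipe at once,
  	 * so progress lines are not held back by stdio buffering. */
  	setvbuf(stdout, NULL, _IONBF, 0);

  	printf("# Running benchmark...\n");	/* no fflush() needed */
  	return 0;
  }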
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 0db3cfc04c47..cd381693658b 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -351,10 +351,14 @@ static int build_id_cache__show_all(void)
static int perf_buildid_cache_config(const char *var, const char *value, void *cb)
{
- const char **debuginfod = cb;
+ struct perf_debuginfod *di = cb;
- if (!strcmp(var, "buildid-cache.debuginfod"))
- *debuginfod = strdup(value);
+ if (!strcmp(var, "buildid-cache.debuginfod")) {
+ di->urls = strdup(value);
+ if (!di->urls)
+ return -ENOMEM;
+ di->set = true;
+ }
return 0;
}
@@ -373,8 +377,8 @@ int cmd_buildid_cache(int argc, const char **argv)
*purge_name_list_str = NULL,
*missing_filename = NULL,
*update_name_list_str = NULL,
- *kcore_filename = NULL,
- *debuginfod = NULL;
+ *kcore_filename = NULL;
+ struct perf_debuginfod debuginfod = { };
char sbuf[STRERR_BUFSIZE];
struct perf_data data = {
@@ -399,8 +403,10 @@ int cmd_buildid_cache(int argc, const char **argv)
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
OPT_STRING('u', "update", &update_name_list_str, "file list",
"file(s) to update"),
- OPT_STRING(0, "debuginfod", &debuginfod, "debuginfod url",
- "set debuginfod url"),
+ OPT_STRING_OPTARG_SET(0, "debuginfod", &debuginfod.urls,
+ &debuginfod.set, "debuginfod urls",
+ "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
+ "system"),
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"),
OPT_END()
@@ -425,10 +431,7 @@ int cmd_buildid_cache(int argc, const char **argv)
if (argc || !(list_files || opts_flag))
usage_with_options(buildid_cache_usage, buildid_cache_options);
- if (debuginfod) {
- pr_debug("DEBUGINFOD_URLS=%s\n", debuginfod);
- setenv("DEBUGINFOD_URLS", debuginfod, 1);
- }
+ perf_debuginfod_setup(&debuginfod);
/* -l is exclusive. It can not be used with other options. */
if (list_files && opts_flag) {
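
perf_debuginfod_setup() replaces the removed open-coded setenv() block. Its body is not shown in this diff; the sketch below is an assumption of the behaviour implied by the option text ("system" meaning keep the inherited DEBUGINFOD_URLS), not the actual util implementation:

  #include <stdbool.h>
  #include <stdlib.h>
  #include <string.h>

  struct perf_debuginfod {
  	const char *urls;
  	bool	    set;
  };

  /* Assumed behaviour: export the URLs unless the user asked for the
   * environment's own value via the "system" default. */
  static void perf_debuginfod_setup(struct perf_debuginfod *di)
  {
  	if (!di->set)
  		return;
  	if (di->urls && strcmp(di->urls, "system"))
  		setenv("DEBUGINFOD_URLS", di->urls, 1);
  }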
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index b5c67ef73862..77dd4afacca4 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2015,7 +2015,8 @@ static int setup_nodes(struct perf_session *session)
{
struct numa_node *n;
unsigned long **nodes;
- int node, cpu;
+ int node, idx;
+ struct perf_cpu cpu;
int *cpu2node;
if (c2c.node_info > 2)
@@ -2038,8 +2039,8 @@ static int setup_nodes(struct perf_session *session)
if (!cpu2node)
return -ENOMEM;
- for (cpu = 0; cpu < c2c.cpus_cnt; cpu++)
- cpu2node[cpu] = -1;
+ for (idx = 0; idx < c2c.cpus_cnt; idx++)
+ cpu2node[idx] = -1;
c2c.cpu2node = cpu2node;
@@ -2057,13 +2058,13 @@ static int setup_nodes(struct perf_session *session)
if (perf_cpu_map__empty(map))
continue;
- for (cpu = 0; cpu < map->nr; cpu++) {
- set_bit(map->map[cpu], set);
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ set_bit(cpu.cpu, set);
- if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug"))
+ if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug"))
return -EINVAL;
- cpu2node[map->map[cpu]] = node;
+ cpu2node[cpu.cpu] = node;
}
}
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 87cb11a7a3ee..dec24dc0e767 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -13,7 +13,9 @@
#include <signal.h>
#include <stdlib.h>
#include <fcntl.h>
+#include <math.h>
#include <poll.h>
+#include <ctype.h>
#include <linux/capability.h>
#include <linux/string.h>
@@ -28,36 +30,12 @@
#include "strfilter.h"
#include "util/cap.h"
#include "util/config.h"
+#include "util/ftrace.h"
#include "util/units.h"
#include "util/parse-sublevel-options.h"
#define DEFAULT_TRACER "function_graph"
-struct perf_ftrace {
- struct evlist *evlist;
- struct target target;
- const char *tracer;
- struct list_head filters;
- struct list_head notrace;
- struct list_head graph_funcs;
- struct list_head nograph_funcs;
- int graph_depth;
- unsigned long percpu_buffer_size;
- bool inherit;
- int func_stack_trace;
- int func_irq_info;
- int graph_nosleep_time;
- int graph_noirqs;
- int graph_verbose;
- int graph_thresh;
- unsigned int initial_delay;
-};
-
-struct filter_entry {
- struct list_head list;
- char name[];
-};
-
static volatile int workload_exec_errno;
static bool done;
@@ -303,7 +281,7 @@ static int set_tracing_cpumask(struct perf_cpu_map *cpumap)
int ret;
int last_cpu;
- last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1);
+ last_cpu = perf_cpu_map__cpu(cpumap, perf_cpu_map__nr(cpumap) - 1).cpu;
mask_size = last_cpu / 4 + 2; /* one more byte for EOS */
mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */
@@ -565,7 +543,24 @@ static int set_tracing_options(struct perf_ftrace *ftrace)
return 0;
}
-static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
+static void select_tracer(struct perf_ftrace *ftrace)
+{
+ bool graph = !list_empty(&ftrace->graph_funcs) ||
+ !list_empty(&ftrace->nograph_funcs);
+ bool func = !list_empty(&ftrace->filters) ||
+ !list_empty(&ftrace->notrace);
+
+ /* The function_graph has priority over function tracer. */
+ if (graph)
+ ftrace->tracer = "function_graph";
+ else if (func)
+ ftrace->tracer = "function";
+ /* Otherwise, the default tracer is used. */
+
+ pr_debug("%s tracer is used\n", ftrace->tracer);
+}
+
+static int __cmd_ftrace(struct perf_ftrace *ftrace)
{
char *trace_file;
int trace_fd;
@@ -586,10 +581,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
return -1;
}
- signal(SIGINT, sig_handler);
- signal(SIGUSR1, sig_handler);
- signal(SIGCHLD, sig_handler);
- signal(SIGPIPE, sig_handler);
+ select_tracer(ftrace);
if (reset_tracing_files(ftrace) < 0) {
pr_err("failed to reset ftrace\n");
@@ -600,11 +592,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
if (write_tracing_file("trace", "0") < 0)
goto out;
- if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false,
- ftrace__workload_exec_failed_signal) < 0) {
- goto out;
- }
-
if (set_tracing_options(ftrace) < 0)
goto out_reset;
@@ -693,6 +680,270 @@ out:
return (done && !workload_exec_errno) ? 0 : -1;
}
+static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
+{
+ char *p, *q;
+ char *unit;
+ double num;
+ int i;
+
+ /* ensure NUL termination */
+ buf[len] = '\0';
+
+ /* handle data line by line */
+ for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) {
+ *q = '\0';
+ /* move it to the line buffer */
+ strcat(linebuf, p);
+
+ /*
+ * parse trace output to get function duration like in
+ *
+ * # tracer: function_graph
+ * #
+ * # CPU DURATION FUNCTION CALLS
+ * # | | | | | | |
+ * 1) + 10.291 us | do_filp_open();
+ * 1) 4.889 us | do_filp_open();
+ * 1) 6.086 us | do_filp_open();
+ *
+ */
+ if (linebuf[0] == '#')
+ goto next;
+
+ /* ignore CPU */
+ p = strchr(linebuf, ')');
+ if (p == NULL)
+ p = linebuf;
+
+ while (*p && !isdigit(*p) && (*p != '|'))
+ p++;
+
+ /* no duration */
+ if (*p == '\0' || *p == '|')
+ goto next;
+
+ num = strtod(p, &unit);
+ if (!unit || strncmp(unit, " us", 3))
+ goto next;
+
+ i = log2(num);
+ if (i < 0)
+ i = 0;
+ if (i >= NUM_BUCKET)
+ i = NUM_BUCKET - 1;
+
+ buckets[i]++;
+
+next:
+ /* empty the line buffer for the next output */
+ linebuf[0] = '\0';
+ }
+
+ /* preserve any remaining output (before newline) */
+ strcat(linebuf, p);
+}
+
+static void display_histogram(int buckets[])
+{
+ int i;
+ int total = 0;
+ int bar_total = 46; /* to fit in 80 column */
+ char bar[] = "###############################################";
+ int bar_len;
+
+ for (i = 0; i < NUM_BUCKET; i++)
+ total += buckets[i];
+
+ if (total == 0) {
+ printf("No data found\n");
+ return;
+ }
+
+ printf("# %14s | %10s | %-*s |\n",
+ " DURATION ", "COUNT", bar_total, "GRAPH");
+
+ bar_len = buckets[0] * bar_total / total;
+ printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
+ 0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
+
+ for (i = 1; i < NUM_BUCKET - 1; i++) {
+ int start = (1 << (i - 1));
+ int stop = 1 << i;
+ const char *unit = "us";
+
+ if (start >= 1024) {
+ start >>= 10;
+ stop >>= 10;
+ unit = "ms";
+ }
+ bar_len = buckets[i] * bar_total / total;
+ printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
+ start, stop, unit, buckets[i], bar_len, bar,
+ bar_total - bar_len, "");
+ }
+
+ bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
+ printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
+ 1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
+ bar_total - bar_len, "");
+
+}
+
+static int prepare_func_latency(struct perf_ftrace *ftrace)
+{
+ char *trace_file;
+ int fd;
+
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_prepare_bpf(ftrace);
+
+ if (reset_tracing_files(ftrace) < 0) {
+ pr_err("failed to reset ftrace\n");
+ return -1;
+ }
+
+ /* reset ftrace buffer */
+ if (write_tracing_file("trace", "0") < 0)
+ return -1;
+
+ if (set_tracing_options(ftrace) < 0)
+ return -1;
+
+ /* force to use the function_graph tracer to track duration */
+ if (write_tracing_file("current_tracer", "function_graph") < 0) {
+ pr_err("failed to set current_tracer to function_graph\n");
+ return -1;
+ }
+
+ trace_file = get_tracing_file("trace_pipe");
+ if (!trace_file) {
+ pr_err("failed to open trace_pipe\n");
+ return -1;
+ }
+
+ fd = open(trace_file, O_RDONLY);
+ if (fd < 0)
+ pr_err("failed to open trace_pipe\n");
+
+ put_tracing_file(trace_file);
+ return fd;
+}
+
+static int start_func_latency(struct perf_ftrace *ftrace)
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_start_bpf(ftrace);
+
+ if (write_tracing_file("tracing_on", "1") < 0) {
+ pr_err("can't enable tracing\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int stop_func_latency(struct perf_ftrace *ftrace)
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_stop_bpf(ftrace);
+
+ write_tracing_file("tracing_on", "0");
+ return 0;
+}
+
+static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_read_bpf(ftrace, buckets);
+
+ return 0;
+}
+
+static int cleanup_func_latency(struct perf_ftrace *ftrace)
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_cleanup_bpf(ftrace);
+
+ reset_tracing_files(ftrace);
+ return 0;
+}
+
+static int __cmd_latency(struct perf_ftrace *ftrace)
+{
+ int trace_fd;
+ char buf[4096];
+ char line[256];
+ struct pollfd pollfd = {
+ .events = POLLIN,
+ };
+ int buckets[NUM_BUCKET] = { };
+
+ if (!(perf_cap__capable(CAP_PERFMON) ||
+ perf_cap__capable(CAP_SYS_ADMIN))) {
+ pr_err("ftrace only works for %s!\n",
+#ifdef HAVE_LIBCAP_SUPPORT
+ "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
+#else
+ "root"
+#endif
+ );
+ return -1;
+ }
+
+ trace_fd = prepare_func_latency(ftrace);
+ if (trace_fd < 0)
+ goto out;
+
+ fcntl(trace_fd, F_SETFL, O_NONBLOCK);
+ pollfd.fd = trace_fd;
+
+ if (start_func_latency(ftrace) < 0)
+ goto out;
+
+ evlist__start_workload(ftrace->evlist);
+
+ line[0] = '\0';
+ while (!done) {
+ if (poll(&pollfd, 1, -1) < 0)
+ break;
+
+ if (pollfd.revents & POLLIN) {
+ int n = read(trace_fd, buf, sizeof(buf) - 1);
+ if (n < 0)
+ break;
+
+ make_histogram(buckets, buf, n, line);
+ }
+ }
+
+ stop_func_latency(ftrace);
+
+ if (workload_exec_errno) {
+ const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
+ pr_err("workload failed: %s\n", emsg);
+ goto out;
+ }
+
+ /* read remaining buffer contents */
+ while (!ftrace->target.use_bpf) {
+ int n = read(trace_fd, buf, sizeof(buf) - 1);
+ if (n <= 0)
+ break;
+ make_histogram(buckets, buf, n, line);
+ }
+
+ read_func_latency(ftrace, buckets);
+
+ display_histogram(buckets);
+
+out:
+ close(trace_fd);
+ cleanup_func_latency(ftrace);
+
+ return (done && !workload_exec_errno) ? 0 : -1;
+}
+
static int perf_ftrace_config(const char *var, const char *value, void *cb)
{
struct perf_ftrace *ftrace = cb;
@@ -855,22 +1106,11 @@ static int parse_graph_tracer_opts(const struct option *opt,
return 0;
}
-static void select_tracer(struct perf_ftrace *ftrace)
-{
- bool graph = !list_empty(&ftrace->graph_funcs) ||
- !list_empty(&ftrace->nograph_funcs);
- bool func = !list_empty(&ftrace->filters) ||
- !list_empty(&ftrace->notrace);
-
- /* The function_graph has priority over function tracer. */
- if (graph)
- ftrace->tracer = "function_graph";
- else if (func)
- ftrace->tracer = "function";
- /* Otherwise, the default tracer is used. */
-
- pr_debug("%s tracer is used\n", ftrace->tracer);
-}
+enum perf_ftrace_subcommand {
+ PERF_FTRACE_NONE,
+ PERF_FTRACE_TRACE,
+ PERF_FTRACE_LATENCY,
+};
int cmd_ftrace(int argc, const char **argv)
{
@@ -879,17 +1119,7 @@ int cmd_ftrace(int argc, const char **argv)
.tracer = DEFAULT_TRACER,
.target = { .uid = UINT_MAX, },
};
- const char * const ftrace_usage[] = {
- "perf ftrace [<options>] [<command>]",
- "perf ftrace [<options>] -- <command> [<options>]",
- NULL
- };
- const struct option ftrace_options[] = {
- OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
- "Tracer to use: function_graph(default) or function"),
- OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
- "Show available functions to filter",
- opt_list_avail_functions, "*"),
+ const struct option common_options[] = {
OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
"Trace on existing process id"),
/* TODO: Add short option -t after -t/--tracer can be removed. */
@@ -901,6 +1131,14 @@ int cmd_ftrace(int argc, const char **argv)
"System-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
"List of cpus to monitor"),
+ OPT_END()
+ };
+ const struct option ftrace_options[] = {
+ OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
+ "Tracer to use: function_graph(default) or function"),
+ OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
+ "Show available functions to filter",
+ opt_list_avail_functions, "*"),
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
"Trace given functions using function tracer",
parse_filter_func),
@@ -923,24 +1161,65 @@ int cmd_ftrace(int argc, const char **argv)
"Trace children processes"),
OPT_UINTEGER('D', "delay", &ftrace.initial_delay,
"Number of milliseconds to wait before starting tracing after program start"),
- OPT_END()
+ OPT_PARENT(common_options),
+ };
+ const struct option latency_options[] = {
+ OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
+ "Show latency of given function", parse_filter_func),
+#ifdef HAVE_BPF_SKEL
+ OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
+ "Use BPF to measure function latency"),
+#endif
+ OPT_PARENT(common_options),
+ };
+ const struct option *options = ftrace_options;
+
+ const char * const ftrace_usage[] = {
+ "perf ftrace [<options>] [<command>]",
+ "perf ftrace [<options>] -- [<command>] [<options>]",
+ "perf ftrace {trace|latency} [<options>] [<command>]",
+ "perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
+ NULL
};
+ enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
INIT_LIST_HEAD(&ftrace.filters);
INIT_LIST_HEAD(&ftrace.notrace);
INIT_LIST_HEAD(&ftrace.graph_funcs);
INIT_LIST_HEAD(&ftrace.nograph_funcs);
+ signal(SIGINT, sig_handler);
+ signal(SIGUSR1, sig_handler);
+ signal(SIGCHLD, sig_handler);
+ signal(SIGPIPE, sig_handler);
+
ret = perf_config(perf_ftrace_config, &ftrace);
if (ret < 0)
return -1;
- argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
- if (!argc && target__none(&ftrace.target))
- ftrace.target.system_wide = true;
+ if (argc > 1) {
+ if (!strcmp(argv[1], "trace")) {
+ subcmd = PERF_FTRACE_TRACE;
+ } else if (!strcmp(argv[1], "latency")) {
+ subcmd = PERF_FTRACE_LATENCY;
+ options = latency_options;
+ }
+
+ if (subcmd != PERF_FTRACE_NONE) {
+ argc--;
+ argv++;
+ }
+ }
+ /* for backward compatibility */
+ if (subcmd == PERF_FTRACE_NONE)
+ subcmd = PERF_FTRACE_TRACE;
- select_tracer(&ftrace);
+ argc = parse_options(argc, argv, options, ftrace_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (argc < 0) {
+ ret = -EINVAL;
+ goto out_delete_filters;
+ }
ret = target__validate(&ftrace.target);
if (ret) {
@@ -961,7 +1240,35 @@ int cmd_ftrace(int argc, const char **argv)
if (ret < 0)
goto out_delete_evlist;
- ret = __cmd_ftrace(&ftrace, argc, argv);
+ if (argc) {
+ ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target,
+ argv, false,
+ ftrace__workload_exec_failed_signal);
+ if (ret < 0)
+ goto out_delete_evlist;
+ }
+
+ switch (subcmd) {
+ case PERF_FTRACE_TRACE:
+ if (!argc && target__none(&ftrace.target))
+ ftrace.target.system_wide = true;
+ ret = __cmd_ftrace(&ftrace);
+ break;
+ case PERF_FTRACE_LATENCY:
+ if (list_empty(&ftrace.filters)) {
+ pr_err("Should provide a function to measure\n");
+ parse_options_usage(ftrace_usage, options, "T", 1);
+ ret = -EINVAL;
+ goto out_delete_evlist;
+ }
+ ret = __cmd_latency(&ftrace);
+ break;
+ case PERF_FTRACE_NONE:
+ default:
+ pr_err("Invalid subcommand\n");
+ ret = -EINVAL;
+ break;
+ }
out_delete_evlist:
evlist__delete(ftrace.evlist);
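
The histogram added above buckets each parsed duration by the floor of its base-2 logarithm, which is what produces the 1-2-4-... us/ms rows printed by display_histogram(). A standalone sketch of the bucketing rule (the NUM_BUCKET value is assumed for illustration; the clamping matches make_histogram() above):

  #include <math.h>

  #define NUM_BUCKET 22	/* assumed size, for illustration */

  /* Bucket i holds durations whose floor(log2(us)) equals i. */
  static int duration_bucket(double us)
  {
  	int i = (int)log2(us);

  	if (i < 0)		/* sub-microsecond samples */
  		i = 0;
  	if (i >= NUM_BUCKET)	/* clamp the long tail */
  		i = NUM_BUCKET - 1;
  	return i;
  }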
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index bc5259db5fd9..fbf43a454cba 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -535,12 +535,9 @@ static int perf_event__repipe_exit(struct perf_tool *tool,
static int perf_event__repipe_tracing_data(struct perf_session *session,
union perf_event *event)
{
- int err;
-
perf_event__repipe_synth(session->tool, event);
- err = perf_event__process_tracing_data(session, event);
- return err;
+ return perf_event__process_tracing_data(session, event);
}
static int dso__read_build_id(struct dso *dso)
@@ -755,12 +752,16 @@ static int parse_vm_time_correlation(const struct option *opt, const char *str,
return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
}
+static int output_fd(struct perf_inject *inject)
+{
+ return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
+}
+
static int __cmd_inject(struct perf_inject *inject)
{
int ret = -EINVAL;
struct perf_session *session = inject->session;
- struct perf_data *data_out = &inject->output;
- int fd = inject->in_place_update ? -1 : perf_data__fd(data_out);
+ int fd = output_fd(inject);
u64 output_data_offset;
signal(SIGINT, sig_handler);
@@ -820,7 +821,7 @@ static int __cmd_inject(struct perf_inject *inject)
inject->tool.ordered_events = true;
inject->tool.ordering_requires_timestamps = true;
/* Allow space in the header for new attributes */
- output_data_offset = 4096;
+ output_data_offset = roundup(8192 + session->header.data_offset, 4096);
if (inject->strip)
strip_init(inject);
}
@@ -1015,7 +1016,7 @@ int cmd_inject(int argc, const char **argv)
}
inject.session = __perf_session__new(&data, repipe,
- perf_data__fd(&inject.output),
+ output_fd(&inject),
&inject.tool);
if (IS_ERR(inject.session)) {
ret = PTR_ERR(inject.session);
@@ -1078,7 +1079,8 @@ out_delete:
zstd_fini(&(inject.session->zstd_data));
perf_session__delete(inject.session);
out_close_output:
- perf_data__close(&inject.output);
+ if (!inject.in_place_update)
+ perf_data__close(&inject.output);
free(inject.itrace_synth_opts.vm_tm_corr_args);
return ret;
}
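In-place update rewrites the input file, so there is no separate output descriptor; the new output_fd() helper keeps that -1 sentinel in one place, and both the session constructor and the close path now honor it. A small self-contained sketch of the same shape (stand-in types, not the real perf structs):

#include <stdio.h>

/* Illustrative stand-ins for the perf types. */
struct data { int fd; };
struct inject { int in_place_update; struct data output; };

/* Mirror of the new helper: -1 means "no separate output file". */
static int output_fd(const struct inject *inj)
{
	return inj->in_place_update ? -1 : inj->output.fd;
}

int main(void)
{
	struct inject a = { .in_place_update = 1, .output = { .fd = 3 } };
	struct inject b = { .in_place_update = 0, .output = { .fd = 3 } };

	printf("%d %d\n", output_fd(&a), output_fd(&b)); /* prints: -1 3 */
	return 0;
}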
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index da03a341c63c..99d7ff9a8eff 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -192,7 +192,7 @@ static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_samp
int ret = evsel__process_alloc_event(evsel, sample);
if (!ret) {
- int node1 = cpu__get_node(sample->cpu),
+ int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}),
node2 = evsel__intval(evsel, sample, "node");
if (node1 != node2)
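The compound-literal cast above is the first of many: this series wraps plain int CPU numbers in struct perf_cpu so that a CPU number can no longer be silently confused with a CPU-map index. A compile-time sketch of what the one-member wrapper buys (assumed shape, mirroring the diff):

#include <stdio.h>

/* Wrapping the CPU number in a one-member struct makes the compiler
 * reject accidental mixing of CPU numbers and cpu-map indices. */
struct perf_cpu { int cpu; };

static int node_of(struct perf_cpu cpu)	/* takes a CPU number */
{
	return cpu.cpu / 4;	/* placeholder topology */
}

int main(void)
{
	struct perf_cpu c = { .cpu = 7 };
	int idx = 2;

	printf("cpu %d -> node %d\n", c.cpu, node_of(c));	/* fine */
	/* node_of(idx); */	/* now a compile error, not a silent bug */
	(void)idx;
	return 0;
}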
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0338b813585a..bb716c953d02 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -111,6 +111,7 @@ struct record {
unsigned long long samples;
struct mmap_cpu_mask affinity_mask;
unsigned long output_max_size; /* = 0: unlimited */
+ struct perf_debuginfod debuginfod;
};
static volatile int done;
@@ -2177,6 +2178,12 @@ static int perf_record_config(const char *var, const char *value, void *cb)
rec->opts.nr_cblocks = nr_cblocks_default;
}
#endif
+ if (!strcmp(var, "record.debuginfod")) {
+ rec->debuginfod.urls = strdup(value);
+ if (!rec->debuginfod.urls)
+ return -ENOMEM;
+ rec->debuginfod.set = true;
+ }
return 0;
}
@@ -2267,6 +2274,10 @@ out_free:
return ret;
}
+void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
+{
+}
+
static int parse_control_option(const struct option *opt,
const char *str,
int unset __maybe_unused)
@@ -2663,6 +2674,10 @@ static struct option __record_options[] = {
parse_control_option),
OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
"Fine-tune event synthesis: default=all", parse_record_synth_option),
+ OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
+ &record.debuginfod.set, "debuginfod urls",
+ "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
+ "system"),
OPT_END()
};
@@ -2716,6 +2731,8 @@ int cmd_record(int argc, const char **argv)
if (err)
return err;
+ perf_debuginfod_setup(&record.debuginfod);
+
/* Make system wide (-a) the default target. */
if (!argc && target__none(&rec->opts.target))
rec->opts.target.system_wide = true;
@@ -2792,7 +2809,7 @@ int cmd_record(int argc, const char **argv)
symbol__init(NULL);
if (rec->opts.affinity != PERF_AFFINITY_SYS) {
- rec->affinity_mask.nbits = cpu__max_cpu();
+ rec->affinity_mask.nbits = cpu__max_cpu().cpu;
rec->affinity_mask.bits = bitmap_zalloc(rec->affinity_mask.nbits);
if (!rec->affinity_mask.bits) {
pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
@@ -2898,6 +2915,10 @@ int cmd_record(int argc, const char **argv)
}
rec->opts.target.hybrid = perf_pmu__has_hybrid();
+
+ if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
+ arch__add_leaf_frame_record_opts(&rec->opts);
+
err = -ENOMEM;
if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
usage_with_options(record_usage, record_options);
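The record.debuginfod config key and the --debuginfod option fill the same struct, and perf_debuginfod_setup() consumes it before recording starts; the "system" optarg default plausibly means "take URLs from the DEBUGINFOD_URLS environment variable". A hedged sketch of that precedence, with invented helper names:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct debuginfod_opts { const char *urls; int set; };

/* Assumed resolution order: explicit urls > "system" (environment) > off.
 * The real plumbing lives in perf_debuginfod_setup(). */
static const char *resolve_debuginfod(const struct debuginfod_opts *o)
{
	if (!o->set)
		return NULL;
	if (o->urls && strcmp(o->urls, "system"))
		return o->urls;
	return getenv("DEBUGINFOD_URLS");
}

int main(void)
{
	struct debuginfod_opts o = { .urls = "system", .set = 1 };
	const char *urls = resolve_debuginfod(&o);

	printf("debuginfod: %s\n", urls ? urls : "(disabled)");
	return 0;
}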
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8ae400429870..1dd92d8c9279 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -410,7 +410,7 @@ static int report__setup_sample_type(struct report *rep)
}
}
- callchain_param_setup(sample_type);
+ callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env));
if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
@@ -1127,7 +1127,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
* on events sample_type.
*/
sample_type = evlist__combined_sample_type(*pevlist);
- callchain_param_setup(sample_type);
+ callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
return 0;
}
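Threading the session's architecture string into callchain_param_setup() (and, below, into perf_reg_name()) lets a single perf binary decode data recorded on a different architecture. A purely hypothetical sketch of arch-keyed register naming, not the real perf tables:

#include <stdio.h>
#include <string.h>

static const char *reg_name(int reg, const char *arch)
{
	static const char *x86[] = { "AX", "BX", "CX" };
	static const char *arm64[] = { "X0", "X1", "X2" };

	if (!strcmp(arch, "x86_64"))
		return reg < 3 ? x86[reg] : "?";
	if (!strcmp(arch, "arm64"))
		return reg < 3 ? arm64[reg] : "?";
	return "?";
}

int main(void)
{
	/* Same register index, arch-dependent name. */
	printf("%s %s\n", reg_name(0, "x86_64"), reg_name(0, "arm64"));
	return 0;
}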
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 4527f632ebe4..72d446de9c60 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -167,7 +167,7 @@ struct trace_sched_handler {
struct perf_sched_map {
DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
- int *comp_cpus;
+ struct perf_cpu *comp_cpus;
bool comp;
struct perf_thread_map *color_pids;
const char *color_pids_str;
@@ -191,7 +191,7 @@ struct perf_sched {
* Track the current task - that way we can know whether there's any
* weird events, such as a task being switched away that is not current.
*/
- int max_cpu;
+ struct perf_cpu max_cpu;
u32 curr_pid[MAX_CPUS];
struct thread *curr_thread[MAX_CPUS];
char next_shortname1;
@@ -1535,28 +1535,31 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
int new_shortname;
u64 timestamp0, timestamp = sample->time;
s64 delta;
- int i, this_cpu = sample->cpu;
+ int i;
+ struct perf_cpu this_cpu = {
+ .cpu = sample->cpu,
+ };
int cpus_nr;
bool new_cpu = false;
const char *color = PERF_COLOR_NORMAL;
char stimestamp[32];
- BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
+ BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0);
- if (this_cpu > sched->max_cpu)
+ if (this_cpu.cpu > sched->max_cpu.cpu)
sched->max_cpu = this_cpu;
if (sched->map.comp) {
cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
- if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
+ if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) {
sched->map.comp_cpus[cpus_nr++] = this_cpu;
new_cpu = true;
}
} else
- cpus_nr = sched->max_cpu;
+ cpus_nr = sched->max_cpu.cpu;
- timestamp0 = sched->cpu_last_switched[this_cpu];
- sched->cpu_last_switched[this_cpu] = timestamp;
+ timestamp0 = sched->cpu_last_switched[this_cpu.cpu];
+ sched->cpu_last_switched[this_cpu.cpu] = timestamp;
if (timestamp0)
delta = timestamp - timestamp0;
else
@@ -1577,7 +1580,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
return -1;
}
- sched->curr_thread[this_cpu] = thread__get(sched_in);
+ sched->curr_thread[this_cpu.cpu] = thread__get(sched_in);
printf(" ");
@@ -1608,8 +1611,10 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
}
for (i = 0; i < cpus_nr; i++) {
- int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
- struct thread *curr_thread = sched->curr_thread[cpu];
+ struct perf_cpu cpu = {
+ .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i,
+ };
+ struct thread *curr_thread = sched->curr_thread[cpu.cpu];
struct thread_runtime *curr_tr;
const char *pid_color = color;
const char *cpu_color = color;
@@ -1617,19 +1622,19 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
if (curr_thread && thread__has_color(curr_thread))
pid_color = COLOR_PIDS;
- if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
+ if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, cpu))
continue;
- if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
+ if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu))
cpu_color = COLOR_CPUS;
- if (cpu != this_cpu)
+ if (cpu.cpu != this_cpu.cpu)
color_fprintf(stdout, color, " ");
else
color_fprintf(stdout, cpu_color, "*");
- if (sched->curr_thread[cpu]) {
- curr_tr = thread__get_runtime(sched->curr_thread[cpu]);
+ if (sched->curr_thread[cpu.cpu]) {
+ curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]);
if (curr_tr == NULL) {
thread__put(sched_in);
return -1;
@@ -1639,7 +1644,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
color_fprintf(stdout, color, " ");
}
- if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
+ if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
goto out;
timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
@@ -1929,7 +1934,7 @@ static char *timehist_get_commstr(struct thread *thread)
static void timehist_header(struct perf_sched *sched)
{
- u32 ncpus = sched->max_cpu + 1;
+ u32 ncpus = sched->max_cpu.cpu + 1;
u32 i, j;
printf("%15s %6s ", "time", "cpu");
@@ -2008,7 +2013,7 @@ static void timehist_print_sample(struct perf_sched *sched,
struct thread_runtime *tr = thread__priv(thread);
const char *next_comm = evsel__strval(evsel, sample, "next_comm");
const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
- u32 max_cpus = sched->max_cpu + 1;
+ u32 max_cpus = sched->max_cpu.cpu + 1;
char tstr[64];
char nstr[30];
u64 wait_time;
@@ -2389,7 +2394,7 @@ static void timehist_print_wakeup_event(struct perf_sched *sched,
timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
printf("%15s [%04d] ", tstr, sample->cpu);
if (sched->show_cpu_visual)
- printf(" %*s ", sched->max_cpu + 1, "");
+ printf(" %*s ", sched->max_cpu.cpu + 1, "");
printf(" %-*s ", comm_width, timehist_get_commstr(thread));
@@ -2449,13 +2454,13 @@ static void timehist_print_migration_event(struct perf_sched *sched,
{
struct thread *thread;
char tstr[64];
- u32 max_cpus = sched->max_cpu + 1;
+ u32 max_cpus;
u32 ocpu, dcpu;
if (sched->summary_only)
return;
- max_cpus = sched->max_cpu + 1;
+ max_cpus = sched->max_cpu.cpu + 1;
ocpu = evsel__intval(evsel, sample, "orig_cpu");
dcpu = evsel__intval(evsel, sample, "dest_cpu");
@@ -2918,7 +2923,7 @@ static void timehist_print_summary(struct perf_sched *sched,
printf(" Total scheduling time (msec): ");
print_sched_time(hist_time, 2);
- printf(" (x %d)\n", sched->max_cpu);
+ printf(" (x %d)\n", sched->max_cpu.cpu);
}
typedef int (*sched_handler)(struct perf_tool *tool,
@@ -2935,9 +2940,11 @@ static int perf_timehist__process_sample(struct perf_tool *tool,
{
struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
int err = 0;
- int this_cpu = sample->cpu;
+ struct perf_cpu this_cpu = {
+ .cpu = sample->cpu,
+ };
- if (this_cpu > sched->max_cpu)
+ if (this_cpu.cpu > sched->max_cpu.cpu)
sched->max_cpu = this_cpu;
if (evsel->handler != NULL) {
@@ -3054,10 +3061,10 @@ static int perf_sched__timehist(struct perf_sched *sched)
goto out;
/* pre-allocate struct for per-CPU idle stats */
- sched->max_cpu = session->header.env.nr_cpus_online;
- if (sched->max_cpu == 0)
- sched->max_cpu = 4;
- if (init_idle_threads(sched->max_cpu))
+ sched->max_cpu.cpu = session->header.env.nr_cpus_online;
+ if (sched->max_cpu.cpu == 0)
+ sched->max_cpu.cpu = 4;
+ if (init_idle_threads(sched->max_cpu.cpu))
goto out;
/* summary_only implies summary option, but don't overwrite summary if set */
@@ -3209,10 +3216,10 @@ static int setup_map_cpus(struct perf_sched *sched)
{
struct perf_cpu_map *map;
- sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
+ sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF);
if (sched->map.comp) {
- sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
+ sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int));
if (!sched->map.comp_cpus)
return -1;
}
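In compact map mode, a CPU only gets a display column the first time it appears: the bitmap guards the assignment and comp_cpus records the column order. The same idea with a plain array in place of the kernel bitmap helpers, as a sketch:

#include <stdio.h>

#define MAX_CPUS 16
struct perf_cpu { int cpu; };

int main(void)
{
	unsigned char seen[MAX_CPUS] = { 0 };
	struct perf_cpu comp_cpus[MAX_CPUS];
	int cpus_nr = 0;
	int samples[] = { 5, 2, 5, 9, 2 };

	/* Assign a column only on first sight of a CPU, as
	 * map_switch_event() does via test_and_set_bit(). */
	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		int c = samples[i];

		if (!seen[c]) {
			seen[c] = 1;
			comp_cpus[cpus_nr++].cpu = c;
		}
	}
	for (int i = 0; i < cpus_nr; i++)
		printf("col %d -> cpu %d\n", i, comp_cpus[i].cpu);
	return 0;
}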
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 9434367af166..abae8184e171 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -15,6 +15,7 @@
#include "util/symbol.h"
#include "util/thread.h"
#include "util/trace-event.h"
+#include "util/env.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/evsel_fprintf.h"
@@ -514,7 +515,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
return -EINVAL;
if (PRINT_FIELD(PHYS_ADDR) &&
- evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(DATA_PAGE_SIZE) &&
@@ -648,7 +649,7 @@ out:
return 0;
}
-static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
+static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, const char *arch,
FILE *fp)
{
unsigned i = 0, r;
@@ -661,7 +662,7 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
u64 val = regs->regs[i++];
- printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
+ printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r, arch), val);
}
return printed;
@@ -718,17 +719,17 @@ tod_scnprintf(struct perf_script *script, char *buf, int buflen,
}
static int perf_sample__fprintf_iregs(struct perf_sample *sample,
- struct perf_event_attr *attr, FILE *fp)
+ struct perf_event_attr *attr, const char *arch, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->intr_regs,
- attr->sample_regs_intr, fp);
+ attr->sample_regs_intr, arch, fp);
}
static int perf_sample__fprintf_uregs(struct perf_sample *sample,
- struct perf_event_attr *attr, FILE *fp)
+ struct perf_event_attr *attr, const char *arch, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->user_regs,
- attr->sample_regs_user, fp);
+ attr->sample_regs_user, arch, fp);
}
static int perf_sample__fprintf_start(struct perf_script *script,
@@ -2000,6 +2001,7 @@ static void process_event(struct perf_script *script,
struct evsel_script *es = evsel->priv;
FILE *fp = es->fp;
char str[PAGE_SIZE_NAME_LEN];
+ const char *arch = perf_env__arch(machine->env);
if (output[type].fields == 0)
return;
@@ -2066,10 +2068,10 @@ static void process_event(struct perf_script *script,
}
if (PRINT_FIELD(IREGS))
- perf_sample__fprintf_iregs(sample, attr, fp);
+ perf_sample__fprintf_iregs(sample, attr, arch, fp);
if (PRINT_FIELD(UREGS))
- perf_sample__fprintf_uregs(sample, attr, fp);
+ perf_sample__fprintf_uregs(sample, attr, arch, fp);
if (PRINT_FIELD(BRSTACK))
perf_sample__fprintf_brstack(sample, thread, attr, fp);
@@ -2113,8 +2115,8 @@ static struct scripting_ops *scripting_ops;
static void __process_stat(struct evsel *counter, u64 tstamp)
{
int nthreads = perf_thread_map__nr(counter->core.threads);
- int ncpus = evsel__nr_cpus(counter);
- int cpu, thread;
+ int idx, thread;
+ struct perf_cpu cpu;
static int header_printed;
if (counter->core.system_wide)
@@ -2127,13 +2129,13 @@ static void __process_stat(struct evsel *counter, u64 tstamp)
}
for (thread = 0; thread < nthreads; thread++) {
- for (cpu = 0; cpu < ncpus; cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) {
struct perf_counts_values *counts;
- counts = perf_counts(counter->counts, cpu, thread);
+ counts = perf_counts(counter->counts, idx, thread);
printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n",
- counter->core.cpus->map[cpu],
+ cpu.cpu,
perf_thread_map__pid(counter->core.threads, thread),
counts->val,
counts->ena,
@@ -2316,7 +2318,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
* on events sample_type.
*/
sample_type = evlist__combined_sample_type(evlist);
- callchain_param_setup(sample_type);
+ callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
/* Enable fields for callchain entries */
if (symbol_conf.use_callchain &&
@@ -2473,7 +2475,7 @@ static int process_switch_event(struct perf_tool *tool,
if (perf_event__process_switch(tool, event, sample, machine) < 0)
return -1;
- if (scripting_ops && scripting_ops->process_switch)
+ if (scripting_ops && scripting_ops->process_switch && !filter_cpu(sample))
scripting_ops->process_switch(event, sample, machine);
if (!script->show_switch_events)
@@ -3466,16 +3468,7 @@ static void script__setup_sample_type(struct perf_script *script)
struct perf_session *session = script->session;
u64 sample_type = evlist__combined_sample_type(session->evlist);
- if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
- if ((sample_type & PERF_SAMPLE_REGS_USER) &&
- (sample_type & PERF_SAMPLE_STACK_USER)) {
- callchain_param.record_mode = CALLCHAIN_DWARF;
- dwarf_callchain_users = true;
- } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
- callchain_param.record_mode = CALLCHAIN_LBR;
- else
- callchain_param.record_mode = CALLCHAIN_FP;
- }
+ callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env));
if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
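The __process_stat() hunk above shows the canonical post-conversion pattern: iterate by map index for storage lookups, and use the perf_cpu value only for display. A toy analogue of perf_cpu_map__for_each_cpu() with a simplified map (assumed semantics):

#include <stdio.h>

struct perf_cpu { int cpu; };
struct cpu_map { int nr; struct perf_cpu map[4]; };

/* Toy analogue of perf_cpu_map__for_each_cpu(cpu, idx, map). */
#define for_each_cpu(c, i, m) \
	for ((i) = 0; (i) < (m)->nr && ((c) = (m)->map[i], 1); (i)++)

int main(void)
{
	struct cpu_map map = { .nr = 3, .map = { {0}, {2}, {5} } };
	struct perf_cpu cpu;
	int idx;
	int counts[3] = { 10, 20, 30 };	/* indexed by map position */

	for_each_cpu(cpu, idx, &map)
		printf("cpu %d (idx %d): %d\n", cpu.cpu, idx, counts[idx]);
	return 0;
}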
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7974933dbc77..3f98689dd687 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -230,11 +230,12 @@ static bool cpus_map_matched(struct evsel *a, struct evsel *b)
if (!a->core.cpus || !b->core.cpus)
return false;
- if (a->core.cpus->nr != b->core.cpus->nr)
+ if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus))
return false;
- for (int i = 0; i < a->core.cpus->nr; i++) {
- if (a->core.cpus->map[i] != b->core.cpus->map[i])
+ for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
+ if (perf_cpu_map__cpu(a->core.cpus, i).cpu !=
+ perf_cpu_map__cpu(b->core.cpus, i).cpu)
return false;
}
@@ -327,34 +328,35 @@ static int write_stat_round_event(u64 tm, u64 type)
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
-static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
+static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 thread,
struct perf_counts_values *count)
{
- struct perf_sample_id *sid = SID(counter, cpu, thread);
+ struct perf_sample_id *sid = SID(counter, cpu_map_idx, thread);
+ struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx);
return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
process_synthesized_event, NULL);
}
-static int read_single_counter(struct evsel *counter, int cpu,
+static int read_single_counter(struct evsel *counter, int cpu_map_idx,
int thread, struct timespec *rs)
{
if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
struct perf_counts_values *count =
- perf_counts(counter->counts, cpu, thread);
+ perf_counts(counter->counts, cpu_map_idx, thread);
count->ena = count->run = val;
count->val = val;
return 0;
}
- return evsel__read_counter(counter, cpu, thread);
+ return evsel__read_counter(counter, cpu_map_idx, thread);
}
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
-static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_map_idx)
{
int nthreads = perf_thread_map__nr(evsel_list->core.threads);
int thread;
@@ -368,24 +370,24 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
for (thread = 0; thread < nthreads; thread++) {
struct perf_counts_values *count;
- count = perf_counts(counter->counts, cpu, thread);
+ count = perf_counts(counter->counts, cpu_map_idx, thread);
/*
* The leader's group read loads data into its group members
* (via evsel__read_counter()) and sets their count->loaded.
*/
- if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
- read_single_counter(counter, cpu, thread, rs)) {
+ if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) &&
+ read_single_counter(counter, cpu_map_idx, thread, rs)) {
counter->counts->scaled = -1;
- perf_counts(counter->counts, cpu, thread)->ena = 0;
- perf_counts(counter->counts, cpu, thread)->run = 0;
+ perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0;
+ perf_counts(counter->counts, cpu_map_idx, thread)->run = 0;
return -1;
}
- perf_counts__set_loaded(counter->counts, cpu, thread, false);
+ perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false);
if (STAT_RECORD) {
- if (evsel__write_stat_event(counter, cpu, thread, count)) {
+ if (evsel__write_stat_event(counter, cpu_map_idx, thread, count)) {
pr_err("failed to write stat event\n");
return -1;
}
@@ -395,7 +397,8 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
fprintf(stat_config.output,
"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
evsel__name(counter),
- cpu,
+ perf_cpu_map__cpu(evsel__cpus(counter),
+ cpu_map_idx).cpu,
count->val, count->ena, count->run);
}
}
@@ -405,36 +408,33 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
static int read_affinity_counters(struct timespec *rs)
{
- struct evsel *counter;
- struct affinity affinity;
- int i, ncpus, cpu;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity saved_affinity, *affinity;
if (all_counters_use_bpf)
return 0;
- if (affinity__setup(&affinity) < 0)
+ if (!target__has_cpu(&target) || target__has_per_thread(&target))
+ affinity = NULL;
+ else if (affinity__setup(&saved_affinity) < 0)
return -1;
+ else
+ affinity = &saved_affinity;
- ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
- if (!target__has_cpu(&target) || target__has_per_thread(&target))
- ncpus = 1;
- evlist__for_each_cpu(evsel_list, i, cpu) {
- if (i >= ncpus)
- break;
- affinity__set(&affinity, cpu);
+ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
+ struct evsel *counter = evlist_cpu_itr.evsel;
- evlist__for_each_entry(evsel_list, counter) {
- if (evsel__cpu_iter_skip(counter, cpu))
- continue;
- if (evsel__is_bpf(counter))
- continue;
- if (!counter->err) {
- counter->err = read_counter_cpu(counter, rs,
- counter->cpu_iter - 1);
- }
+ if (evsel__is_bpf(counter))
+ continue;
+
+ if (!counter->err) {
+ counter->err = read_counter_cpu(counter, rs,
+ evlist_cpu_itr.cpu_map_idx);
}
}
- affinity__cleanup(&affinity);
+ if (affinity)
+ affinity__cleanup(&saved_affinity);
+
return 0;
}
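The new evlist_cpu_iterator flattens the old two-level loop (outer CPU, inner evsel with skip checks) into a single cursor that walks CPU-major, so thread affinity need only change once per CPU; a NULL affinity argument now means no migration at all. A toy cursor with the same shape, assuming a uniform CPU map across events:

#include <stdio.h>

struct evsel { const char *name; int ncpus; };

/* Toy analogue of struct evlist_cpu_iterator: one cursor over
 * (evsel, cpu_map_idx) pairs, advanced CPU-major. */
struct cpu_iter {
	struct evsel *evsels;
	int nevsels;
	int evsel_idx;
	int cpu_map_idx;
};

static int iter_next(struct cpu_iter *it)
{
	if (++it->evsel_idx < it->nevsels)
		return 1;
	it->evsel_idx = 0;
	/* affinity__set() for the new CPU would happen here */
	return ++it->cpu_map_idx < it->evsels[0].ncpus;
}

int main(void)
{
	struct evsel evsels[] = { { "cycles", 2 }, { "instructions", 2 } };
	struct cpu_iter it = { evsels, 2, -1, 0 };

	while (iter_next(&it))
		printf("%s @ cpu_map_idx %d\n",
		       evsels[it.evsel_idx].name, it.cpu_map_idx);
	return 0;
}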
@@ -788,8 +788,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
int status = 0;
const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
- struct affinity affinity;
- int i, cpu, err;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity saved_affinity, *affinity = NULL;
+ int err;
bool second_pass = false;
if (forks) {
@@ -803,8 +804,11 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (group)
evlist__set_leader(evsel_list);
- if (affinity__setup(&affinity) < 0)
- return -1;
+ if (!cpu_map__is_dummy(evsel_list->core.cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return -1;
+ affinity = &saved_affinity;
+ }
evlist__for_each_entry(evsel_list, counter) {
if (bpf_counter__load(counter, &target))
@@ -813,103 +817,98 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
all_counters_use_bpf = false;
}
- evlist__for_each_cpu (evsel_list, i, cpu) {
+ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
+ counter = evlist_cpu_itr.evsel;
+
/*
* bperf calls evsel__open_per_cpu() in bperf__load(), so
* no need to call it again here.
*/
if (target.use_bpf)
break;
- affinity__set(&affinity, cpu);
- evlist__for_each_entry(evsel_list, counter) {
- if (evsel__cpu_iter_skip(counter, cpu))
+ if (counter->reset_group || counter->errored)
+ continue;
+ if (evsel__is_bpf(counter))
+ continue;
+try_again:
+ if (create_perf_stat_counter(counter, &stat_config, &target,
+ evlist_cpu_itr.cpu_map_idx) < 0) {
+
+ /*
+ * Weak group failed. We cannot just undo this here
+ * because earlier CPUs might be in group mode, and the kernel
+ * doesn't support mixing group and non group reads. Defer
+ * it to later.
+ * Don't close here because we're in the wrong affinity.
+ */
+ if ((errno == EINVAL || errno == EBADF) &&
+ evsel__leader(counter) != counter &&
+ counter->weak_group) {
+ evlist__reset_weak_group(evsel_list, counter, false);
+ assert(counter->reset_group);
+ second_pass = true;
continue;
- if (counter->reset_group || counter->errored)
+ }
+
+ switch (stat_handle_error(counter)) {
+ case COUNTER_FATAL:
+ return -1;
+ case COUNTER_RETRY:
+ goto try_again;
+ case COUNTER_SKIP:
continue;
- if (evsel__is_bpf(counter))
+ default:
+ break;
+ }
+
+ }
+ counter->supported = true;
+ }
+
+ if (second_pass) {
+ /*
+ * Now redo all the weak group after closing them,
+ * and also close errored counters.
+ */
+
+ /* First close errored or weak retry */
+ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
+ counter = evlist_cpu_itr.evsel;
+
+ if (!counter->reset_group && !counter->errored)
continue;
-try_again:
+
+ perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
+ }
+ /* Now reopen weak */
+ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
+ counter = evlist_cpu_itr.evsel;
+
+ if (!counter->reset_group && !counter->errored)
+ continue;
+ if (!counter->reset_group)
+ continue;
+try_again_reset:
+ pr_debug2("reopening weak %s\n", evsel__name(counter));
if (create_perf_stat_counter(counter, &stat_config, &target,
- counter->cpu_iter - 1) < 0) {
-
- /*
- * Weak group failed. We cannot just undo this here
- * because earlier CPUs might be in group mode, and the kernel
- * doesn't support mixing group and non group reads. Defer
- * it to later.
- * Don't close here because we're in the wrong affinity.
- */
- if ((errno == EINVAL || errno == EBADF) &&
- evsel__leader(counter) != counter &&
- counter->weak_group) {
- evlist__reset_weak_group(evsel_list, counter, false);
- assert(counter->reset_group);
- second_pass = true;
- continue;
- }
+ evlist_cpu_itr.cpu_map_idx) < 0) {
switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
- goto try_again;
+ goto try_again_reset;
case COUNTER_SKIP:
continue;
default:
break;
}
-
}
counter->supported = true;
}
}
-
- if (second_pass) {
- /*
- * Now redo all the weak group after closing them,
- * and also close errored counters.
- */
-
- evlist__for_each_cpu(evsel_list, i, cpu) {
- affinity__set(&affinity, cpu);
- /* First close errored or weak retry */
- evlist__for_each_entry(evsel_list, counter) {
- if (!counter->reset_group && !counter->errored)
- continue;
- if (evsel__cpu_iter_skip_no_inc(counter, cpu))
- continue;
- perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
- }
- /* Now reopen weak */
- evlist__for_each_entry(evsel_list, counter) {
- if (!counter->reset_group && !counter->errored)
- continue;
- if (evsel__cpu_iter_skip(counter, cpu))
- continue;
- if (!counter->reset_group)
- continue;
-try_again_reset:
- pr_debug2("reopening weak %s\n", evsel__name(counter));
- if (create_perf_stat_counter(counter, &stat_config, &target,
- counter->cpu_iter - 1) < 0) {
-
- switch (stat_handle_error(counter)) {
- case COUNTER_FATAL:
- return -1;
- case COUNTER_RETRY:
- goto try_again_reset;
- case COUNTER_SKIP:
- continue;
- default:
- break;
- }
- }
- counter->supported = true;
- }
- }
- }
- affinity__cleanup(&affinity);
+ affinity__cleanup(affinity);
evlist__for_each_entry(evsel_list, counter) {
if (!counter->supported) {
@@ -1168,6 +1167,26 @@ static int parse_stat_cgroups(const struct option *opt,
return parse_cgroups(opt, str, unset);
}
+static int parse_hybrid_type(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ struct evlist *evlist = *(struct evlist **)opt->value;
+
+ if (!list_empty(&evlist->core.entries)) {
+ fprintf(stderr, "Must define cputype before events/metrics\n");
+ return -1;
+ }
+
+ evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu(str);
+ if (!evlist->hybrid_pmu_name) {
+ fprintf(stderr, "--cputype %s is not supported!\n", str);
+ return -1;
+ }
+
+ return 0;
+}
+
static struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
@@ -1282,6 +1301,10 @@ static struct option stat_options[] = {
"don't print 'summary' for CSV summary output"),
OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
"don't print output (useful with record)"),
+ OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
+ "Only enable events on applying cpu with this type "
+ "for hybrid platform (e.g. core or atom)",
+ parse_hybrid_type),
#ifdef HAVE_LIBPFM
OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
"libpfm4 event selector. use 'perf list' to list available events",
@@ -1298,70 +1321,75 @@ static struct option stat_options[] = {
OPT_END()
};
+static const char *const aggr_mode__string[] = {
+ [AGGR_CORE] = "core",
+ [AGGR_DIE] = "die",
+ [AGGR_GLOBAL] = "global",
+ [AGGR_NODE] = "node",
+ [AGGR_NONE] = "none",
+ [AGGR_SOCKET] = "socket",
+ [AGGR_THREAD] = "thread",
+ [AGGR_UNSET] = "unset",
+};
+
static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int cpu)
+ struct perf_cpu cpu)
{
- return cpu_map__get_socket(map, cpu, NULL);
+ return aggr_cpu_id__socket(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int cpu)
+ struct perf_cpu cpu)
{
- return cpu_map__get_die(map, cpu, NULL);
+ return aggr_cpu_id__die(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int cpu)
+ struct perf_cpu cpu)
{
- return cpu_map__get_core(map, cpu, NULL);
+ return aggr_cpu_id__core(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int cpu)
+ struct perf_cpu cpu)
{
- return cpu_map__get_node(map, cpu, NULL);
+ return aggr_cpu_id__node(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
- aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
+ aggr_get_id_t get_id, struct perf_cpu cpu)
{
- int cpu;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
- if (idx >= map->nr)
- return id;
+ if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu]))
+ config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu);
- cpu = map->map[idx];
-
- if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu]))
- config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
-
- id = config->cpus_aggr_map->map[cpu];
+ id = config->cpus_aggr_map->map[cpu.cpu];
return id;
}
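perf_stat__get_aggr() is a per-CPU memo table: the topology lookup runs once per CPU number and the result is cached in cpus_aggr_map. A minimal sketch of that memoization, with a fake slow lookup:

#include <stdio.h>

struct aggr_id { int socket; };
#define EMPTY (-1)

static struct aggr_id cache[64];	/* keyed by CPU number, like cpus_aggr_map */

static struct aggr_id slow_lookup(int cpu)
{
	printf("lookup cpu %d\n", cpu);	/* pretend this reads sysfs */
	return (struct aggr_id){ .socket = cpu / 8 };
}

static struct aggr_id get_cached(int cpu)
{
	if (cache[cpu].socket == EMPTY)
		cache[cpu] = slow_lookup(cpu);
	return cache[cpu];
}

int main(void)
{
	for (int i = 0; i < 64; i++)
		cache[i].socket = EMPTY;
	get_cached(3);
	get_cached(3);	/* second call hits the cache: no "lookup" line */
	return 0;
}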
static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
+ return perf_stat__get_aggr(config, perf_stat__get_socket, cpu);
}
static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
+ return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
}
static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
+ return perf_stat__get_aggr(config, perf_stat__get_core, cpu);
}
static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_stat__get_aggr(config, perf_stat__get_node, map, idx);
+ return perf_stat__get_aggr(config, perf_stat__get_node, cpu);
}
static bool term_percore_set(void)
@@ -1376,54 +1404,67 @@ static bool term_percore_set(void)
return false;
}
-static int perf_stat_init_aggr_mode(void)
+static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
{
- int nr;
+ switch (aggr_mode) {
+ case AGGR_SOCKET:
+ return aggr_cpu_id__socket;
+ case AGGR_DIE:
+ return aggr_cpu_id__die;
+ case AGGR_CORE:
+ return aggr_cpu_id__core;
+ case AGGR_NODE:
+ return aggr_cpu_id__node;
+ case AGGR_NONE:
+ if (term_percore_set())
+ return aggr_cpu_id__core;
+
+ return NULL;
+ case AGGR_GLOBAL:
+ case AGGR_THREAD:
+ case AGGR_UNSET:
+ default:
+ return NULL;
+ }
+}
- switch (stat_config.aggr_mode) {
+static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
+{
+ switch (aggr_mode) {
case AGGR_SOCKET:
- if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build socket map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_socket_cached;
- break;
+ return perf_stat__get_socket_cached;
case AGGR_DIE:
- if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build die map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_die_cached;
- break;
+ return perf_stat__get_die_cached;
case AGGR_CORE:
- if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build core map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_core_cached;
- break;
+ return perf_stat__get_core_cached;
case AGGR_NODE:
- if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build core map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_node_cached;
- break;
+ return perf_stat__get_node_cached;
case AGGR_NONE:
if (term_percore_set()) {
- if (cpu_map__build_core_map(evsel_list->core.cpus,
- &stat_config.aggr_map)) {
- perror("cannot build core map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_core_cached;
+ return perf_stat__get_core_cached;
}
- break;
+ return NULL;
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
default:
- break;
+ return NULL;
+ }
+}
+
+static int perf_stat_init_aggr_mode(void)
+{
+ int nr;
+ aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode);
+
+ if (get_id) {
+ stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus,
+ get_id, /*data=*/NULL);
+ if (!stat_config.aggr_map) {
+ pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
+ return -1;
+ }
+ stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode);
}
/*
@@ -1431,7 +1472,7 @@ static int perf_stat_init_aggr_mode(void)
* taking the highest cpu number to be the size of
* the aggregation translate cpumap.
*/
- nr = perf_cpu_map__max(evsel_list->core.cpus);
+ nr = perf_cpu_map__max(evsel_list->core.cpus).cpu;
stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1);
return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
}
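The rewrite replaces four near-identical switch arms with one generic constructor plus parallel lookup tables keyed by the same enum: one for the per-CPU callback, one for the name used in error messages. A sketch of that table-driven shape, with toy id functions:

#include <stdio.h>

enum aggr_mode { AGGR_SOCKET, AGGR_CORE, AGGR_MAX };

typedef int (*get_id_fn)(int cpu);

static int socket_id(int cpu) { return cpu / 8; }
static int core_id(int cpu)   { return cpu / 2; }

/* Parallel tables keyed by the same enum, as in builtin-stat.c. */
static const get_id_fn aggr_get[AGGR_MAX] = {
	[AGGR_SOCKET] = socket_id,
	[AGGR_CORE]   = core_id,
};
static const char *const aggr_name[AGGR_MAX] = {
	[AGGR_SOCKET] = "socket",
	[AGGR_CORE]   = "core",
};

static int init_aggr(enum aggr_mode m)
{
	get_id_fn get = aggr_get[m];

	if (!get)
		return 0;	/* nothing to build for this mode */
	printf("building %s map: cpu 5 -> id %d\n", aggr_name[m], get(5));
	return 0;	/* a real build failure would report aggr_name[m] */
}

int main(void)
{
	return init_aggr(AGGR_SOCKET) || init_aggr(AGGR_CORE);
}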
@@ -1459,169 +1500,139 @@ static void perf_stat__exit_aggr_mode(void)
stat_config.cpus_aggr_map = NULL;
}
-static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx)
-{
- int cpu;
-
- if (idx > map->nr)
- return -1;
-
- cpu = map->map[idx];
-
- if (cpu >= env->nr_cpus_avail)
- return -1;
-
- return cpu;
-}
-
-static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data)
+static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
- int cpu = perf_env__get_cpu(env, map, idx);
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
- if (cpu != -1)
- id.socket = env->cpu[cpu].socket_id;
+ if (cpu.cpu != -1)
+ id.socket = env->cpu[cpu.cpu].socket_id;
return id;
}
-static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
+static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
- int cpu = perf_env__get_cpu(env, map, idx);
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
- if (cpu != -1) {
+ if (cpu.cpu != -1) {
/*
* die_id is relative to socket, so start
* with the socket ID and then add die to
* make a unique ID.
*/
- id.socket = env->cpu[cpu].socket_id;
- id.die = env->cpu[cpu].die_id;
+ id.socket = env->cpu[cpu.cpu].socket_id;
+ id.die = env->cpu[cpu.cpu].die_id;
}
return id;
}
-static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
+static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
- int cpu = perf_env__get_cpu(env, map, idx);
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
- if (cpu != -1) {
+ if (cpu.cpu != -1) {
/*
* core_id is relative to socket and die,
* we need a global id. So we set
* socket, die id and core id
*/
- id.socket = env->cpu[cpu].socket_id;
- id.die = env->cpu[cpu].die_id;
- id.core = env->cpu[cpu].core_id;
+ id.socket = env->cpu[cpu.cpu].socket_id;
+ id.die = env->cpu[cpu.cpu].die_id;
+ id.core = env->cpu[cpu.cpu].core_id;
}
return id;
}
-static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
+static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
- int cpu = perf_env__get_cpu(data, map, idx);
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
id.node = perf_env__numa_node(data, cpu);
return id;
}
-static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
- struct cpu_aggr_map **sockp)
-{
- return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
-}
-
-static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus,
- struct cpu_aggr_map **diep)
-{
- return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
-}
-
-static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus,
- struct cpu_aggr_map **corep)
-{
- return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
-}
-
-static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus,
- struct cpu_aggr_map **nodep)
-{
- return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
-}
-
static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
+ return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_env__get_die(map, idx, &perf_stat.session->header.env);
+ return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_env__get_core(map, idx, &perf_stat.session->header.env);
+ return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_env__get_node(map, idx, &perf_stat.session->header.env);
+ return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
-static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
+static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
{
- struct perf_env *env = &st->session->header.env;
+ switch (aggr_mode) {
+ case AGGR_SOCKET:
+ return perf_env__get_socket_aggr_by_cpu;
+ case AGGR_DIE:
+ return perf_env__get_die_aggr_by_cpu;
+ case AGGR_CORE:
+ return perf_env__get_core_aggr_by_cpu;
+ case AGGR_NODE:
+ return perf_env__get_node_aggr_by_cpu;
+ case AGGR_NONE:
+ case AGGR_GLOBAL:
+ case AGGR_THREAD:
+ case AGGR_UNSET:
+ default:
+ return NULL;
+ }
+}
- switch (stat_config.aggr_mode) {
+static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
+{
+ switch (aggr_mode) {
case AGGR_SOCKET:
- if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build socket map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_socket_file;
- break;
+ return perf_stat__get_socket_file;
case AGGR_DIE:
- if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build die map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_die_file;
- break;
+ return perf_stat__get_die_file;
case AGGR_CORE:
- if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build core map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_core_file;
- break;
+ return perf_stat__get_core_file;
case AGGR_NODE:
- if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build core map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_node_file;
- break;
+ return perf_stat__get_node_file;
case AGGR_NONE:
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
default:
- break;
+ return NULL;
}
+}
+
+static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
+{
+ struct perf_env *env = &st->session->header.env;
+ aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode);
+
+ if (!get_id)
+ return 0;
+ stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus, get_id, env);
+ if (!stat_config.aggr_map) {
+ pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
+ return -1;
+ }
+ stat_config.aggr_get_id = aggr_mode__get_id_file(stat_config.aggr_mode);
return 0;
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 0b52e08e558e..32844d8a0ea5 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2726,6 +2726,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
offset = format_field__intval(field, sample, evsel->needs_swap);
syscall_arg.len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
val = (uintptr_t)(sample->raw_data + offset);
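With relative dynamic fields (__rel_loc), the 16-bit offset stored in the record is relative to the byte just past the field itself, so the decoder adds field->offset + field->size before dereferencing. A self-contained decode sketch, with the record layout assumed purely for illustration:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Decode a dynamic-array descriptor: high 16 bits = length,
 * low 16 bits = offset.  For __rel_loc fields the offset counts
 * from the byte just past the descriptor. */
int main(void)
{
	unsigned char raw[32] = { 0 };
	int field_offset = 4, field_size = 4;
	int is_relative = 1;

	strcpy((char *)raw + 12, "abc");	/* payload at absolute 12 */
	uint32_t desc = (4u << 16) | (is_relative ? 4u : 12u);
	memcpy(raw + field_offset, &desc, sizeof(desc));

	uint32_t v;
	memcpy(&v, raw + field_offset, sizeof(v));
	int len = v >> 16, off = v & 0xffff;
	if (is_relative)
		off += field_offset + field_size;	/* 4 + 4 + 4 = 12 */

	printf("len=%d payload=%s\n", len, (char *)raw + off);
	return 0;
}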
@@ -3257,10 +3259,21 @@ static void trace__set_bpf_map_syscalls(struct trace *trace)
static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
{
+ struct bpf_program *pos, *prog = NULL;
+ const char *sec_name;
+
if (trace->bpf_obj == NULL)
return NULL;
- return bpf_object__find_program_by_title(trace->bpf_obj, name);
+ bpf_object__for_each_program(pos, trace->bpf_obj) {
+ sec_name = bpf_program__section_name(pos);
+ if (sec_name && !strcmp(sec_name, name)) {
+ prog = pos;
+ break;
+ }
+ }
+
+ return prog;
}
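bpf_object__find_program_by_title() was deprecated in libbpf, so the lookup becomes an open-coded walk that matches on ELF section name. The fragment below shows the same loop against the public libbpf API; it assumes libbpf headers and an already-opened bpf_object handle:

#include <string.h>
#include <bpf/libbpf.h>

/* Same loop shape as trace__find_bpf_program_by_title(): walk the
 * object's programs and match on ELF section name. */
static struct bpf_program *
find_prog_by_sec(struct bpf_object *obj, const char *name)
{
	struct bpf_program *pos;

	bpf_object__for_each_program(pos, obj) {
		const char *sec = bpf_program__section_name(pos);

		if (sec && !strcmp(sec, name))
			return pos;
	}
	return NULL;
}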
static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
@@ -3925,6 +3938,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
bool draining = false;
trace->live = true;
+ signal(SIGCHLD, sig_handler);
if (!trace->raw_augmented_syscalls) {
if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
@@ -3950,6 +3964,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
evlist__add(evlist, pgfault_min);
}
+ /* Enable ignoring missing threads when -u/-p option is defined. */
+ trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid;
+
if (trace->sched &&
evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
goto out_error_sched_stat_runtime;
@@ -4873,7 +4890,6 @@ int cmd_trace(int argc, const char **argv)
signal(SIGSEGV, sighandler_dump_stack);
signal(SIGFPE, sighandler_dump_stack);
- signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
trace.evlist = evlist__new();
diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c
index 7565a1852c74..b17eb52a0694 100644
--- a/tools/perf/dlfilters/dlfilter-test-api-v0.c
+++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c
@@ -308,8 +308,6 @@ int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, vo
int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
{
- struct filter_data *d = data;
-
pr_debug("%s API\n", __func__);
return do_checks(data, sample, ctx, false);
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json
new file mode 100644
index 000000000000..79f2016c53b0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json
@@ -0,0 +1,8 @@
+[
+ {
+ "ArchStdEvent": "BR_MIS_PRED"
+ },
+ {
+ "ArchStdEvent": "BR_PRED"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json
new file mode 100644
index 000000000000..579c1c993d17
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json
@@ -0,0 +1,20 @@
+[
+ {
+ "ArchStdEvent": "CPU_CYCLES"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS"
+ },
+ {
+ "ArchStdEvent": "BUS_CYCLES"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_RD"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_WR"
+ },
+ {
+ "ArchStdEvent": "CNT_CYCLES"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json
new file mode 100644
index 000000000000..0141f749bff3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json
@@ -0,0 +1,155 @@
+[
+ {
+ "ArchStdEvent": "L1I_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1I_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1I_CACHE"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_ALLOCATE"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB"
+ },
+ {
+ "ArchStdEvent": "L1I_TLB"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_ALLOCATE"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB"
+ },
+ {
+ "ArchStdEvent": "DTLB_WALK"
+ },
+ {
+ "ArchStdEvent": "ITLB_WALK"
+ },
+ {
+ "ArchStdEvent": "LL_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "LL_CACHE_MISS_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_LMISS_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WR"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_INNER"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_OUTER"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB_VICTIM"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB_CLEAN"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_INVAL"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB_VICTIM"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB_CLEAN"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_INVAL"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_WR"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "L1I_CACHE_LMISS"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_LMISS_RD"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_LMISS_RD"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json
new file mode 100644
index 000000000000..344a2d552ad5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json
@@ -0,0 +1,47 @@
+[
+ {
+ "ArchStdEvent": "EXC_TAKEN"
+ },
+ {
+ "ArchStdEvent": "MEMORY_ERROR"
+ },
+ {
+ "ArchStdEvent": "EXC_UNDEF"
+ },
+ {
+ "ArchStdEvent": "EXC_SVC"
+ },
+ {
+ "ArchStdEvent": "EXC_PABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_DABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_IRQ"
+ },
+ {
+ "ArchStdEvent": "EXC_FIQ"
+ },
+ {
+ "ArchStdEvent": "EXC_SMC"
+ },
+ {
+ "ArchStdEvent": "EXC_HVC"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_PABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_DABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_OTHER"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_IRQ"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_FIQ"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json
new file mode 100644
index 000000000000..e57cd55937c6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json
@@ -0,0 +1,143 @@
+[
+ {
+ "ArchStdEvent": "SW_INCR"
+ },
+ {
+ "ArchStdEvent": "INST_RETIRED"
+ },
+ {
+ "ArchStdEvent": "EXC_RETURN"
+ },
+ {
+ "ArchStdEvent": "CID_WRITE_RETIRED"
+ },
+ {
+ "ArchStdEvent": "INST_SPEC"
+ },
+ {
+ "ArchStdEvent": "TTBR_WRITE_RETIRED"
+ },
+ {
+ "ArchStdEvent": "BR_RETIRED"
+ },
+ {
+ "ArchStdEvent": "BR_MIS_PRED_RETIRED"
+ },
+ {
+ "ArchStdEvent": "OP_RETIRED"
+ },
+ {
+ "ArchStdEvent": "OP_SPEC"
+ },
+ {
+ "ArchStdEvent": "LDREX_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_PASS_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_FAIL_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_SPEC"
+ },
+ {
+ "ArchStdEvent": "LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "DP_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SPEC"
+ },
+ {
+ "ArchStdEvent": "VFP_SPEC"
+ },
+ {
+ "ArchStdEvent": "PC_WRITE_SPEC"
+ },
+ {
+ "ArchStdEvent": "CRYPTO_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_IMMED_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_RETURN_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_INDIRECT_SPEC"
+ },
+ {
+ "ArchStdEvent": "ISB_SPEC"
+ },
+ {
+ "ArchStdEvent": "DSB_SPEC"
+ },
+ {
+ "ArchStdEvent": "DMB_SPEC"
+ },
+ {
+ "ArchStdEvent": "RC_LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "RC_ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_INST_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_INST_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_HP_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_SP_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_DP_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_EMPTY_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_FULL_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_PARTIAL_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_LDFF_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_LDFF_FAULT_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_SCALE_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_FIXED_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT8_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT16_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT32_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT64_SPEC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json
new file mode 100644
index 000000000000..e522113aeb96
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json
@@ -0,0 +1,38 @@
+[
+ {
+ "ArchStdEvent": "MEM_ACCESS"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_RD"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_WR"
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_LDST_SPEC"
+ },
+ {
+ "ArchStdEvent": "LDST_ALIGN_LAT"
+ },
+ {
+ "ArchStdEvent": "LD_ALIGN_LAT"
+ },
+ {
+ "ArchStdEvent": "ST_ALIGN_LAT"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_CHECKED"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_CHECKED_RD"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_CHECKED_WR"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json
new file mode 100644
index 000000000000..20d8365756c5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json
@@ -0,0 +1,5 @@
+[
+ {
+ "ArchStdEvent": "REMOTE_ACCESS"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json
new file mode 100644
index 000000000000..f9fae15f7555
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json
@@ -0,0 +1,23 @@
+[
+ {
+ "ArchStdEvent": "STALL_FRONTEND"
+ },
+ {
+ "ArchStdEvent": "STALL_BACKEND"
+ },
+ {
+ "ArchStdEvent": "STALL"
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT_BACKEND"
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT_FRONTEND"
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT"
+ },
+ {
+ "ArchStdEvent": "STALL_BACKEND_MEM"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json
new file mode 100644
index 000000000000..20f2165c85fe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json
@@ -0,0 +1,14 @@
+[
+ {
+ "ArchStdEvent": "SAMPLE_POP"
+ },
+ {
+ "ArchStdEvent": "SAMPLE_FEED"
+ },
+ {
+ "ArchStdEvent": "SAMPLE_FILTRATE"
+ },
+ {
+ "ArchStdEvent": "SAMPLE_COLLISION"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json
new file mode 100644
index 000000000000..3116135c59e2
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json
@@ -0,0 +1,29 @@
+[
+ {
+ "ArchStdEvent": "TRB_WRAP"
+ },
+ {
+ "ArchStdEvent": "TRCEXTOUT0"
+ },
+ {
+ "ArchStdEvent": "TRCEXTOUT1"
+ },
+ {
+ "ArchStdEvent": "TRCEXTOUT2"
+ },
+ {
+ "ArchStdEvent": "TRCEXTOUT3"
+ },
+ {
+ "ArchStdEvent": "CTI_TRIGOUT4"
+ },
+ {
+ "ArchStdEvent": "CTI_TRIGOUT5"
+ },
+ {
+ "ArchStdEvent": "CTI_TRIGOUT6"
+ },
+ {
+ "ArchStdEvent": "CTI_TRIGOUT7"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json
index 423767510aff..80d7a70829a0 100644
--- a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json
+++ b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json
@@ -300,6 +300,30 @@
"BriefDescription": "No operation sent for execution on a slot"
},
{
+ "PublicDescription": "Sample Population",
+ "EventCode": "0x4000",
+ "EventName": "SAMPLE_POP",
+ "BriefDescription": "Sample Population"
+ },
+ {
+ "PublicDescription": "Sample Taken",
+ "EventCode": "0x4001",
+ "EventName": "SAMPLE_FEED",
+ "BriefDescription": "Sample Taken"
+ },
+ {
+ "PublicDescription": "Sample Taken and not removed by filtering",
+ "EventCode": "0x4002",
+ "EventName": "SAMPLE_FILTRATE",
+ "BriefDescription": "Sample Taken and not removed by filtering"
+ },
+ {
+ "PublicDescription": "Sample collided with previous sample",
+ "EventCode": "0x4003",
+ "EventName": "SAMPLE_COLLISION",
+ "BriefDescription": "Sample collided with previous sample"
+ },
+ {
"PublicDescription": "Constant frequency cycles. The counter increments at a constant frequency equal to the rate of increment of the system counter, CNTPCT_EL0.",
"EventCode": "0x4004",
"EventName": "CNT_CYCLES",
@@ -330,6 +354,96 @@
"BriefDescription": "Level 3 data cache long-latency read miss"
},
{
+ "PublicDescription": "Trace buffer current write pointer wrapped",
+ "EventCode": "0x400C",
+ "EventName": "TRB_WRAP",
+ "BriefDescription": "Trace buffer current write pointer wrapped"
+ },
+ {
+ "PublicDescription": "PE Trace Unit external output 0",
+ "EventCode": "0x4010",
+ "EventName": "TRCEXTOUT0",
+ "BriefDescription": "PE Trace Unit external output 0"
+ },
+ {
+ "PublicDescription": "PE Trace Unit external output 1",
+ "EventCode": "0x4011",
+ "EventName": "TRCEXTOUT1",
+ "BriefDescription": "PE Trace Unit external output 1"
+ },
+ {
+ "PublicDescription": "PE Trace Unit external output 2",
+ "EventCode": "0x4012",
+ "EventName": "TRCEXTOUT2",
+ "BriefDescription": "PE Trace Unit external output 2"
+ },
+ {
+ "PublicDescription": "PE Trace Unit external output 3",
+ "EventCode": "0x4013",
+ "EventName": "TRCEXTOUT3",
+ "BriefDescription": "PE Trace Unit external output 3"
+ },
+ {
+ "PublicDescription": "Cross-trigger Interface output trigger 4",
+ "EventCode": "0x4018",
+ "EventName": "CTI_TRIGOUT4",
+ "BriefDescription": "Cross-trigger Interface output trigger 4"
+ },
+ {
+ "PublicDescription": "Cross-trigger Interface output trigger 5 ",
+ "EventCode": "0x4019",
+ "EventName": "CTI_TRIGOUT5",
+ "BriefDescription": "Cross-trigger Interface output trigger 5 "
+ },
+ {
+ "PublicDescription": "Cross-trigger Interface output trigger 6",
+ "EventCode": "0x401A",
+ "EventName": "CTI_TRIGOUT6",
+ "BriefDescription": "Cross-trigger Interface output trigger 6"
+ },
+ {
+ "PublicDescription": "Cross-trigger Interface output trigger 7",
+ "EventCode": "0x401B",
+ "EventName": "CTI_TRIGOUT7",
+ "BriefDescription": "Cross-trigger Interface output trigger 7"
+ },
+ {
+ "PublicDescription": "Access with additional latency from alignment",
+ "EventCode": "0x4020",
+ "EventName": "LDST_ALIGN_LAT",
+ "BriefDescription": "Access with additional latency from alignment"
+ },
+ {
+ "PublicDescription": "Load with additional latency from alignment",
+ "EventCode": "0x4021",
+ "EventName": "LD_ALIGN_LAT",
+ "BriefDescription": "Load with additional latency from alignment"
+ },
+ {
+ "PublicDescription": "Store with additional latency from alignment",
+ "EventCode": "0x4022",
+ "EventName": "ST_ALIGN_LAT",
+ "BriefDescription": "Store with additional latency from alignment"
+ },
+ {
+ "PublicDescription": "Checked data memory access",
+ "EventCode": "0x4024",
+ "EventName": "MEM_ACCESS_CHECKED",
+ "BriefDescription": "Checked data memory access"
+ },
+ {
+ "PublicDescription": "Checked data memory access, read",
+ "EventCode": "0x4025",
+ "EventName": "MEM_ACCESS_CHECKED_RD",
+ "BriefDescription": "Checked data memory access, read"
+ },
+ {
+ "PublicDescription": "Checked data memory access, write",
+ "EventCode": "0x4026",
+ "EventName": "MEM_ACCESS_CHECKED_WR",
+ "BriefDescription": "Checked data memory access, write"
+ },
+ {
"PublicDescription": "SIMD Instruction architecturally executed.",
"EventCode": "0x8000",
"EventName": "SIMD_INST_RETIRED",
@@ -342,6 +456,18 @@
"BriefDescription": "Instruction architecturally executed, SVE."
},
{
+ "PublicDescription": "ASE operations speculatively executed",
+ "EventCode": "0x8005",
+ "EventName": "ASE_INST_SPEC",
+ "BriefDescription": "ASE operations speculatively executed"
+ },
+ {
+ "PublicDescription": "SVE operations speculatively executed",
+ "EventCode": "0x8006",
+ "EventName": "SVE_INST_SPEC",
+ "BriefDescription": "SVE operations speculatively executed"
+ },
+ {
"PublicDescription": "Microarchitectural operation, Operations speculatively executed.",
"EventCode": "0x8008",
"EventName": "UOP_SPEC",
@@ -360,6 +486,24 @@
"BriefDescription": "Floating-point Operations speculatively executed."
},
{
+ "PublicDescription": "Floating-point half-precision operations speculatively executed",
+ "EventCode": "0x8014",
+ "EventName": "FP_HP_SPEC",
+ "BriefDescription": "Floating-point half-precision operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Floating-point single-precision operations speculatively executed",
+ "EventCode": "0x8018",
+ "EventName": "FP_SP_SPEC",
+ "BriefDescription": "Floating-point single-precision operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Floating-point double-precision operations speculatively executed",
+ "EventCode": "0x801C",
+ "EventName": "FP_DP_SPEC",
+ "BriefDescription": "Floating-point double-precision operations speculatively executed"
+ },
+ {
"PublicDescription": "Floating-point FMA Operations speculatively executed.",
"EventCode": "0x8028",
"EventName": "FP_FMA_SPEC",
@@ -390,6 +534,30 @@
"BriefDescription": "SVE predicated Operations speculatively executed."
},
{
+ "PublicDescription": "SVE predicated operations with no active predicates speculatively executed",
+ "EventCode": "0x8075",
+ "EventName": "SVE_PRED_EMPTY_SPEC",
+ "BriefDescription": "SVE predicated operations with no active predicates speculatively executed"
+ },
+ {
+ "PublicDescription": "SVE predicated operations speculatively executed with all active predicates",
+ "EventCode": "0x8076",
+ "EventName": "SVE_PRED_FULL_SPEC",
+ "BriefDescription": "SVE predicated operations speculatively executed with all active predicates"
+ },
+ {
+ "PublicDescription": "SVE predicated operations speculatively executed with partially active predicates",
+ "EventCode": "0x8077",
+ "EventName": "SVE_PRED_PARTIAL_SPEC",
+ "BriefDescription": "SVE predicated operations speculatively executed with partially active predicates"
+ },
+ {
+ "PublicDescription": "SVE predicated operations with empty or partially active predicates",
+ "EventCode": "0x8079",
+ "EventName": "SVE_PRED_NOT_FULL_SPEC",
+ "BriefDescription": "SVE predicated operations with empty or partially active predicates"
+ },
+ {
"PublicDescription": "SVE MOVPRFX Operations speculatively executed.",
"EventCode": "0x807C",
"EventName": "SVE_MOVPRFX_SPEC",
@@ -498,6 +666,12 @@
"BriefDescription": "SVE First-fault load Operations speculatively executed."
},
{
+ "PublicDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0",
+ "EventCode": "0x80BD",
+ "EventName": "SVE_LDFF_FAULT_SPEC",
+ "BriefDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0"
+ },
+ {
"PublicDescription": "Scalable floating-point element Operations speculatively executed.",
"EventCode": "0x80C0",
"EventName": "FP_SCALE_OPS_SPEC",
@@ -544,5 +718,29 @@
"EventCode": "0x80C7",
"EventName": "FP_DP_FIXED_OPS_SPEC",
"BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed",
+ "EventCode": "0x80E3",
+ "EventName": "ASE_SVE_INT8_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed",
+ "EventCode": "0x80E7",
+ "EventName": "ASE_SVE_INT16_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed",
+ "EventCode": "0x80EB",
+ "EventName": "ASE_SVE_INT32_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed",
+ "EventCode": "0x80EF",
+ "EventName": "ASE_SVE_INT64_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed"
}
]
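
Note on the hunks above: the per-core neoverse-n2 JSON files carry only "ArchStdEvent" names, and jevents resolves each name against the arch-standard table built from common-and-microarch.json and recommended.json, copying the full event (EventCode, descriptions) into the per-CPU table. A rough C sketch of that resolution step, with illustrative types and a linear search standing in for the real implementation:

#include <stddef.h>
#include <strings.h>

struct std_event {
	const char *name;
	const char *code;
	const char *brief;
};

/* Find the arch-standard event an "ArchStdEvent" entry refers to. */
static const struct std_event *arch_std_lookup(const struct std_event *tbl,
					       size_t n, const char *name)
{
	for (size_t i = 0; i < n; i++)
		if (!strcasecmp(tbl[i].name, name))
			return &tbl[i];
	return NULL;	/* jevents reports a dangling reference as an error */
}
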
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index 31d8b57ca9bb..b899db48c12a 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -19,6 +19,7 @@
0x00000000410fd0b0,v1,arm/cortex-a76-n1,core
0x00000000410fd0c0,v1,arm/cortex-a76-n1,core
0x00000000410fd400,v1,arm/neoverse-v1,core
+0x00000000410fd490,v1,arm/neoverse-n2,core
0x00000000420f5160,v1,cavium/thunderx2,core
0x00000000430f0af0,v1,cavium/thunderx2,core
0x00000000460f0010,v1,fujitsu/a64fx,core
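
The new mapfile.csv row keys the neoverse-n2 event directory off the MIDR_EL1 value 0x00000000410fd490: implementer 0x41 (Arm Ltd.) and part number 0xd49 (Neoverse N2). A small stand-alone decoder of the fields involved, assuming the standard Armv8 MIDR layout:

#include <stdint.h>
#include <stdio.h>

static void decode_midr(uint32_t midr)
{
	unsigned int implementer = (midr >> 24) & 0xff;	/* 0x41: Arm Ltd. */
	unsigned int variant	 = (midr >> 20) & 0xf;
	unsigned int partnum	 = (midr >> 4) & 0xfff;	/* 0xd49: Neoverse N2 */
	unsigned int revision	 = midr & 0xf;

	printf("impl=%#x variant=%#x part=%#x rev=%#x\n",
	       implementer, variant, partnum, revision);
}

int main(void)
{
	decode_midr(0x410fd490);	/* impl=0x41 variant=0 part=0xd49 rev=0 */
	return 0;
}
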
diff --git a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json b/tools/perf/pmu-events/arch/arm64/recommended.json
index d0a19866563d..210afa856091 100644
--- a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json
+++ b/tools/perf/pmu-events/arch/arm64/recommended.json
@@ -148,305 +148,305 @@
"EventCode": "0x60",
"EventName": "BUS_ACCESS_RD",
"BriefDescription": "Bus access read"
- },
- {
+ },
+ {
"PublicDescription": "Bus access write",
"EventCode": "0x61",
"EventName": "BUS_ACCESS_WR",
"BriefDescription": "Bus access write"
- },
- {
+ },
+ {
"PublicDescription": "Bus access, Normal, Cacheable, Shareable",
"EventCode": "0x62",
"EventName": "BUS_ACCESS_SHARED",
"BriefDescription": "Bus access, Normal, Cacheable, Shareable"
- },
- {
+ },
+ {
"PublicDescription": "Bus access, not Normal, Cacheable, Shareable",
"EventCode": "0x63",
"EventName": "BUS_ACCESS_NOT_SHARED",
"BriefDescription": "Bus access, not Normal, Cacheable, Shareable"
- },
- {
+ },
+ {
"PublicDescription": "Bus access, Normal",
"EventCode": "0x64",
"EventName": "BUS_ACCESS_NORMAL",
"BriefDescription": "Bus access, Normal"
- },
- {
+ },
+ {
"PublicDescription": "Bus access, peripheral",
"EventCode": "0x65",
"EventName": "BUS_ACCESS_PERIPH",
"BriefDescription": "Bus access, peripheral"
- },
- {
+ },
+ {
"PublicDescription": "Data memory access, read",
"EventCode": "0x66",
"EventName": "MEM_ACCESS_RD",
"BriefDescription": "Data memory access, read"
- },
- {
+ },
+ {
"PublicDescription": "Data memory access, write",
"EventCode": "0x67",
"EventName": "MEM_ACCESS_WR",
"BriefDescription": "Data memory access, write"
- },
- {
+ },
+ {
"PublicDescription": "Unaligned access, read",
"EventCode": "0x68",
"EventName": "UNALIGNED_LD_SPEC",
"BriefDescription": "Unaligned access, read"
- },
- {
+ },
+ {
"PublicDescription": "Unaligned access, write",
"EventCode": "0x69",
"EventName": "UNALIGNED_ST_SPEC",
"BriefDescription": "Unaligned access, write"
- },
- {
+ },
+ {
"PublicDescription": "Unaligned access",
"EventCode": "0x6a",
"EventName": "UNALIGNED_LDST_SPEC",
"BriefDescription": "Unaligned access"
- },
- {
+ },
+ {
"PublicDescription": "Exclusive operation speculatively executed, LDREX or LDX",
"EventCode": "0x6c",
"EventName": "LDREX_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, LDREX or LDX"
- },
- {
+ },
+ {
"PublicDescription": "Exclusive operation speculatively executed, STREX or STX pass",
"EventCode": "0x6d",
"EventName": "STREX_PASS_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, STREX or STX pass"
- },
- {
+ },
+ {
"PublicDescription": "Exclusive operation speculatively executed, STREX or STX fail",
"EventCode": "0x6e",
"EventName": "STREX_FAIL_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, STREX or STX fail"
- },
- {
+ },
+ {
"PublicDescription": "Exclusive operation speculatively executed, STREX or STX",
"EventCode": "0x6f",
"EventName": "STREX_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, STREX or STX"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, load",
"EventCode": "0x70",
"EventName": "LD_SPEC",
"BriefDescription": "Operation speculatively executed, load"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, store",
"EventCode": "0x71",
"EventName": "ST_SPEC",
"BriefDescription": "Operation speculatively executed, store"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, load or store",
"EventCode": "0x72",
"EventName": "LDST_SPEC",
"BriefDescription": "Operation speculatively executed, load or store"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, integer data processing",
"EventCode": "0x73",
"EventName": "DP_SPEC",
"BriefDescription": "Operation speculatively executed, integer data processing"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, Advanced SIMD instruction",
"EventCode": "0x74",
"EventName": "ASE_SPEC",
"BriefDescription": "Operation speculatively executed, Advanced SIMD instruction"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, floating-point instruction",
"EventCode": "0x75",
"EventName": "VFP_SPEC",
"BriefDescription": "Operation speculatively executed, floating-point instruction"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, software change of the PC",
"EventCode": "0x76",
"EventName": "PC_WRITE_SPEC",
"BriefDescription": "Operation speculatively executed, software change of the PC"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, Cryptographic instruction",
"EventCode": "0x77",
"EventName": "CRYPTO_SPEC",
"BriefDescription": "Operation speculatively executed, Cryptographic instruction"
- },
- {
+ },
+ {
"PublicDescription": "Branch speculatively executed, immediate branch",
"EventCode": "0x78",
"EventName": "BR_IMMED_SPEC",
"BriefDescription": "Branch speculatively executed, immediate branch"
- },
- {
+ },
+ {
"PublicDescription": "Branch speculatively executed, procedure return",
"EventCode": "0x79",
"EventName": "BR_RETURN_SPEC",
"BriefDescription": "Branch speculatively executed, procedure return"
- },
- {
+ },
+ {
"PublicDescription": "Branch speculatively executed, indirect branch",
"EventCode": "0x7a",
"EventName": "BR_INDIRECT_SPEC",
"BriefDescription": "Branch speculatively executed, indirect branch"
- },
- {
+ },
+ {
"PublicDescription": "Barrier speculatively executed, ISB",
"EventCode": "0x7c",
"EventName": "ISB_SPEC",
"BriefDescription": "Barrier speculatively executed, ISB"
- },
- {
+ },
+ {
"PublicDescription": "Barrier speculatively executed, DSB",
"EventCode": "0x7d",
"EventName": "DSB_SPEC",
"BriefDescription": "Barrier speculatively executed, DSB"
- },
- {
+ },
+ {
"PublicDescription": "Barrier speculatively executed, DMB",
"EventCode": "0x7e",
"EventName": "DMB_SPEC",
"BriefDescription": "Barrier speculatively executed, DMB"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Other synchronous",
"EventCode": "0x81",
"EventName": "EXC_UNDEF",
"BriefDescription": "Exception taken, Other synchronous"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Supervisor Call",
"EventCode": "0x82",
"EventName": "EXC_SVC",
"BriefDescription": "Exception taken, Supervisor Call"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Instruction Abort",
"EventCode": "0x83",
"EventName": "EXC_PABORT",
"BriefDescription": "Exception taken, Instruction Abort"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Data Abort and SError",
"EventCode": "0x84",
"EventName": "EXC_DABORT",
"BriefDescription": "Exception taken, Data Abort and SError"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, IRQ",
"EventCode": "0x86",
"EventName": "EXC_IRQ",
"BriefDescription": "Exception taken, IRQ"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, FIQ",
"EventCode": "0x87",
"EventName": "EXC_FIQ",
"BriefDescription": "Exception taken, FIQ"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Secure Monitor Call",
"EventCode": "0x88",
"EventName": "EXC_SMC",
"BriefDescription": "Exception taken, Secure Monitor Call"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Hypervisor Call",
"EventCode": "0x8a",
"EventName": "EXC_HVC",
"BriefDescription": "Exception taken, Hypervisor Call"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Instruction Abort not taken locally",
"EventCode": "0x8b",
"EventName": "EXC_TRAP_PABORT",
"BriefDescription": "Exception taken, Instruction Abort not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Data Abort or SError not taken locally",
"EventCode": "0x8c",
"EventName": "EXC_TRAP_DABORT",
"BriefDescription": "Exception taken, Data Abort or SError not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Other traps not taken locally",
"EventCode": "0x8d",
"EventName": "EXC_TRAP_OTHER",
"BriefDescription": "Exception taken, Other traps not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, IRQ not taken locally",
"EventCode": "0x8e",
"EventName": "EXC_TRAP_IRQ",
"BriefDescription": "Exception taken, IRQ not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, FIQ not taken locally",
"EventCode": "0x8f",
"EventName": "EXC_TRAP_FIQ",
"BriefDescription": "Exception taken, FIQ not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Release consistency operation speculatively executed, Load-Acquire",
"EventCode": "0x90",
"EventName": "RC_LD_SPEC",
"BriefDescription": "Release consistency operation speculatively executed, Load-Acquire"
- },
- {
+ },
+ {
"PublicDescription": "Release consistency operation speculatively executed, Store-Release",
"EventCode": "0x91",
"EventName": "RC_ST_SPEC",
"BriefDescription": "Release consistency operation speculatively executed, Store-Release"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache access, read",
"EventCode": "0xa0",
"EventName": "L3D_CACHE_RD",
"BriefDescription": "Attributable Level 3 data or unified cache access, read"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache access, write",
"EventCode": "0xa1",
"EventName": "L3D_CACHE_WR",
"BriefDescription": "Attributable Level 3 data or unified cache access, write"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache refill, read",
"EventCode": "0xa2",
"EventName": "L3D_CACHE_REFILL_RD",
"BriefDescription": "Attributable Level 3 data or unified cache refill, read"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache refill, write",
"EventCode": "0xa3",
"EventName": "L3D_CACHE_REFILL_WR",
"BriefDescription": "Attributable Level 3 data or unified cache refill, write"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache Write-Back, victim",
"EventCode": "0xa6",
"EventName": "L3D_CACHE_WB_VICTIM",
"BriefDescription": "Attributable Level 3 data or unified cache Write-Back, victim"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean",
"EventCode": "0xa7",
"EventName": "L3D_CACHE_WB_CLEAN",
"BriefDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache access, invalidate",
"EventCode": "0xa8",
"EventName": "L3D_CACHE_INVAL",
"BriefDescription": "Attributable Level 3 data or unified cache access, invalidate"
- }
+ }
]
diff --git a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json
index 73089c682f80..41bac1c6a008 100644
--- a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json
+++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json
@@ -19,6 +19,22 @@
"EdgeDetect": "0"
},
{
+ "Unit": "CBO",
+ "EventCode": "0xE0",
+ "UMask": "0x00",
+ "EventName": "event-hyphen",
+ "BriefDescription": "UNC_CBO_HYPHEN",
+ "PublicDescription": "UNC_CBO_HYPHEN"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0xC0",
+ "UMask": "0x00",
+ "EventName": "event-two-hyph",
+ "BriefDescription": "UNC_CBO_TWO_HYPH",
+ "PublicDescription": "UNC_CBO_TWO_HYPH"
+ },
+ {
"EventCode": "0x7",
"EventName": "uncore_hisi_l3c.rd_hit_cpipe",
"BriefDescription": "Total read hits",
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 2e7c4153875b..1a57c3f81dd4 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -672,8 +672,6 @@ static int json_events(const char *fn,
addfield(map, &je.metric_constraint, "", "", val);
} else if (json_streq(map, field, "MetricExpr")) {
addfield(map, &je.metric_expr, "", "", val);
- for (s = je.metric_expr; *s; s++)
- *s = tolower(*s);
} else if (json_streq(map, field, "ArchStdEvent")) {
addfield(map, &arch_std, "", "", val);
for (s = arch_std; *s; s++)
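
The jevents.c hunk stops forcing MetricExpr strings to lower case (ArchStdEvent names are still lower-cased for lookup), presumably so that case-sensitive identifiers in metric expressions survive into the generated tables. A small demonstration of what the removed loop did; the sample expression is illustrative:

#include <ctype.h>
#include <stdio.h>

int main(void)
{
	char expr[] = "FP_SCALE_OPS_SPEC / duration_time";

	for (char *s = expr; *s; s++)
		*s = tolower((unsigned char)*s);
	printf("%s\n", expr);	/* fp_scale_ops_spec / duration_time */
	return 0;
}
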
diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py
index 1d3a189a9a54..66452a8ec358 100644
--- a/tools/perf/scripts/python/intel-pt-events.py
+++ b/tools/perf/scripts/python/intel-pt-events.py
@@ -32,8 +32,7 @@ try:
except:
broken_pipe_exception = IOError
-glb_switch_str = None
-glb_switch_printed = True
+glb_switch_str = {}
glb_insn = False
glb_disassembler = None
glb_src = False
@@ -70,6 +69,7 @@ def trace_begin():
ap = argparse.ArgumentParser(usage = "", add_help = False)
ap.add_argument("--insn-trace", action='store_true')
ap.add_argument("--src-trace", action='store_true')
+ ap.add_argument("--all-switch-events", action='store_true')
global glb_args
global glb_insn
global glb_src
@@ -256,10 +256,6 @@ def print_srccode(comm, param_dict, sample, symbol, dso, with_insn):
print(start_str, src_str)
def do_process_event(param_dict):
- global glb_switch_printed
- if not glb_switch_printed:
- print(glb_switch_str)
- glb_switch_printed = True
event_attr = param_dict["attr"]
sample = param_dict["sample"]
raw_buf = param_dict["raw_buf"]
@@ -274,6 +270,11 @@ def do_process_event(param_dict):
dso = get_optional(param_dict, "dso")
symbol = get_optional(param_dict, "symbol")
+ cpu = sample["cpu"]
+ if cpu in glb_switch_str:
+ print(glb_switch_str[cpu])
+ del glb_switch_str[cpu]
+
if name[0:12] == "instructions":
if glb_src:
print_srccode(comm, param_dict, sample, symbol, dso, True)
@@ -336,8 +337,6 @@ def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x):
sys.exit(1)
def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x):
- global glb_switch_printed
- global glb_switch_str
if out:
out_str = "Switch out "
else:
@@ -350,6 +349,10 @@ def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_pree
machine_str = ""
else:
machine_str = "machine PID %d" % machine_pid
- glb_switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \
+ switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \
(out_str, pid, tid, cpu, ts / 1000000000, ts %1000000000, np_pid, np_tid, machine_str, preempt_str)
- glb_switch_printed = False
+ if glb_args.all_switch_events:
+ print(switch_str)
+ else:
+ global glb_switch_str
+ glb_switch_str[cpu] = switch_str
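
The script change replaces the single global pending switch line with one pending line per CPU, printed lazily when the next sample on that CPU arrives (or immediately under --all-switch-events). The same buffering pattern expressed in C, with illustrative names:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_CPUS 4096
static char *pending_switch[MAX_CPUS];	/* at most one line per CPU */

static void on_context_switch(int cpu, char *line, bool print_all)
{
	if (print_all) {
		puts(line);
		free(line);
		return;
	}
	free(pending_switch[cpu]);	/* a newer switch supersedes it */
	pending_switch[cpu] = line;
}

static void on_sample(int cpu)
{
	if (pending_switch[cpu]) {
		puts(pending_switch[cpu]);
		free(pending_switch[cpu]);
		pending_switch[cpu] = NULL;
	}
}

int main(void)
{
	on_context_switch(2, strdup("Switch out ..."), false);
	on_sample(2);	/* prints the deferred line for CPU 2 */
	return 0;
}
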
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 803ca426f8e6..af2b37ef7c70 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -65,6 +65,7 @@ perf-y += pe-file-parsing.o
perf-y += expand-cgroup.o
perf-y += perf-time-to-tsc.o
perf-y += dlfilter-test.o
+perf-y += sigtrap.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 0f73e300f207..56fba08a3037 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -65,7 +65,7 @@ do { \
#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field)
-static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
+static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
int fd, int group_fd, unsigned long flags)
{
FILE *file;
@@ -93,7 +93,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
/* syscall arguments */
__WRITE_ASS(fd, "d", fd);
__WRITE_ASS(group_fd, "d", group_fd);
- __WRITE_ASS(cpu, "d", cpu);
+ __WRITE_ASS(cpu, "d", cpu.cpu);
__WRITE_ASS(pid, "d", pid);
__WRITE_ASS(flags, "lu", flags);
@@ -144,7 +144,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
return 0;
}
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
int fd, int group_fd, unsigned long flags)
{
int errno_saved = errno;
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
index 384856347236..4965dd666956 100644
--- a/tools/perf/tests/bitmap.c
+++ b/tools/perf/tests/bitmap.c
@@ -17,8 +17,8 @@ static unsigned long *get_bitmap(const char *str, int nbits)
bm = bitmap_zalloc(nbits);
if (map && bm) {
- for (i = 0; i < map->nr; i++)
- set_bit(map->map[i], bm);
+ for (i = 0; i < perf_cpu_map__nr(map); i++)
+ set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
}
if (map)
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 329f77f592f4..573490530194 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -296,9 +296,13 @@ static int check_env(void)
return err;
}
+/* temporarily disable libbpf deprecation warnings */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns,
ARRAY_SIZE(insns),
license, kver_int, NULL, 0);
+#pragma GCC diagnostic pop
if (err < 0) {
pr_err("Missing basic BPF support, skip this test: %s\n",
strerror(errno));
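
bpf_load_program() is deprecated in recent libbpf and the test intentionally keeps calling it, so the push/ignore/pop dance silences the warning for exactly that call site. The pattern in isolation, with a stand-in deprecated function:

__attribute__((deprecated("use new_api() instead")))
static int old_api(void)
{
	return 0;
}

int main(void)
{
	int err;

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
	err = old_api();	/* no -Wdeprecated-declarations here */
#pragma GCC diagnostic pop
	return err;
}
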
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 8cb5a1c3489e..fac3717d9ba1 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -107,6 +107,7 @@ static struct test_suite *generic_tests[] = {
&suite__expand_cgroup_events,
&suite__perf_time_to_tsc,
&suite__dlfilter,
+ &suite__sigtrap,
NULL,
};
@@ -420,7 +421,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width,
continue;
st.file = ent->d_name;
- pr_info("%2d: %-*s:", i, width, test_suite.desc);
+ pr_info("%3d: %-*s:", i, width, test_suite.desc);
if (intlist__find(skiplist, i)) {
color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
@@ -470,7 +471,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
continue;
}
- pr_info("%2d: %-*s:", i, width, test_description(t, -1));
+ pr_info("%3d: %-*s:", i, width, test_description(t, -1));
if (intlist__find(skiplist, i)) {
color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
@@ -510,7 +511,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
curr, argc, argv))
continue;
- pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
+ pr_info("%3d.%1d: %-*s:", i, subi + 1, subw,
test_description(t, subi));
test_and_print(t, subi);
}
@@ -545,7 +546,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i)
if (!perf_test__matches(t.desc, curr, argc, argv))
continue;
- pr_info("%2d: %s\n", i, t.desc);
+ pr_info("%3d: %s\n", i, t.desc);
}
@@ -567,14 +568,14 @@ static int perf_test__list(int argc, const char **argv)
if (!perf_test__matches(test_description(t, -1), curr, argc, argv))
continue;
- pr_info("%2d: %s\n", i, test_description(t, -1));
+ pr_info("%3d: %s\n", i, test_description(t, -1));
if (has_subtests(t)) {
int subn = num_subtests(t);
int subi;
for (subi = 0; subi < subn; subi++)
- pr_info("%2d:%1d: %s\n", i, subi + 1,
+ pr_info("%3d:%1d: %s\n", i, subi + 1,
test_description(t, subi));
}
}
@@ -606,6 +607,9 @@ int cmd_test(int argc, const char **argv)
if (ret < 0)
return ret;
+ /* Unbuffered output */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0);
if (argc >= 1 && !strcmp(argv[0], "list"))
return perf_test__list(argc - 1, argv + 1);
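
Making stdout unbuffered matters when `perf test` output is piped: without it, the "%3d: ..." progress line for a hanging test can sit in the stdio buffer and never be seen. Minimal illustration:

#include <stdio.h>

int main(void)
{
	setvbuf(stdout, NULL, _IONBF, 0);	/* unbuffered, as above */
	printf("  3: some long-running test          :");
	/* visible immediately, even into a pipe, before the test runs */
	return 0;
}
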
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 89a155092f85..84e87e31f119 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -38,7 +38,7 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused,
TEST_ASSERT_VAL("wrong nr", map->nr == 20);
for (i = 0; i < 20; i++) {
- TEST_ASSERT_VAL("wrong cpu", map->map[i] == i);
+ TEST_ASSERT_VAL("wrong cpu", map->map[i].cpu == i);
}
perf_cpu_map__put(map);
@@ -67,8 +67,8 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
map = cpu_map__new_data(data);
TEST_ASSERT_VAL("wrong nr", map->nr == 2);
- TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1);
- TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256);
+ TEST_ASSERT_VAL("wrong cpu", map->map[0].cpu == 1);
+ TEST_ASSERT_VAL("wrong cpu", map->map[1].cpu == 256);
TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1);
perf_cpu_map__put(map);
return 0;
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index d01532d40acb..78db4d704e76 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -75,10 +75,10 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
TEST_ASSERT_VAL("wrong id", ev->id == 123);
TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS);
- TEST_ASSERT_VAL("wrong cpus", map->nr == 3);
- TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1);
- TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2);
- TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3);
+ TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__nr(map) == 3);
+ TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 0).cpu == 1);
+ TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 1).cpu == 2);
+ TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 2).cpu == 3);
perf_cpu_map__put(map);
return 0;
}
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index c895de481fe1..d54c5371c6a6 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -169,7 +169,9 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
TEST_ASSERT_VAL("#num_dies", expr__parse(&num_dies, ctx, "#num_dies") == 0);
TEST_ASSERT_VAL("#num_cores >= #num_dies", num_cores >= num_dies);
TEST_ASSERT_VAL("#num_packages", expr__parse(&num_packages, ctx, "#num_packages") == 0);
- TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages);
+
+ if (num_dies) // Some platforms do not have CPU die support, for example s390
+ TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages);
/*
* Source count returns the number of events aggregating in a leader
diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c
index b17b86391383..4c96829510c9 100644
--- a/tools/perf/tests/mem2node.c
+++ b/tools/perf/tests/mem2node.c
@@ -25,14 +25,15 @@ static unsigned long *get_bitmap(const char *str, int nbits)
{
struct perf_cpu_map *map = perf_cpu_map__new(str);
unsigned long *bm = NULL;
- int i;
bm = bitmap_zalloc(nbits);
if (map && bm) {
- for (i = 0; i < map->nr; i++) {
- set_bit(map->map[i], bm);
- }
+ struct perf_cpu cpu;
+ int i;
+
+ perf_cpu_map__for_each_cpu(cpu, i, map)
+ set_bit(cpu.cpu, bm);
}
if (map)
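
Several tests in this series stop poking at map->map[i] directly and use the libperf accessors instead. For reference, perf_cpu_map__for_each_cpu() expands to roughly the following (a sketch of the libperf macro, not a verbatim copy):

#define perf_cpu_map__for_each_cpu(cpu, idx, cpus)		\
	for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx);	\
	     (idx) < perf_cpu_map__nr(cpus);			\
	     (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx))
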
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 90b2feda31ac..c3c17600f29c 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -59,11 +59,12 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest
}
CPU_ZERO(&cpu_set);
- CPU_SET(cpus->map[0], &cpu_set);
+ CPU_SET(perf_cpu_map__cpu(cpus, 0).cpu, &cpu_set);
sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
pr_debug("sched_setaffinity() failed on CPU %d: %s ",
- cpus->map[0], str_error_r(errno, sbuf, sizeof(sbuf)));
+ perf_cpu_map__cpu(cpus, 0).cpu,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_free_cpus;
}
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index cd3dd463783f..1ab362323d25 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -22,7 +22,8 @@
static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __maybe_unused,
int subtest __maybe_unused)
{
- int err = -1, fd, cpu;
+ int err = -1, fd, idx;
+ struct perf_cpu cpu;
struct perf_cpu_map *cpus;
struct evsel *evsel;
unsigned int nr_openat_calls = 111, i;
@@ -58,23 +59,23 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb
goto out_evsel_delete;
}
- for (cpu = 0; cpu < cpus->nr; ++cpu) {
- unsigned int ncalls = nr_openat_calls + cpu;
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ unsigned int ncalls = nr_openat_calls + idx;
/*
* XXX eventually lift this restriction in a way that
* keeps perf building on older glibc installations
* without CPU_ALLOC. 1024 cpus in 2010 still seems
* a reasonable upper limit tho :-)
*/
- if (cpus->map[cpu] >= CPU_SETSIZE) {
- pr_debug("Ignoring CPU %d\n", cpus->map[cpu]);
+ if (cpu.cpu >= CPU_SETSIZE) {
+ pr_debug("Ignoring CPU %d\n", cpu.cpu);
continue;
}
- CPU_SET(cpus->map[cpu], &cpu_set);
+ CPU_SET(cpu.cpu, &cpu_set);
if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
pr_debug("sched_setaffinity() failed on CPU %d: %s ",
- cpus->map[cpu],
+ cpu.cpu,
str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_close_fd;
}
@@ -82,37 +83,29 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb
fd = openat(0, "/etc/passwd", O_RDONLY);
close(fd);
}
- CPU_CLR(cpus->map[cpu], &cpu_set);
+ CPU_CLR(cpu.cpu, &cpu_set);
}
- /*
- * Here we need to explicitly preallocate the counts, as if
- * we use the auto allocation it will allocate just for 1 cpu,
- * as we start by cpu 0.
- */
- if (evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
- pr_debug("evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
- goto out_close_fd;
- }
+ evsel->core.cpus = perf_cpu_map__get(cpus);
err = 0;
- for (cpu = 0; cpu < cpus->nr; ++cpu) {
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
unsigned int expected;
- if (cpus->map[cpu] >= CPU_SETSIZE)
+ if (cpu.cpu >= CPU_SETSIZE)
continue;
- if (evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+ if (evsel__read_on_cpu(evsel, idx, 0) < 0) {
pr_debug("evsel__read_on_cpu\n");
err = -1;
break;
}
- expected = nr_openat_calls + cpu;
- if (perf_counts(evsel->counts, cpu, 0)->val != expected) {
+ expected = nr_openat_calls + idx;
+ if (perf_counts(evsel->counts, idx, 0)->val != expected) {
pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
- expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val);
+ expected, cpu.cpu, perf_counts(evsel->counts, idx, 0)->val);
err = -1;
}
}
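
The rewritten loop keeps two distinct numbers apart: idx, the slot within the perf_cpu_map used for counter reads (evsel__read_on_cpu, perf_counts), and cpu.cpu, the system CPU number used for affinity. With a toy map standing in for "1,3,5":

#include <stdio.h>

struct toy_cpu { int cpu; };	/* stand-in for struct perf_cpu */
static const struct toy_cpu toy_map[] = { {1}, {3}, {5} };

int main(void)
{
	for (int idx = 0; idx < 3; idx++)
		printf("slot %d -> system CPU %d\n", idx, toy_map[idx].cpu);
	return 0;
}
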
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index a508f1dbcb2a..e71efadb24f5 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -2069,6 +2069,31 @@ static int test_event(struct evlist_test *e)
return ret;
}
+static int test_event_fake_pmu(const char *str)
+{
+ struct parse_events_error err;
+ struct evlist *evlist;
+ int ret;
+
+ evlist = evlist__new();
+ if (!evlist)
+ return -ENOMEM;
+
+ parse_events_error__init(&err);
+ perf_pmu__test_parse_init();
+ ret = __parse_events(evlist, str, &err, &perf_pmu__fake);
+ if (ret) {
+ pr_debug("failed to parse event '%s', err %d, str '%s'\n",
+ str, ret, err.str);
+ parse_events_error__print(&err, str);
+ }
+
+ parse_events_error__exit(&err);
+ evlist__delete(evlist);
+
+ return ret;
+}
+
static int test_events(struct evlist_test *events, unsigned cnt)
{
int ret1, ret2 = 0;
@@ -2276,6 +2301,26 @@ static int test_pmu_events_alias(char *event, char *alias)
return test_event(&e);
}
+static int test_pmu_events_alias2(void)
+{
+ static const char events[][30] = {
+ "event-hyphen",
+ "event-two-hyph",
+ };
+ unsigned long i;
+ int ret = 0;
+
+ for (i = 0; i < ARRAY_SIZE(events); i++) {
+ ret = test_event_fake_pmu(&events[i][0]);
+ if (ret) {
+ pr_err("check_parse_fake %s failed\n", &events[i][0]);
+ break;
+ }
+ }
+
+ return ret;
+}
+
static int test__parse_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
int ret1, ret2 = 0;
@@ -2313,6 +2358,10 @@ do { \
return ret;
}
+ ret1 = test_pmu_events_alias2();
+ if (!ret2)
+ ret2 = ret1;
+
ret1 = test_terms(test__terms, ARRAY_SIZE(test__terms));
if (!ret2)
ret2 = ret1;
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
index 574b7e4efd3a..07b6f4ec024f 100644
--- a/tools/perf/tests/parse-metric.c
+++ b/tools/perf/tests/parse-metric.c
@@ -109,6 +109,7 @@ static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist,
struct evsel *evsel;
u64 count;
+ perf_stat__reset_shadow_stats();
evlist__for_each_entry(evlist, evsel) {
count = find_value(evsel->name, vals);
perf_stat__update_shadow_stats(evsel, count, 0, st);
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index df1c9a3cc05b..1c695fb5a79c 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -143,6 +143,34 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = {
.matching_pmu = "uncore_cbox_0",
};
+static const struct perf_pmu_test_event uncore_hyphen = {
+ .event = {
+ .name = "event-hyphen",
+ .event = "umask=0x00,event=0xe0",
+ .desc = "Unit: uncore_cbox UNC_CBO_HYPHEN",
+ .topic = "uncore",
+ .long_desc = "UNC_CBO_HYPHEN",
+ .pmu = "uncore_cbox",
+ },
+ .alias_str = "umask=0,event=0xe0",
+ .alias_long_desc = "UNC_CBO_HYPHEN",
+ .matching_pmu = "uncore_cbox_0",
+};
+
+static const struct perf_pmu_test_event uncore_two_hyph = {
+ .event = {
+ .name = "event-two-hyph",
+ .event = "umask=0x00,event=0xc0",
+ .desc = "Unit: uncore_cbox UNC_CBO_TWO_HYPH",
+ .topic = "uncore",
+ .long_desc = "UNC_CBO_TWO_HYPH",
+ .pmu = "uncore_cbox",
+ },
+ .alias_str = "umask=0,event=0xc0",
+ .alias_long_desc = "UNC_CBO_TWO_HYPH",
+ .matching_pmu = "uncore_cbox_0",
+};
+
static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = {
.event = {
.name = "uncore_hisi_l3c.rd_hit_cpipe",
@@ -188,6 +216,8 @@ static const struct perf_pmu_test_event uncore_imc_cache_hits = {
static const struct perf_pmu_test_event *uncore_events[] = {
&uncore_hisi_ddrc_flux_wcmd,
&unc_cbo_xsnp_response_miss_eviction,
+ &uncore_hyphen,
+ &uncore_two_hyph,
&uncore_hisi_l3c_rd_hit_cpipe,
&uncore_imc_free_running_cache_miss,
&uncore_imc_cache_hits,
@@ -654,6 +684,8 @@ static struct perf_pmu_test_pmu test_pmus[] = {
},
.aliases = {
&unc_cbo_xsnp_response_miss_eviction,
+ &uncore_hyphen,
+ &uncore_two_hyph,
},
},
{
diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh
index de24d374ce24..cb35e488809a 100755
--- a/tools/perf/tests/shell/stat_all_metricgroups.sh
+++ b/tools/perf/tests/shell/stat_all_metricgroups.sh
@@ -6,7 +6,7 @@ set -e
for m in $(perf list --raw-dump metricgroups); do
echo "Testing $m"
- perf stat -M "$m" true
+ perf stat -M "$m" -a true
done
exit 0
diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c
new file mode 100644
index 000000000000..1f147fe6595f
--- /dev/null
+++ b/tools/perf/tests/sigtrap.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Basic test for sigtrap support.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/string.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "cloexec.h"
+#include "debug.h"
+#include "event.h"
+#include "tests.h"
+#include "../perf-sys.h"
+
+/*
+ * PowerPC and S390 do not support creation of instruction breakpoints using the
+ * perf_event interface.
+ *
+ * Just disable the test for these architectures until these issues are
+ * resolved.
+ */
+#if defined(__powerpc__) || defined(__s390x__)
+#define BP_ACCOUNT_IS_SUPPORTED 0
+#else
+#define BP_ACCOUNT_IS_SUPPORTED 1
+#endif
+
+#define NUM_THREADS 5
+
+static struct {
+ int tids_want_signal; /* Which threads still want a signal. */
+ int signal_count; /* Sanity check number of signals received. */
+ volatile int iterate_on; /* Variable to set breakpoint on. */
+ siginfo_t first_siginfo; /* First observed siginfo_t. */
+} ctx;
+
+#define TEST_SIG_DATA (~(unsigned long)(&ctx.iterate_on))
+
+static struct perf_event_attr make_event_attr(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_BREAKPOINT,
+ .size = sizeof(attr),
+ .sample_period = 1,
+ .disabled = 1,
+ .bp_addr = (unsigned long)&ctx.iterate_on,
+ .bp_type = HW_BREAKPOINT_RW,
+ .bp_len = HW_BREAKPOINT_LEN_1,
+ .inherit = 1, /* Children inherit events ... */
+ .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */
+ .remove_on_exec = 1, /* Required by sigtrap. */
+ .sigtrap = 1, /* Request synchronous SIGTRAP on event. */
+ .sig_data = TEST_SIG_DATA,
+ .exclude_kernel = 1, /* To allow */
+ .exclude_hv = 1, /* running as !root */
+ };
+ return attr;
+}
+
+static void
+sigtrap_handler(int signum __maybe_unused, siginfo_t *info, void *ucontext __maybe_unused)
+{
+ if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED))
+ ctx.first_siginfo = *info;
+ __atomic_fetch_sub(&ctx.tids_want_signal, syscall(SYS_gettid), __ATOMIC_RELAXED);
+}
+
+static void *test_thread(void *arg)
+{
+ pthread_barrier_t *barrier = (pthread_barrier_t *)arg;
+ pid_t tid = syscall(SYS_gettid);
+ int i;
+
+ pthread_barrier_wait(barrier);
+
+ __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
+ for (i = 0; i < ctx.iterate_on - 1; i++)
+ __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
+
+ return NULL;
+}
+
+static int run_test_threads(pthread_t *threads, pthread_barrier_t *barrier)
+{
+ int i;
+
+ pthread_barrier_wait(barrier);
+ for (i = 0; i < NUM_THREADS; i++)
+ TEST_ASSERT_EQUAL("pthread_join() failed", pthread_join(threads[i], NULL), 0);
+
+ return TEST_OK;
+}
+
+static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrier)
+{
+ int ret;
+
+ ctx.iterate_on = 3000;
+
+ TEST_ASSERT_EQUAL("misfired signal?", ctx.signal_count, 0);
+ TEST_ASSERT_EQUAL("enable failed", ioctl(fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ ret = run_test_threads(threads, barrier);
+ TEST_ASSERT_EQUAL("disable failed", ioctl(fd, PERF_EVENT_IOC_DISABLE, 0), 0);
+
+ TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on);
+ TEST_ASSERT_EQUAL("missing signals or incorrectly delivered", ctx.tids_want_signal, 0);
+ TEST_ASSERT_VAL("unexpected si_addr", ctx.first_siginfo.si_addr == &ctx.iterate_on);
+#if 0 /* FIXME: enable when libc's signal.h has si_perf_{type,data} */
+ TEST_ASSERT_EQUAL("unexpected si_perf_type", ctx.first_siginfo.si_perf_type,
+ PERF_TYPE_BREAKPOINT);
+ TEST_ASSERT_EQUAL("unexpected si_perf_data", ctx.first_siginfo.si_perf_data,
+ TEST_SIG_DATA);
+#endif
+
+ return ret;
+}
+
+static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct perf_event_attr attr = make_event_attr();
+ struct sigaction action = {};
+ struct sigaction oldact;
+ pthread_t threads[NUM_THREADS];
+ pthread_barrier_t barrier;
+ char sbuf[STRERR_BUFSIZE];
+ int i, fd, ret = TEST_FAIL;
+
+ if (!BP_ACCOUNT_IS_SUPPORTED) {
+ pr_debug("Test not supported on this architecture");
+ return TEST_SKIP;
+ }
+
+ pthread_barrier_init(&barrier, NULL, NUM_THREADS + 1);
+
+ action.sa_flags = SA_SIGINFO | SA_NODEFER;
+ action.sa_sigaction = sigtrap_handler;
+ sigemptyset(&action.sa_mask);
+ if (sigaction(SIGTRAP, &action, &oldact)) {
+ pr_debug("FAILED sigaction(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out;
+ }
+
+ fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag());
+ if (fd < 0) {
+ pr_debug("FAILED sys_perf_event_open(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_restore_sigaction;
+ }
+
+ for (i = 0; i < NUM_THREADS; i++) {
+ if (pthread_create(&threads[i], NULL, test_thread, &barrier)) {
+ pr_debug("FAILED pthread_create(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_close_perf_event;
+ }
+ }
+
+ ret = run_stress_test(fd, threads, &barrier);
+
+out_close_perf_event:
+ close(fd);
+out_restore_sigaction:
+ sigaction(SIGTRAP, &oldact, NULL);
+out:
+ pthread_barrier_destroy(&barrier);
+ return ret;
+}
+
+DEFINE_SUITE("Sigtrap", sigtrap);
diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c
index 2eb096b5e6da..500974040fe3 100644
--- a/tools/perf/tests/stat.c
+++ b/tools/perf/tests/stat.c
@@ -87,7 +87,8 @@ static int test__synthesize_stat(struct test_suite *test __maybe_unused, int sub
count.run = 300;
TEST_ASSERT_VAL("failed to synthesize stat_config",
- !perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL));
+ !perf_event__synthesize_stat(NULL, (struct perf_cpu){.cpu = 1}, 2, 3,
+ &count, process_stat_event, NULL));
return 0;
}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 8f65098110fc..5bbb8f6a48fc 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -146,6 +146,7 @@ DECLARE_SUITE(pe_file_parsing);
DECLARE_SUITE(expand_cgroup_events);
DECLARE_SUITE(perf_time_to_tsc);
DECLARE_SUITE(dlfilter);
+DECLARE_SUITE(sigtrap);
/*
* PowerPC and S390 do not support creation of instruction breakpoints using the
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 869986139146..ee1e3dcbc0bd 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -112,62 +112,88 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu);
for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
- if (!cpu_map__has(map, i))
+ struct perf_cpu cpu = { .cpu = i };
+
+ if (!perf_cpu_map__has(map, cpu))
continue;
pr_debug("CPU %d, core %d, socket %d\n", i,
session->header.env.cpu[i].core_id,
session->header.env.cpu[i].socket_id);
}
+ // Test that CPU ID contains socket, die, core and CPU
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL);
+ TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match",
+ perf_cpu_map__cpu(map, i).cpu == id.cpu.cpu);
+
+ TEST_ASSERT_VAL("Cpu map - Core ID doesn't match",
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
+ TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match",
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ id.socket);
+
+ TEST_ASSERT_VAL("Cpu map - Die ID doesn't match",
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1);
+ TEST_ASSERT_VAL("Cpu map - Thread is set", id.thread == -1);
+ }
+
// Test that core ID contains socket, die and core
- for (i = 0; i < map->nr; i++) {
- id = cpu_map__get_core(map, i, NULL);
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Core map - Core ID doesn't match",
- session->header.env.cpu[map->map[i]].core_id == id.core);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
TEST_ASSERT_VAL("Core map - Socket ID doesn't match",
- session->header.env.cpu[map->map[i]].socket_id == id.socket);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ id.socket);
TEST_ASSERT_VAL("Core map - Die ID doesn't match",
- session->header.env.cpu[map->map[i]].die_id == id.die);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1);
}
// Test that die ID contains socket and die
- for (i = 0; i < map->nr; i++) {
- id = cpu_map__get_die(map, i, NULL);
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Die map - Socket ID doesn't match",
- session->header.env.cpu[map->map[i]].socket_id == id.socket);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ id.socket);
TEST_ASSERT_VAL("Die map - Die ID doesn't match",
- session->header.env.cpu[map->map[i]].die_id == id.die);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Die map - Core is set", id.core == -1);
+ TEST_ASSERT_VAL("Die map - CPU is set", id.cpu.cpu == -1);
TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1);
}
// Test that socket ID contains only socket
- for (i = 0; i < map->nr; i++) {
- id = cpu_map__get_socket(map, i, NULL);
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Socket map - Socket ID doesn't match",
- session->header.env.cpu[map->map[i]].socket_id == id.socket);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ id.socket);
TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1);
TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1);
+ TEST_ASSERT_VAL("Socket map - CPU is set", id.cpu.cpu == -1);
TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1);
}
// Test that node ID contains only node
- for (i = 0; i < map->nr; i++) {
- id = cpu_map__get_node(map, i, NULL);
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Node map - Node ID doesn't match",
- cpu__get_node(map->map[i]) == id.node);
+ cpu__get_node(perf_cpu_map__cpu(map, i)) == id.node);
TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1);
TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1);
TEST_ASSERT_VAL("Node map - Core is set", id.core == -1);
+ TEST_ASSERT_VAL("Node map - CPU is set", id.cpu.cpu == -1);
TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1);
}
perf_session__delete(session);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e81c2493efdf..44ba900828f6 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -966,6 +966,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
.opts = opts,
};
int ret = -1, err;
+ int not_annotated = list_empty(&notes->src->source);
if (sym == NULL)
return -1;
@@ -973,13 +974,15 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
if (ms->map->dso->annotate_warned)
return -1;
- err = symbol__annotate2(ms, evsel, opts, &browser.arch);
- if (err) {
- char msg[BUFSIZ];
- ms->map->dso->annotate_warned = true;
- symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
- ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
- goto out_free_offsets;
+ if (not_annotated) {
+ err = symbol__annotate2(ms, evsel, opts, &browser.arch);
+ if (err) {
+ char msg[BUFSIZ];
+ ms->map->dso->annotate_warned = true;
+ symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+ ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
+ goto out_free_offsets;
+ }
}
ui_helpline__push("Press ESC to exit");
@@ -994,9 +997,11 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
ret = annotate_browser__run(&browser, evsel, hbt);
- annotated_source__purge(notes->src);
+ if (not_annotated)
+ annotated_source__purge(notes->src);
out_free_offsets:
- zfree(&notes->offsets);
+ if (not_annotated)
+ zfree(&notes->offsets);
return ret;
}
diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c
index e9bfe856a5de..b1be59b4e2a4 100644
--- a/tools/perf/ui/tui/setup.c
+++ b/tools/perf/ui/tui/setup.c
@@ -170,9 +170,11 @@ void ui__exit(bool wait_for_ok)
"Press any key...", 0);
SLtt_set_cursor_visibility(1);
- SLsmg_refresh();
- SLsmg_reset_smg();
+ if (!pthread_mutex_trylock(&ui__lock)) {
+ SLsmg_refresh();
+ SLsmg_reset_smg();
+ pthread_mutex_unlock(&ui__lock);
+ }
SLang_reset_tty();
-
perf_error__unregister(&perf_tui_eops);
}
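
ui__exit() can run while another thread still holds ui__lock, and blocking there would deadlock the exit path; the hunk skips the final screen refresh instead. The general teardown shape, with illustrative names:

#include <pthread.h>

static pthread_mutex_t ui_lock = PTHREAD_MUTEX_INITIALIZER;

static void teardown_screen(void (*refresh)(void))
{
	/* Never block on the UI lock during exit: skip the final
	 * refresh if someone else owns the screen. */
	if (pthread_mutex_trylock(&ui_lock) == 0) {
		refresh();
		pthread_mutex_unlock(&ui_lock);
	}
}
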
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 2e5bfbb69960..2a403cefcaf2 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,3 +1,4 @@
+perf-y += arm64-frame-pointer-unwind-support.o
perf-y += annotate.o
perf-y += block-info.o
perf-y += block-range.o
@@ -144,6 +145,7 @@ perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
index 7b12bd7a3080..4d216c0dc425 100644
--- a/tools/perf/util/affinity.c
+++ b/tools/perf/util/affinity.c
@@ -11,7 +11,7 @@
static int get_cpu_set_size(void)
{
- int sz = cpu__max_cpu() + 8 - 1;
+ int sz = cpu__max_cpu().cpu + 8 - 1;
/*
* sched_getaffinity doesn't like masks smaller than the kernel.
* Hopefully that's big enough.
@@ -62,7 +62,7 @@ void affinity__set(struct affinity *a, int cpu)
clear_bit(cpu, a->sched_cpus);
}
-void affinity__cleanup(struct affinity *a)
+static void __affinity__cleanup(struct affinity *a)
{
int cpu_set_size = get_cpu_set_size();
@@ -71,3 +71,9 @@ void affinity__cleanup(struct affinity *a)
zfree(&a->sched_cpus);
zfree(&a->orig_cpus);
}
+
+void affinity__cleanup(struct affinity *a)
+{
+ if (a != NULL)
+ __affinity__cleanup(a);
+}
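
The affinity.c split gives affinity__cleanup() free()-like semantics: a NULL argument is a no-op, so error paths can call it unconditionally. Generic shape of the pattern, with a hypothetical resource type:

#include <stdlib.h>

struct res { void *buf; };

static void __res_cleanup(struct res *r)
{
	free(r->buf);
	r->buf = NULL;
}

static void res_cleanup(struct res *r)
{
	if (r != NULL)		/* tolerate NULL, like free(3) */
		__res_cleanup(r);
}
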
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 3fc528c9270c..5e390a1a79ab 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -179,6 +179,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.phys_addr = ip;
break;
case ARM_SPE_COUNTER:
+ if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT)
+ decoder->record.latency = payload;
break;
case ARM_SPE_CONTEXT:
decoder->record.context_id = payload;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 46a8556a9e95..69b31084d6be 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -33,6 +33,7 @@ struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
u32 op;
+ u32 latency;
u64 from_ip;
u64 to_ip;
u64 timestamp;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index fccac06b573a..d2b64e3f588b 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -58,6 +58,8 @@ struct arm_spe {
u8 sample_branch;
u8 sample_remote_access;
u8 sample_memory;
+ u8 sample_instructions;
+ u64 instructions_sample_period;
u64 l1d_miss_id;
u64 l1d_access_id;
@@ -68,6 +70,7 @@ struct arm_spe {
u64 branch_miss_id;
u64 remote_access_id;
u64 memory_id;
+ u64 instructions_id;
u64 kernel_start;
@@ -90,6 +93,7 @@ struct arm_spe_queue {
u64 time;
u64 timestamp;
struct thread *thread;
+ u64 period_instructions;
};
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
speq->pid = -1;
speq->tid = -1;
speq->cpu = -1;
+ speq->period_instructions = 0;
/* params set */
params.get_trace = arm_spe_get_trace;
@@ -330,6 +335,7 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
sample.addr = record->virt_addr;
sample.phys_addr = record->phys_addr;
sample.data_src = data_src;
+ sample.weight = record->latency;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
@@ -347,6 +353,36 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
sample.id = spe_events_id;
sample.stream_id = spe_events_id;
sample.addr = record->to_ip;
+ sample.weight = record->latency;
+
+ return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
+ u64 spe_events_id, u64 data_src)
+{
+ struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record = &speq->decoder->record;
+ union perf_event *event = speq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ /*
+ * Handles perf instruction sampling period.
+ */
+ speq->period_instructions++;
+ if (speq->period_instructions < spe->instructions_sample_period)
+ return 0;
+ speq->period_instructions = 0;
+
+ arm_spe_prep_sample(spe, speq, event, &sample);
+
+ sample.id = spe_events_id;
+ sample.stream_id = spe_events_id;
+ sample.addr = record->virt_addr;
+ sample.phys_addr = record->phys_addr;
+ sample.data_src = data_src;
+ sample.period = spe->instructions_sample_period;
+ sample.weight = record->latency;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
@@ -480,6 +516,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
return err;
}
+ if (spe->sample_instructions) {
+ err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -993,7 +1035,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
attr.type = PERF_TYPE_HARDWARE;
attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
- PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
+ PERF_SAMPLE_WEIGHT;
if (spe->timeless_decoding)
attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
else
@@ -1107,7 +1150,29 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
return err;
spe->memory_id = id;
arm_spe_set_event_name(evlist, id, "memory");
+ id += 1;
+ }
+
+ if (spe->synth_opts.instructions) {
+ if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
+ pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
+ goto synth_instructions_out;
+ }
+ if (spe->synth_opts.period > 1)
+ pr_warning("Arm SPE has a hardware-based sample period.\n"
+ "Additional instruction events will be discarded by --itrace\n");
+
+ spe->sample_instructions = true;
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ attr.sample_period = spe->synth_opts.period;
+ spe->instructions_sample_period = attr.sample_period;
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->instructions_id = id;
+ arm_spe_set_event_name(evlist, id, "instructions");
}
+synth_instructions_out:
return 0;
}
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c
new file mode 100644
index 000000000000..2242a885fbd7
--- /dev/null
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "arm64-frame-pointer-unwind-support.h"
+#include "callchain.h"
+#include "event.h"
+#include "perf_regs.h" // SMPL_REG_MASK
+#include "unwind.h"
+
+#define perf_event_arm_regs perf_event_arm64_regs
+#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
+#undef perf_event_arm_regs
+
+struct entries {
+ u64 stack[2];
+ size_t length;
+};
+
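+/*
+ * Leaf frame recovery needs frame-pointer call chains and sampled user
+ * registers that include the link register (LR).
+ */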
+static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
+{
+ return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
+ && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
+}
+
+static int add_entry(struct unwind_entry *entry, void *arg)
+{
+ struct entries *entries = arg;
+
+ entries->stack[entries->length++] = entry->ip;
+ return 0;
+}
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int usr_idx)
+{
+ int ret;
+ struct entries entries = {};
+ struct regs_dump old_regs = sample->user_regs;
+
+ if (!get_leaf_frame_caller_enabled(sample))
+ return 0;
+
+ /*
+ * If PC and SP are not recorded, get the value of PC from the stack
+ * and set its mask. SP is not used when doing the unwinding but it
+ * still needs to be set to prevent failures.
+ */
+
+ if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
+ sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
+ sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
+ }
+
+ if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
+ sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
+ sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
+ }
+
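+ /* Unwind at most two entries: the sampled location and its caller. */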
+ ret = unwind__get_entries(add_entry, &entries, thread, sample, 2);
+ sample->user_regs = old_regs;
+
+ if (ret || entries.length != 2)
+ return ret;
+
+ return callchain_param.order == ORDER_CALLER ? entries.stack[0] : entries.stack[1];
+}
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h
new file mode 100644
index 000000000000..32af9ce94398
--- /dev/null
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+
+#include "event.h"
+#include "thread.h"
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx);
+
+#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index c679394b898d..825336304a37 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -123,7 +123,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
mm->prev = 0;
mm->idx = mp->idx;
mm->tid = mp->tid;
- mm->cpu = mp->cpu;
+ mm->cpu = mp->cpu.cpu;
if (!mp->len) {
mm->base = NULL;
@@ -174,13 +174,13 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
mp->idx = idx;
if (per_cpu) {
- mp->cpu = evlist->core.cpus->map[idx];
+ mp->cpu = perf_cpu_map__cpu(evlist->core.cpus, idx);
if (evlist->core.threads)
mp->tid = perf_thread_map__pid(evlist->core.threads, 0);
else
mp->tid = -1;
} else {
- mp->cpu = -1;
+ mp->cpu.cpu = -1;
mp->tid = perf_thread_map__pid(evlist->core.threads, idx);
}
}
@@ -292,7 +292,7 @@ static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
if (!queue->set) {
queue->set = true;
queue->tid = buffer->tid;
- queue->cpu = buffer->cpu;
+ queue->cpu = buffer->cpu.cpu;
}
buffer->buffer_nr = queues->next_buffer_nr++;
@@ -339,11 +339,11 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
return 0;
}
-static bool filter_cpu(struct perf_session *session, int cpu)
+static bool filter_cpu(struct perf_session *session, struct perf_cpu cpu)
{
unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
- return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap);
+ return cpu_bitmap && cpu.cpu != -1 && !test_bit(cpu.cpu, cpu_bitmap);
}
static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
@@ -399,7 +399,7 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
struct auxtrace_buffer buffer = {
.pid = -1,
.tid = event->auxtrace.tid,
- .cpu = event->auxtrace.cpu,
+ .cpu = { event->auxtrace.cpu },
.data_offset = data_offset,
.offset = event->auxtrace.offset,
.reference = event->auxtrace.reference,
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index bbf0d78c6401..19910b9011f3 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -15,6 +15,7 @@
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/types.h>
+#include <internal/cpumap.h>
#include <asm/bitsperlong.h>
#include <asm/barrier.h>
@@ -240,7 +241,7 @@ struct auxtrace_buffer {
size_t size;
pid_t pid;
pid_t tid;
- int cpu;
+ struct perf_cpu cpu;
void *data;
off_t data_offset;
void *mmap_addr;
@@ -350,7 +351,7 @@ struct auxtrace_mmap_params {
int prot;
int idx;
pid_t tid;
- int cpu;
+ struct perf_cpu cpu;
};
/**
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index fbb3c4057c30..7ecfaac7536a 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -29,6 +29,9 @@
#include <internal/xyarray.h>
+/* temporarily disable libbpf deprecation warnings */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)),
const char *fmt, va_list args)
{
@@ -421,7 +424,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
size_t prologue_cnt = 0;
int i, err;
- if (IS_ERR(priv) || !priv || priv->is_tp)
+ if (IS_ERR_OR_NULL(priv) || priv->is_tp)
goto errout;
pev = &priv->pev;
@@ -570,7 +573,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
bool need_prologue = false;
int err, i;
- if (IS_ERR(priv) || !priv) {
+ if (IS_ERR_OR_NULL(priv)) {
pr_debug("Internal error when hook preprocessor\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -642,8 +645,11 @@ int bpf__probe(struct bpf_object *obj)
goto out;
priv = bpf_program__priv(prog);
- if (IS_ERR(priv) || !priv) {
- err = PTR_ERR(priv);
+ if (IS_ERR_OR_NULL(priv)) {
+ if (!priv)
+ err = -BPF_LOADER_ERRNO__INTERNAL;
+ else
+ err = PTR_ERR(priv);
goto out;
}
@@ -693,7 +699,7 @@ int bpf__unprobe(struct bpf_object *obj)
struct bpf_prog_priv *priv = bpf_program__priv(prog);
int i;
- if (IS_ERR(priv) || !priv || priv->is_tp)
+ if (IS_ERR_OR_NULL(priv) || priv->is_tp)
continue;
for (i = 0; i < priv->pev.ntevs; i++) {
@@ -751,7 +757,7 @@ int bpf__foreach_event(struct bpf_object *obj,
struct perf_probe_event *pev;
int i, fd;
- if (IS_ERR(priv) || !priv) {
+ if (IS_ERR_OR_NULL(priv)) {
pr_debug("bpf: failed to get private field\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index c17d4a43ce06..3ce8d03cb7ec 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -265,7 +265,7 @@ static int bpf_program_profiler__read(struct evsel *evsel)
return 0;
}
-static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
+static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx,
int fd)
{
struct bpf_prog_profiler_bpf *skel;
@@ -277,7 +277,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
assert(skel != NULL);
ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events),
- &cpu, &fd, BPF_ANY);
+ &cpu_map_idx, &fd, BPF_ANY);
if (ret)
return ret;
}
@@ -307,6 +307,20 @@ static bool bperf_attr_map_compatible(int attr_map_fd)
(map_info.value_size == sizeof(struct perf_event_attr_map_entry));
}
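+/*
+ * Compatibility shim: libbpf's own bpf_map_create() overrides this __weak
+ * definition when present; otherwise fall back to the deprecated
+ * bpf_create_map().
+ */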
+int __weak
+bpf_map_create(enum bpf_map_type map_type,
+ const char *map_name __maybe_unused,
+ __u32 key_size,
+ __u32 value_size,
+ __u32 max_entries,
+ const struct bpf_map_create_opts *opts __maybe_unused)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ return bpf_create_map(map_type, key_size, value_size, max_entries, 0);
+#pragma GCC diagnostic pop
+}
+
static int bperf_lock_attr_map(struct target *target)
{
char path[PATH_MAX];
@@ -320,10 +334,10 @@ static int bperf_lock_attr_map(struct target *target)
}
if (access(path, F_OK)) {
- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
+ map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
sizeof(struct perf_event_attr),
sizeof(struct perf_event_attr_map_entry),
- ATTR_MAP_SIZE, 0);
+ ATTR_MAP_SIZE, NULL);
if (map_fd < 0)
return -1;
@@ -540,7 +554,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
filter_type == BPERF_FILTER_TGID)
key = evsel->core.threads->map[i].pid;
else if (filter_type == BPERF_FILTER_CPU)
- key = evsel->core.cpus->map[i];
+ key = evsel->core.cpus->map[i].cpu;
else
break;
@@ -566,12 +580,12 @@ out:
return err;
}
-static int bperf__install_pe(struct evsel *evsel, int cpu, int fd)
+static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
{
struct bperf_leader_bpf *skel = evsel->leader_skel;
return bpf_map_update_elem(bpf_map__fd(skel->maps.events),
- &cpu, &fd, BPF_ANY);
+ &cpu_map_idx, &fd, BPF_ANY);
}
/*
@@ -584,7 +598,7 @@ static int bperf_sync_counters(struct evsel *evsel)
num_cpu = all_cpu_map->nr;
for (i = 0; i < num_cpu; i++) {
- cpu = all_cpu_map->map[i];
+ cpu = all_cpu_map->map[i].cpu;
bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu);
}
return 0;
@@ -605,15 +619,17 @@ static int bperf__disable(struct evsel *evsel)
static int bperf__read(struct evsel *evsel)
{
struct bperf_follower_bpf *skel = evsel->follower_skel;
- __u32 num_cpu_bpf = cpu__max_cpu();
+ __u32 num_cpu_bpf = cpu__max_cpu().cpu;
struct bpf_perf_event_value values[num_cpu_bpf];
int reading_map_fd, err = 0;
- __u32 i, j, num_cpu;
+ __u32 i;
+ int j;
bperf_sync_counters(evsel);
reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
+ struct perf_cpu entry;
__u32 cpu;
err = bpf_map_lookup_elem(reading_map_fd, &i, values);
@@ -623,16 +639,15 @@ static int bperf__read(struct evsel *evsel)
case BPERF_FILTER_GLOBAL:
assert(i == 0);
- num_cpu = all_cpu_map->nr;
- for (j = 0; j < num_cpu; j++) {
- cpu = all_cpu_map->map[j];
+ perf_cpu_map__for_each_cpu(entry, j, all_cpu_map) {
+ cpu = entry.cpu;
perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter;
perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled;
perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running;
}
break;
case BPERF_FILTER_CPU:
- cpu = evsel->core.cpus->map[i];
+ cpu = evsel->core.cpus->map[i].cpu;
perf_counts(evsel->counts, i, 0)->val = values[cpu].counter;
perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled;
perf_counts(evsel->counts, i, 0)->run = values[cpu].running;
@@ -757,11 +772,11 @@ static inline bool bpf_counter_skip(struct evsel *evsel)
evsel->follower_skel == NULL;
}
-int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd)
+int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
{
if (bpf_counter_skip(evsel))
return 0;
- return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd);
+ return evsel->bpf_counter_ops->install_pe(evsel, cpu_map_idx, fd);
}
int bpf_counter__load(struct evsel *evsel, struct target *target)
diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h
index 65ebaa6694fb..4dbf26408b69 100644
--- a/tools/perf/util/bpf_counter.h
+++ b/tools/perf/util/bpf_counter.h
@@ -16,7 +16,7 @@ typedef int (*bpf_counter_evsel_op)(struct evsel *evsel);
typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel,
struct target *target);
typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel,
- int cpu,
+ int cpu_map_idx,
int fd);
struct bpf_counter_ops {
@@ -40,7 +40,7 @@ int bpf_counter__enable(struct evsel *evsel);
int bpf_counter__disable(struct evsel *evsel);
int bpf_counter__read(struct evsel *evsel);
void bpf_counter__destroy(struct evsel *evsel);
-int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd);
+int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd);
#else /* HAVE_BPF_SKEL */
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
index cbc6c2bca488..631e34a0b66f 100644
--- a/tools/perf/util/bpf_counter_cgroup.c
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -48,7 +48,7 @@ static int bperf_load_program(struct evlist *evlist)
struct cgroup *cgrp, *leader_cgrp;
__u32 i, cpu;
__u32 nr_cpus = evlist->core.all_cpus->nr;
- int total_cpus = cpu__max_cpu();
+ int total_cpus = cpu__max_cpu().cpu;
int map_size, map_fd;
int prog_fd, err;
@@ -125,7 +125,7 @@ static int bperf_load_program(struct evlist *evlist)
for (cpu = 0; cpu < nr_cpus; cpu++) {
int fd = FD(evsel, cpu);
__u32 idx = evsel->core.idx * total_cpus +
- evlist->core.all_cpus->map[cpu];
+ evlist->core.all_cpus->map[cpu].cpu;
err = bpf_map_update_elem(map_fd, &idx, &fd,
BPF_ANY);
@@ -212,7 +212,7 @@ static int bperf_cgrp__sync_counters(struct evlist *evlist)
int prog_fd = bpf_program__fd(skel->progs.trigger_read);
for (i = 0; i < nr_cpus; i++) {
- cpu = evlist->core.all_cpus->map[i];
+ cpu = evlist->core.all_cpus->map[i].cpu;
bperf_trigger_reading(prog_fd, cpu);
}
@@ -245,7 +245,7 @@ static int bperf_cgrp__read(struct evsel *evsel)
{
struct evlist *evlist = evsel->evlist;
int i, cpu, nr_cpus = evlist->core.all_cpus->nr;
- int total_cpus = cpu__max_cpu();
+ int total_cpus = cpu__max_cpu().cpu;
struct perf_counts_values *counts;
struct bpf_perf_event_value *values;
int reading_map_fd, err = 0;
@@ -272,7 +272,7 @@ static int bperf_cgrp__read(struct evsel *evsel)
}
for (i = 0; i < nr_cpus; i++) {
- cpu = evlist->core.all_cpus->map[i];
+ cpu = evlist->core.all_cpus->map[i].cpu;
counts = perf_counts(evsel->counts, i, 0);
counts->val = values[cpu].counter;
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
new file mode 100644
index 000000000000..d756cc66eef3
--- /dev/null
+++ b/tools/perf/util/bpf_ftrace.c
@@ -0,0 +1,152 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <linux/err.h>
+
+#include "util/ftrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/bpf_counter.h"
+
+#include "util/bpf_skel/func_latency.skel.h"
+
+static struct func_latency_bpf *skel;
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
+{
+ int fd, err;
+ int i, ncpus = 1, ntasks = 1;
+ struct filter_entry *func;
+
+ if (!list_is_singular(&ftrace->filters)) {
+ pr_err("ERROR: %s target function(s).\n",
+ list_empty(&ftrace->filters) ? "No" : "Too many");
+ return -1;
+ }
+
+ func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+
+ skel = func_latency_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open func latency skeleton\n");
+ return -1;
+ }
+
+ /* don't need to set cpu filter for system-wide mode */
+ if (ftrace->target.cpu_list) {
+ ncpus = perf_cpu_map__nr(ftrace->evlist->core.cpus);
+ bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
+ bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ }
+
+ set_max_rlimit();
+
+ err = func_latency_bpf__load(skel);
+ if (err) {
+ pr_err("Failed to load func latency skeleton\n");
+ goto out;
+ }
+
+ if (ftrace->target.cpu_list) {
+ u32 cpu;
+ u8 val = 1;
+
+ skel->bss->has_cpu = 1;
+ fd = bpf_map__fd(skel->maps.cpu_filter);
+
+ for (i = 0; i < ncpus; i++) {
+ cpu = perf_cpu_map__cpu(ftrace->evlist->core.cpus, i).cpu;
+ bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+ }
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+
+ for (i = 0; i < ntasks; i++) {
+ pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+ }
+
+ skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
+ false, func->name);
+ if (IS_ERR(skel->links.func_begin)) {
+ pr_err("Failed to attach fentry program\n");
+ err = PTR_ERR(skel->links.func_begin);
+ goto out;
+ }
+
+ skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
+ true, func->name);
+ if (IS_ERR(skel->links.func_end)) {
+ pr_err("Failed to attach fexit program\n");
+ err = PTR_ERR(skel->links.func_end);
+ goto out;
+ }
+
+ /* XXX: we don't actually use this fd - just for poll() */
+ return open("/dev/null", O_RDONLY);
+
+out:
+ return err;
+}
+
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ skel->bss->enabled = 1;
+ return 0;
+}
+
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ skel->bss->enabled = 0;
+ return 0;
+}
+
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+ int buckets[])
+{
+ int i, fd, err;
+ u32 idx;
+ u64 *hist;
+ int ncpus = cpu__max_cpu().cpu;
+
+ fd = bpf_map__fd(skel->maps.latency);
+
+ hist = calloc(ncpus, sizeof(*hist));
+ if (hist == NULL)
+ return -ENOMEM;
+
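+ /* The latency map is per-CPU; sum each bucket across all CPUs. */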
+ for (idx = 0; idx < NUM_BUCKET; idx++) {
+ err = bpf_map_lookup_elem(fd, &idx, hist);
+ if (err) {
+ buckets[idx] = 0;
+ continue;
+ }
+
+ for (i = 0; i < ncpus; i++)
+ buckets[idx] += hist[i];
+ }
+
+ free(hist);
+ return 0;
+}
+
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ func_latency_bpf__destroy(skel);
+ return 0;
+}
diff --git a/tools/perf/util/bpf_skel/bperf.h b/tools/perf/util/bpf_skel/bperf.h
deleted file mode 100644
index 186a5551ddb9..000000000000
--- a/tools/perf/util/bpf_skel/bperf.h
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-// Copyright (c) 2021 Facebook
-
-#ifndef __BPERF_STAT_H
-#define __BPERF_STAT_H
-
-typedef struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __uint(key_size, sizeof(__u32));
- __uint(value_size, sizeof(struct bpf_perf_event_value));
- __uint(max_entries, 1);
-} reading_map;
-
-#endif /* __BPERF_STAT_H */
diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
index b8fa3cb2da23..f193998530d4 100644
--- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
@@ -1,14 +1,23 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2021 Facebook
-#include <linux/bpf.h>
-#include <linux/perf_event.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "bperf.h"
#include "bperf_u.h"
-reading_map diff_readings SEC(".maps");
-reading_map accum_readings SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} diff_readings SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} accum_readings SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c
index 4f70d1459e86..e2a2d4cd7779 100644
--- a/tools/perf/util/bpf_skel/bperf_leader.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c
@@ -1,10 +1,8 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2021 Facebook
-#include <linux/bpf.h>
-#include <linux/perf_event.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "bperf.h"
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
@@ -13,8 +11,19 @@ struct {
__uint(map_flags, BPF_F_PRESERVE_ELEMS);
} events SEC(".maps");
-reading_map prev_readings SEC(".maps");
-reading_map diff_readings SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} prev_readings SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} diff_readings SEC(".maps");
SEC("raw_tp/sched_switch")
int BPF_PROG(on_switch)
diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
index ab12b4c4ece2..97037d3b3d9f 100644
--- a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
+++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2020 Facebook
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
new file mode 100644
index 000000000000..ea94187fe443
--- /dev/null
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+// This should be in sync with "util/ftrace.h"
+#define NUM_BUCKET 22
+
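+// function entry timestamps, keyed by the pid_tgid of the running thread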
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, 10000);
+} functime SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, NUM_BUCKET);
+} latency SEC(".maps");
+
+
+int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;
+
+SEC("kprobe/func")
+int BPF_PROG(func_begin)
+{
+ __u64 key, now;
+
+ if (!enabled)
+ return 0;
+
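+ /* upper 32 bits: tgid, lower 32 bits: pid (thread id) */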
+ key = bpf_get_current_pid_tgid();
+
+ if (has_cpu) {
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_task) {
+ __u32 pid = key & 0xffffffff;
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&task_filter, &pid);
+ if (!ok)
+ return 0;
+ }
+
+ now = bpf_ktime_get_ns();
+
+ // overwrite timestamp for nested functions
+ bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
+ return 0;
+}
+
+SEC("kretprobe/func")
+int BPF_PROG(func_end)
+{
+ __u64 tid;
+ __u64 *start;
+
+ if (!enabled)
+ return 0;
+
+ tid = bpf_get_current_pid_tgid();
+
+ start = bpf_map_lookup_elem(&functime, &tid);
+ if (start) {
+ __s64 delta = bpf_ktime_get_ns() - *start;
+ __u32 key;
+ __u64 *hist;
+
+ bpf_map_delete_elem(&functime, &tid);
+
+ if (delta < 0)
+ return 0;
+
+ // calculate index using delta in usec
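+ // bucket boundaries double each step: [0,1us), [1,2us), [2,4us), ...;
+ // the last bucket catches everything else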
+ for (key = 0; key < (NUM_BUCKET - 1); key++) {
+ if (delta < ((1000UL) << key))
+ break;
+ }
+
+ hist = bpf_map_lookup_elem(&latency, &key);
+ if (!hist)
+ return 0;
+
+ *hist += 1;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 8e2777133bd9..131207b91d15 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
map__zput(node->ms.map);
}
-void callchain_param_setup(u64 sample_type)
+void callchain_param_setup(u64 sample_type, const char *arch)
{
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -1612,6 +1612,18 @@ void callchain_param_setup(u64 sample_type)
else
callchain_param.record_mode = CALLCHAIN_FP;
}
+
+ /*
+ * It's necessary to use libunwind to reliably determine the caller of
+ * a leaf function on aarch64, as otherwise we cannot know whether to
+ * start from the LR or FP.
+ *
+ * Always starting from the LR can result in duplicate or entirely
+ * erroneous entries. Always skipping the LR and starting from the FP
+ * can result in missing entries.
+ */
+ if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64"))
+ dwarf_callchain_users = true;
}
static bool chain_match(struct callchain_list *base_chain,
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 5824134f983b..d95615daed73 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
}
#endif
+void arch__add_leaf_frame_record_opts(struct record_opts *opts);
+
char *callchain_list__sym_name(struct callchain_list *cl,
char *bf, size_t bfsize, bool show_dso);
char *callchain_node__scnprintf_value(struct callchain_node *node,
@@ -298,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root,
u64 *branch_count, u64 *predicted_count,
u64 *abort_count, u64 *cycles_count);
-void callchain_param_setup(u64 sample_type);
+void callchain_param_setup(u64 sample_type, const char *arch);
bool callchain_cnode_matched(struct callchain_node *base_cnode,
struct callchain_node *pair_cnode);
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
index 582f3aeaf5e4..7a447d918458 100644
--- a/tools/perf/util/counts.c
+++ b/tools/perf/util/counts.c
@@ -4,6 +4,7 @@
#include <string.h>
#include "evsel.h"
#include "counts.h"
+#include <perf/threadmap.h>
#include <linux/zalloc.h>
struct perf_counts *perf_counts__new(int ncpus, int nthreads)
@@ -55,9 +56,12 @@ void evsel__reset_counts(struct evsel *evsel)
perf_counts__reset(evsel->counts);
}
-int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads)
+int evsel__alloc_counts(struct evsel *evsel)
{
- evsel->counts = perf_counts__new(ncpus, nthreads);
+ struct perf_cpu_map *cpus = evsel__cpus(evsel);
+ int nthreads = perf_thread_map__nr(evsel->core.threads);
+
+ evsel->counts = perf_counts__new(perf_cpu_map__nr(cpus), nthreads);
return evsel->counts != NULL ? 0 : -ENOMEM;
}
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 7ff36bf6d644..5de275194f2b 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -18,21 +18,21 @@ struct perf_counts {
static inline struct perf_counts_values*
-perf_counts(struct perf_counts *counts, int cpu, int thread)
+perf_counts(struct perf_counts *counts, int cpu_map_idx, int thread)
{
- return xyarray__entry(counts->values, cpu, thread);
+ return xyarray__entry(counts->values, cpu_map_idx, thread);
}
static inline bool
-perf_counts__is_loaded(struct perf_counts *counts, int cpu, int thread)
+perf_counts__is_loaded(struct perf_counts *counts, int cpu_map_idx, int thread)
{
- return *((bool *) xyarray__entry(counts->loaded, cpu, thread));
+ return *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread));
}
static inline void
-perf_counts__set_loaded(struct perf_counts *counts, int cpu, int thread, bool loaded)
+perf_counts__set_loaded(struct perf_counts *counts, int cpu_map_idx, int thread, bool loaded)
{
- *((bool *) xyarray__entry(counts->loaded, cpu, thread)) = loaded;
+ *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)) = loaded;
}
struct perf_counts *perf_counts__new(int ncpus, int nthreads);
@@ -40,7 +40,7 @@ void perf_counts__delete(struct perf_counts *counts);
void perf_counts__reset(struct perf_counts *counts);
void evsel__reset_counts(struct evsel *evsel);
-int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads);
+int evsel__alloc_counts(struct evsel *evsel);
void evsel__free_counts(struct evsel *evsel);
#endif /* __PERF_COUNTS_H */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 87d3eca9b872..12b2243222b0 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -13,9 +13,13 @@
#include <linux/ctype.h>
#include <linux/zalloc.h>
-static int max_cpu_num;
-static int max_present_cpu_num;
+static struct perf_cpu max_cpu_num;
+static struct perf_cpu max_present_cpu_num;
static int max_node_num;
+/**
+ * The numa node X as read from /sys/devices/system/node/nodeX indexed by the
+ * CPU number.
+ */
static int *cpunode_map;
static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
@@ -33,9 +37,9 @@ static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
* otherwise it would become 65535.
*/
if (cpus->cpu[i] == (u16) -1)
- map->map[i] = -1;
+ map->map[i].cpu = -1;
else
- map->map[i] = (int) cpus->cpu[i];
+ map->map[i].cpu = (int) cpus->cpu[i];
}
}
@@ -54,7 +58,7 @@ static struct perf_cpu_map *cpu_map__from_mask(struct perf_record_record_cpu_map
int cpu, i = 0;
for_each_set_bit(cpu, mask->mask, nbits)
- map->map[i++] = cpu;
+ map->map[i++].cpu = cpu;
}
return map;
@@ -87,7 +91,7 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr)
cpus->nr = nr;
for (i = 0; i < nr; i++)
- cpus->map[i] = -1;
+ cpus->map[i].cpu = -1;
refcount_set(&cpus->refcnt, 1);
}
@@ -104,7 +108,7 @@ struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
cpus->nr = nr;
for (i = 0; i < nr; i++)
- cpus->map[i] = cpu_map__empty_aggr_cpu_id();
+ cpus->map[i] = aggr_cpu_id__empty();
refcount_set(&cpus->refcnt, 1);
}
@@ -122,28 +126,21 @@ static int cpu__get_topology_int(int cpu, const char *name, int *value)
return sysfs__read_int(path, value);
}
-int cpu_map__get_socket_id(int cpu)
+int cpu__get_socket_id(struct perf_cpu cpu)
{
- int value, ret = cpu__get_topology_int(cpu, "physical_package_id", &value);
+ int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value);
return ret ?: value;
}
-struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx,
- void *data __maybe_unused)
+struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused)
{
- int cpu;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
- if (idx > map->nr)
- return id;
-
- cpu = map->map[idx];
-
- id.socket = cpu_map__get_socket_id(cpu);
+ id.socket = cpu__get_socket_id(cpu);
return id;
}
-static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
+static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
{
struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
@@ -160,57 +157,64 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
return a->thread - b->thread;
}
-int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
- struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
- void *data)
+struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
+ aggr_cpu_id_get_t get_id,
+ void *data)
{
- int nr = cpus->nr;
- struct cpu_aggr_map *c = cpu_aggr_map__empty_new(nr);
- int cpu, s2;
- struct aggr_cpu_id s1;
+ int idx;
+ struct perf_cpu cpu;
+ struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr);
if (!c)
- return -1;
+ return NULL;
/* Reset size as it may only be partially filled */
c->nr = 0;
- for (cpu = 0; cpu < nr; cpu++) {
- s1 = f(cpus, cpu, data);
- for (s2 = 0; s2 < c->nr; s2++) {
- if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2]))
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ bool duplicate = false;
+ struct aggr_cpu_id cpu_id = get_id(cpu, data);
+
+ for (int j = 0; j < c->nr; j++) {
+ if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) {
+ duplicate = true;
break;
+ }
}
- if (s2 == c->nr) {
- c->map[c->nr] = s1;
+ if (!duplicate) {
+ c->map[c->nr] = cpu_id;
c->nr++;
}
}
+ /* Trim. */
+ if (c->nr != cpus->nr) {
+ struct cpu_aggr_map *trimmed_c =
+ realloc(c,
+ sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr);
+
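+ /* If realloc fails, keep using the original, larger allocation. */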
+ if (trimmed_c)
+ c = trimmed_c;
+ }
/* ensure we process id in increasing order */
- qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id);
+ qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp);
+
+ return c;
- *res = c;
- return 0;
}
-int cpu_map__get_die_id(int cpu)
+int cpu__get_die_id(struct perf_cpu cpu)
{
- int value, ret = cpu__get_topology_int(cpu, "die_id", &value);
+ int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value);
return ret ?: value;
}
-struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
+struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
{
- int cpu, die;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id;
+ int die;
- if (idx > map->nr)
- return id;
-
- cpu = map->map[idx];
-
- die = cpu_map__get_die_id(cpu);
+ die = cpu__get_die_id(cpu);
/* There is no die_id on legacy system. */
if (die == -1)
die = 0;
@@ -220,79 +224,59 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat
* with the socket ID and then add die to
* make a unique ID.
*/
- id = cpu_map__get_socket(map, idx, data);
- if (cpu_map__aggr_cpu_id_is_empty(id))
+ id = aggr_cpu_id__socket(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
return id;
id.die = die;
return id;
}
-int cpu_map__get_core_id(int cpu)
+int cpu__get_core_id(struct perf_cpu cpu)
{
- int value, ret = cpu__get_topology_int(cpu, "core_id", &value);
+ int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value);
return ret ?: value;
}
-int cpu_map__get_node_id(int cpu)
-{
- return cpu__get_node(cpu);
-}
-
-struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
+struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data)
{
- int cpu;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
-
- if (idx > map->nr)
- return id;
+ struct aggr_cpu_id id;
+ int core = cpu__get_core_id(cpu);
- cpu = map->map[idx];
-
- cpu = cpu_map__get_core_id(cpu);
-
- /* cpu_map__get_die returns a struct with socket and die set*/
- id = cpu_map__get_die(map, idx, data);
- if (cpu_map__aggr_cpu_id_is_empty(id))
+ /* aggr_cpu_id__die returns a struct with socket and die set. */
+ id = aggr_cpu_id__die(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
return id;
/*
* core_id is relative to socket and die, we need a global id.
* So we combine the result from cpu_map__get_die with the core id
*/
- id.core = cpu;
+ id.core = core;
return id;
}
-struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
+struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data)
{
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id;
- if (idx < 0 || idx >= map->nr)
+ /* aggr_cpu_id__core returns a struct with socket, die and core set. */
+ id = aggr_cpu_id__core(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
return id;
- id.node = cpu_map__get_node_id(map->map[idx]);
+ id.cpu = cpu;
return id;
-}
-int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp)
-{
- return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL);
}
-int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep)
+struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused)
{
- return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL);
-}
-
-int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep)
-{
- return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL);
-}
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
-int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap)
-{
- return cpu_map__build_map(cpus, numap, cpu_map__get_node, NULL);
+ id.node = cpu__get_node(cpu);
+ return id;
}
/* setup simple routines to easily access node numbers given a cpu number */
@@ -335,8 +319,8 @@ static void set_max_cpu_num(void)
int ret = -1;
/* set up default */
- max_cpu_num = 4096;
- max_present_cpu_num = 4096;
+ max_cpu_num.cpu = 4096;
+ max_present_cpu_num.cpu = 4096;
mnt = sysfs__mountpoint();
if (!mnt)
@@ -349,7 +333,7 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_cpu_num);
+ ret = get_max_num(path, &max_cpu_num.cpu);
if (ret)
goto out;
@@ -360,11 +344,11 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_present_cpu_num);
+ ret = get_max_num(path, &max_present_cpu_num.cpu);
out:
if (ret)
- pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num);
+ pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
}
/* Determine highest possible node in the system for sparse allocation */
@@ -403,31 +387,31 @@ int cpu__max_node(void)
return max_node_num;
}
-int cpu__max_cpu(void)
+struct perf_cpu cpu__max_cpu(void)
{
- if (unlikely(!max_cpu_num))
+ if (unlikely(!max_cpu_num.cpu))
set_max_cpu_num();
return max_cpu_num;
}
-int cpu__max_present_cpu(void)
+struct perf_cpu cpu__max_present_cpu(void)
{
- if (unlikely(!max_present_cpu_num))
+ if (unlikely(!max_present_cpu_num.cpu))
set_max_cpu_num();
return max_present_cpu_num;
}
-int cpu__get_node(int cpu)
+int cpu__get_node(struct perf_cpu cpu)
{
if (unlikely(cpunode_map == NULL)) {
pr_debug("cpu_map not initialized\n");
return -1;
}
- return cpunode_map[cpu];
+ return cpunode_map[cpu.cpu];
}
static int init_cpunode_map(void)
@@ -437,13 +421,13 @@ static int init_cpunode_map(void)
set_max_cpu_num();
set_max_node_num();
- cpunode_map = calloc(max_cpu_num, sizeof(int));
+ cpunode_map = calloc(max_cpu_num.cpu, sizeof(int));
if (!cpunode_map) {
pr_err("%s: calloc failed\n", __func__);
return -1;
}
- for (i = 0; i < max_cpu_num; i++)
+ for (i = 0; i < max_cpu_num.cpu; i++)
cpunode_map[i] = -1;
return 0;
@@ -502,47 +486,39 @@ int cpu__setup_cpunode_map(void)
return 0;
}
-bool cpu_map__has(struct perf_cpu_map *cpus, int cpu)
-{
- return perf_cpu_map__idx(cpus, cpu) != -1;
-}
-
-int cpu_map__cpu(struct perf_cpu_map *cpus, int idx)
-{
- return cpus->map[idx];
-}
-
size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
{
- int i, cpu, start = -1;
+ int i, start = -1;
bool first = true;
size_t ret = 0;
#define COMMA first ? "" : ","
for (i = 0; i < map->nr + 1; i++) {
+ struct perf_cpu cpu = { .cpu = INT_MAX };
bool last = i == map->nr;
- cpu = last ? INT_MAX : map->map[i];
+ if (!last)
+ cpu = map->map[i];
if (start == -1) {
start = i;
if (last) {
ret += snprintf(buf + ret, size - ret,
"%s%d", COMMA,
- map->map[i]);
+ map->map[i].cpu);
}
- } else if (((i - start) != (cpu - map->map[start])) || last) {
+ } else if (((i - start) != (cpu.cpu - map->map[start].cpu)) || last) {
int end = i - 1;
if (start == end) {
ret += snprintf(buf + ret, size - ret,
"%s%d", COMMA,
- map->map[start]);
+ map->map[start].cpu);
} else {
ret += snprintf(buf + ret, size - ret,
"%s%d-%d", COMMA,
- map->map[start], map->map[end]);
+ map->map[start].cpu, map->map[end].cpu);
}
first = false;
start = i;
@@ -569,23 +545,23 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
int i, cpu;
char *ptr = buf;
unsigned char *bitmap;
- int last_cpu = cpu_map__cpu(map, map->nr - 1);
+ struct perf_cpu last_cpu = perf_cpu_map__cpu(map, map->nr - 1);
if (buf == NULL)
return 0;
- bitmap = zalloc(last_cpu / 8 + 1);
+ bitmap = zalloc(last_cpu.cpu / 8 + 1);
if (bitmap == NULL) {
buf[0] = '\0';
return 0;
}
for (i = 0; i < map->nr; i++) {
- cpu = cpu_map__cpu(map, i);
+ cpu = perf_cpu_map__cpu(map, i).cpu;
bitmap[cpu / 8] |= 1 << (cpu % 8);
}
- for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) {
+ for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
unsigned char bits = bitmap[cpu / 8];
if (cpu % 8)
@@ -614,32 +590,35 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
return online;
}
-bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b)
+bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
{
- return a.thread == b.thread &&
- a.node == b.node &&
- a.socket == b.socket &&
- a.die == b.die &&
- a.core == b.core;
+ return a->thread == b->thread &&
+ a->node == b->node &&
+ a->socket == b->socket &&
+ a->die == b->die &&
+ a->core == b->core &&
+ a->cpu.cpu == b->cpu.cpu;
}
-bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a)
+bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
{
- return a.thread == -1 &&
- a.node == -1 &&
- a.socket == -1 &&
- a.die == -1 &&
- a.core == -1;
+ return a->thread == -1 &&
+ a->node == -1 &&
+ a->socket == -1 &&
+ a->die == -1 &&
+ a->core == -1 &&
+ a->cpu.cpu == -1;
}
-struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void)
+struct aggr_cpu_id aggr_cpu_id__empty(void)
{
struct aggr_cpu_id ret = {
.thread = -1,
.node = -1,
.socket = -1,
.die = -1,
- .core = -1
+ .core = -1,
+ .cpu = (struct perf_cpu){ .cpu = -1 },
};
return ret;
}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index a27eeaf086e8..703ae6d3386e 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -2,71 +2,134 @@
#ifndef __PERF_CPUMAP_H
#define __PERF_CPUMAP_H
-#include <stdio.h>
#include <stdbool.h>
+#include <stdio.h>
#include <internal/cpumap.h>
#include <perf/cpumap.h>
+/** Identify where counts are aggregated, -1 implies not to aggregate. */
struct aggr_cpu_id {
+ /** A value in the range 0 to number of threads. */
int thread;
+ /** The numa node X as read from /sys/devices/system/node/nodeX. */
int node;
+ /**
+ * The socket number as read from
+ * /sys/devices/system/cpu/cpuX/topology/physical_package_id.
+ */
int socket;
+ /** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */
int die;
+ /** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */
int core;
+ /** CPU aggregation, note there is one CPU for each SMT thread. */
+ struct perf_cpu cpu;
};
+/** A collection of aggr_cpu_id values, the "built" version is sorted and uniqued. */
struct cpu_aggr_map {
refcount_t refcnt;
+ /** Number of valid entries. */
int nr;
+ /** The entries. */
struct aggr_cpu_id map[];
};
struct perf_record_cpu_map_data;
struct perf_cpu_map *perf_cpu_map__empty_new(int nr);
-struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr);
struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data);
size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size);
size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size);
size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp);
-int cpu_map__get_socket_id(int cpu);
-struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__get_die_id(int cpu);
-struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__get_core_id(int cpu);
-struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__get_node_id(int cpu);
-struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp);
-int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep);
-int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep);
-int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep);
const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */
-static inline int cpu_map__socket(struct perf_cpu_map *sock, int s)
+int cpu__setup_cpunode_map(void);
+
+int cpu__max_node(void);
+struct perf_cpu cpu__max_cpu(void);
+struct perf_cpu cpu__max_present_cpu(void);
+
+/**
+ * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use
+ * a single dummy map with an entry of -1.
+ */
+static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus)
{
- if (!sock || s > sock->nr || s < 0)
- return 0;
- return sock->map[s];
+ return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1;
}
-int cpu__setup_cpunode_map(void);
+/**
+ * cpu__get_node - Returns the numa node X as read from
+ * /sys/devices/system/node/nodeX for the given CPU.
+ */
+int cpu__get_node(struct perf_cpu cpu);
+/**
+ * cpu__get_socket_id - Returns the socket number as read from
+ * /sys/devices/system/cpu/cpuX/topology/physical_package_id for the given CPU.
+ */
+int cpu__get_socket_id(struct perf_cpu cpu);
+/**
+ * cpu__get_die_id - Returns the die id as read from
+ * /sys/devices/system/cpu/cpuX/topology/die_id for the given CPU.
+ */
+int cpu__get_die_id(struct perf_cpu cpu);
+/**
+ * cpu__get_core_id - Returns the core id as read from
+ * /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU.
+ */
+int cpu__get_core_id(struct perf_cpu cpu);
-int cpu__max_node(void);
-int cpu__max_cpu(void);
-int cpu__max_present_cpu(void);
-int cpu__get_node(int cpu);
+/**
+ * cpu_aggr_map__empty_new - Create a cpu_aggr_map of size nr with every entry
+ * being empty.
+ */
+struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr);
+
+typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data);
+
+/**
+ * cpu_aggr_map__new - Create a cpu_aggr_map with an aggr_cpu_id for each cpu in
+ * cpus. The aggr_cpu_id is created with 'get_id' that may have a data value
+ * passed to it. The cpu_aggr_map is sorted with duplicate values removed.
+ */
+struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
+ aggr_cpu_id_get_t get_id,
+ void *data);
-int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
- struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
- void *data);
+bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b);
+bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a);
+struct aggr_cpu_id aggr_cpu_id__empty(void);
-int cpu_map__cpu(struct perf_cpu_map *cpus, int idx);
-bool cpu_map__has(struct perf_cpu_map *cpus, int cpu);
-bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b);
-bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a);
-struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void);
+/**
+ * aggr_cpu_id__socket - Create an aggr_cpu_id with the socket populated with
+ * the socket for cpu. The function signature is compatible with
+ * aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__die - Create an aggr_cpu_id with the die and socket populated
+ * with the die and socket for cpu. The function signature is compatible with
+ * aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket
+ * populated with the core, die and socket for cpu. The function signature is
+ * compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__cpu - Create an aggr_cpu_id with the cpu, core, die and socket
+ * populated with the cpu, core, die and socket for cpu. The function signature
+ * is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__node - Create an aggr_cpu_id with the numa node populated for
+ * cpu. The function signature is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data);
#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index 51b429c86f98..d275d843c155 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -165,7 +165,8 @@ static bool has_die_topology(void)
if (uname(&uts) < 0)
return false;
- if (strncmp(uts.machine, "x86_64", 6))
+ if (strncmp(uts.machine, "x86_64", 6) &&
+ strncmp(uts.machine, "s390x", 5))
return false;
scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
@@ -187,7 +188,7 @@ struct cpu_topology *cpu_topology__new(void)
struct perf_cpu_map *map;
bool has_die = has_die_topology();
- ncpus = cpu__max_present_cpu();
+ ncpus = cpu__max_present_cpu().cpu;
/* build online CPU map */
map = perf_cpu_map__new(NULL);
@@ -218,7 +219,7 @@ struct cpu_topology *cpu_topology__new(void)
tp->core_cpus_list = addr;
for (i = 0; i < nr; i++) {
- if (!cpu_map__has(map, i))
+ if (!perf_cpu_map__has(map, (struct perf_cpu){ .cpu = i }))
continue;
ret = build_cpu_topology(tp, i);
@@ -324,7 +325,7 @@ struct numa_topology *numa_topology__new(void)
if (!node_map)
goto out;
- nr = (u32) node_map->nr;
+ nr = (u32) perf_cpu_map__nr(node_map);
tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr);
if (!tp)
@@ -333,7 +334,7 @@ struct numa_topology *numa_topology__new(void)
tp->nr = nr;
for (i = 0; i < nr; i++) {
- if (load_numa_node(&tp->nodes[i], node_map->map[i])) {
+ if (load_numa_node(&tp->nodes[i], perf_cpu_map__cpu(node_map, i).cpu)) {
numa_topology__delete(tp);
tp = NULL;
break;
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 8f7705bbc2da..9e0aee276df8 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -318,6 +318,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
offset = tmp_val;
len = offset >> 16;
offset &= 0xffff;
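+ /* __rel_loc fields store the offset relative to the end of this field. */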
+ if (flags & TEP_FIELD_IS_RELATIVE)
+ offset += fmtf->offset + fmtf->size;
}
if (flags & TEP_FIELD_IS_ARRAY) {
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index c7a9fa0ffae9..65e6c22f38e4 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -24,16 +24,6 @@
#include "util/parse-sublevel-options.h"
#include <linux/ctype.h>
-#include <traceevent/event-parse.h>
-
-#define MAKE_LIBTRACEEVENT_VERSION(a, b, c) ((a)*255*255+(b)*255+(c))
-#ifndef LIBTRACEEVENT_VERSION
-/*
- * If LIBTRACEEVENT_VERSION wasn't computed then set to version 1.1.0 that ships
- * with the Linux kernel tools.
- */
-#define LIBTRACEEVENT_VERSION MAKE_LIBTRACEEVENT_VERSION(1, 1, 0)
-#endif
int verbose;
int debug_peo_args;
@@ -189,7 +179,7 @@ static int trace_event_printer(enum binary_printer_ops op,
break;
case BINARY_PRINT_CHAR_DATA:
printed += color_fprintf(fp, color, "%c",
- isprint(ch) ? ch : '.');
+ isprint(ch) && isascii(ch) ? ch : '.');
break;
case BINARY_PRINT_CHAR_PAD:
printed += color_fprintf(fp, color, " ");
@@ -238,15 +228,6 @@ int perf_debug_option(const char *str)
/* Allow only verbose value in range (0, 10), otherwise set 0. */
verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose;
-#if MAKE_LIBTRACEEVENT_VERSION(1, 3, 0) <= LIBTRACEEVENT_VERSION
- if (verbose == 1)
- tep_set_loglevel(TEP_LOG_INFO);
- else if (verbose == 2)
- tep_set_loglevel(TEP_LOG_DEBUG);
- else if (verbose >= 3)
- tep_set_loglevel(TEP_LOG_ALL);
-#endif
-
return 0;
}
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index b9904896eb97..579e44c59914 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -285,13 +285,13 @@ out_enomem:
int perf_env__read_cpu_topology_map(struct perf_env *env)
{
- int cpu, nr_cpus;
+ int idx, nr_cpus;
if (env->cpu != NULL)
return 0;
if (env->nr_cpus_avail == 0)
- env->nr_cpus_avail = cpu__max_present_cpu();
+ env->nr_cpus_avail = cpu__max_present_cpu().cpu;
nr_cpus = env->nr_cpus_avail;
if (nr_cpus == -1)
@@ -301,10 +301,12 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
if (env->cpu == NULL)
return -ENOMEM;
- for (cpu = 0; cpu < nr_cpus; ++cpu) {
- env->cpu[cpu].core_id = cpu_map__get_core_id(cpu);
- env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu);
- env->cpu[cpu].die_id = cpu_map__get_die_id(cpu);
+ for (idx = 0; idx < nr_cpus; ++idx) {
+ struct perf_cpu cpu = { .cpu = idx };
+
+ env->cpu[idx].core_id = cpu__get_core_id(cpu);
+ env->cpu[idx].socket_id = cpu__get_socket_id(cpu);
+ env->cpu[idx].die_id = cpu__get_die_id(cpu);
}
env->nr_cpus_avail = nr_cpus;
@@ -381,7 +383,7 @@ static int perf_env__read_arch(struct perf_env *env)
static int perf_env__read_nr_cpus_avail(struct perf_env *env)
{
if (env->nr_cpus_avail == 0)
- env->nr_cpus_avail = cpu__max_present_cpu();
+ env->nr_cpus_avail = cpu__max_present_cpu().cpu;
return env->nr_cpus_avail ? 0 : -ENOENT;
}
@@ -487,7 +489,7 @@ const char *perf_env__pmu_mappings(struct perf_env *env)
return env->pmu_mappings;
}
-int perf_env__numa_node(struct perf_env *env, int cpu)
+int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
{
if (!env->nr_numa_map) {
struct numa_node *nn;
@@ -495,7 +497,7 @@ int perf_env__numa_node(struct perf_env *env, int cpu)
for (i = 0; i < env->nr_numa_nodes; i++) {
nn = &env->numa_nodes[i];
- nr = max(nr, perf_cpu_map__max(nn->map));
+ nr = max(nr, perf_cpu_map__max(nn->map).cpu);
}
nr++;
@@ -514,13 +516,14 @@ int perf_env__numa_node(struct perf_env *env, int cpu)
env->nr_numa_map = nr;
for (i = 0; i < env->nr_numa_nodes; i++) {
- int tmp, j;
+ struct perf_cpu tmp;
+ int j;
nn = &env->numa_nodes[i];
- perf_cpu_map__for_each_cpu(j, tmp, nn->map)
- env->numa_map[j] = i;
+ perf_cpu_map__for_each_cpu(tmp, j, nn->map)
+ env->numa_map[tmp.cpu] = i;
}
}
- return cpu >= 0 && cpu < env->nr_numa_map ? env->numa_map[cpu] : -1;
+ return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1;
}
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 163e5ec503a2..a3541f98e1fc 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/rbtree.h>
+#include "cpumap.h"
#include "rwsem.h"
struct perf_cpu_map;
@@ -170,5 +171,5 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
-int perf_env__numa_node(struct perf_env *env, int cpu);
+int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
#endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 95ffed66369c..c59331eea1d9 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -44,13 +44,16 @@ struct perf_event_attr;
/* perf sample has 16 bits size limit */
#define PERF_SAMPLE_MAX_SIZE (1 << 16)
+/* the number of registers is bounded by the number of bits in regs_dump::mask (64) */
+#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64))
+
struct regs_dump {
u64 abi;
u64 mask;
u64 *regs;
/* Cached values/mask filled by first register access. */
- u64 cache_regs[PERF_REGS_MAX];
+ u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE];
u64 cache_mask;
};
diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
index 7c554234b43d..7f234215147d 100644
--- a/tools/perf/util/evlist-hybrid.c
+++ b/tools/perf/util/evlist-hybrid.c
@@ -124,22 +124,23 @@ int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
events_nr++;
- if (matched_cpus->nr > 0 && (unmatched_cpus->nr > 0 ||
- matched_cpus->nr < cpus->nr ||
- matched_cpus->nr < pmu->cpus->nr)) {
+ if (perf_cpu_map__nr(matched_cpus) > 0 &&
+ (perf_cpu_map__nr(unmatched_cpus) > 0 ||
+ perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) ||
+ perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) {
perf_cpu_map__put(evsel->core.cpus);
perf_cpu_map__put(evsel->core.own_cpus);
evsel->core.cpus = perf_cpu_map__get(matched_cpus);
evsel->core.own_cpus = perf_cpu_map__get(matched_cpus);
- if (unmatched_cpus->nr > 0) {
+ if (perf_cpu_map__nr(unmatched_cpus) > 0) {
cpu_map__snprint(matched_cpus, buf1, sizeof(buf1));
pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n",
buf1, pmu->name, evsel->name);
}
}
- if (matched_cpus->nr == 0) {
+ if (perf_cpu_map__nr(matched_cpus) == 0) {
evlist__remove(evlist, evsel);
evsel__delete(evsel);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 5f92319ce258..eaad04e1672a 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -342,36 +342,65 @@ static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel)
return perf_thread_map__nr(evlist->core.threads);
}
-void evlist__cpu_iter_start(struct evlist *evlist)
-{
- struct evsel *pos;
-
- /*
- * Reset the per evsel cpu_iter. This is needed because
- * each evsel's cpumap may have a different index space,
- * and some operations need the index to modify
- * the FD xyarray (e.g. open, close)
- */
- evlist__for_each_entry(evlist, pos)
- pos->cpu_iter = 0;
-}
+struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity)
+{
+ struct evlist_cpu_iterator itr = {
+ .container = evlist,
+ .evsel = evlist__first(evlist),
+ .cpu_map_idx = 0,
+ .evlist_cpu_map_idx = 0,
+ .evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus),
+ .cpu = (struct perf_cpu){ .cpu = -1},
+ .affinity = affinity,
+ };
-bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu)
-{
- if (ev->cpu_iter >= ev->core.cpus->nr)
- return true;
- if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu)
- return true;
- return false;
+ if (itr.affinity) {
+ itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
+ affinity__set(itr.affinity, itr.cpu.cpu);
+ itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
+ /*
+ * If this CPU isn't in the evsel's cpu map then advance through
+ * the list.
+ */
+ if (itr.cpu_map_idx == -1)
+ evlist_cpu_iterator__next(&itr);
+ }
+ return itr;
+}
+
+void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr)
+{
+ while (evlist_cpu_itr->evsel != evlist__last(evlist_cpu_itr->container)) {
+ evlist_cpu_itr->evsel = evsel__next(evlist_cpu_itr->evsel);
+ evlist_cpu_itr->cpu_map_idx =
+ perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
+ evlist_cpu_itr->cpu);
+ if (evlist_cpu_itr->cpu_map_idx != -1)
+ return;
+ }
+ evlist_cpu_itr->evlist_cpu_map_idx++;
+ if (evlist_cpu_itr->evlist_cpu_map_idx < evlist_cpu_itr->evlist_cpu_map_nr) {
+ evlist_cpu_itr->evsel = evlist__first(evlist_cpu_itr->container);
+ evlist_cpu_itr->cpu =
+ perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus,
+ evlist_cpu_itr->evlist_cpu_map_idx);
+ if (evlist_cpu_itr->affinity)
+ affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu.cpu);
+ evlist_cpu_itr->cpu_map_idx =
+ perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
+ evlist_cpu_itr->cpu);
+ /*
+ * If this CPU isn't in the evsel's cpu map then advance through
+ * the list.
+ */
+ if (evlist_cpu_itr->cpu_map_idx == -1)
+ evlist_cpu_iterator__next(evlist_cpu_itr);
+ }
}
-bool evsel__cpu_iter_skip(struct evsel *ev, int cpu)
+bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr)
{
- if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) {
- ev->cpu_iter++;
- return false;
- }
- return true;
+ return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr;
}
static int evsel__strcmp(struct evsel *pos, char *evsel_name)
@@ -400,37 +429,36 @@ static int evlist__is_enabled(struct evlist *evlist)
static void __evlist__disable(struct evlist *evlist, char *evsel_name)
{
struct evsel *pos;
- struct affinity affinity;
- int cpu, i, imm = 0;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity saved_affinity, *affinity = NULL;
bool has_imm = false;
- if (affinity__setup(&affinity) < 0)
- return;
+ // See explanation in evlist__close()
+ if (!cpu_map__is_dummy(evlist->core.cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return;
+ affinity = &saved_affinity;
+ }
/* Disable 'immediate' events last */
- for (imm = 0; imm <= 1; imm++) {
- evlist__for_each_cpu(evlist, i, cpu) {
- affinity__set(&affinity, cpu);
-
- evlist__for_each_entry(evlist, pos) {
- if (evsel__strcmp(pos, evsel_name))
- continue;
- if (evsel__cpu_iter_skip(pos, cpu))
- continue;
- if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
- continue;
- if (pos->immediate)
- has_imm = true;
- if (pos->immediate != imm)
- continue;
- evsel__disable_cpu(pos, pos->cpu_iter - 1);
- }
+ for (int imm = 0; imm <= 1; imm++) {
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
+ pos = evlist_cpu_itr.evsel;
+ if (evsel__strcmp(pos, evsel_name))
+ continue;
+ if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ if (pos->immediate)
+ has_imm = true;
+ if (pos->immediate != imm)
+ continue;
+ evsel__disable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
}
if (!has_imm)
break;
}
- affinity__cleanup(&affinity);
+ affinity__cleanup(affinity);
evlist__for_each_entry(evlist, pos) {
if (evsel__strcmp(pos, evsel_name))
continue;
@@ -462,26 +490,25 @@ void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
static void __evlist__enable(struct evlist *evlist, char *evsel_name)
{
struct evsel *pos;
- struct affinity affinity;
- int cpu, i;
-
- if (affinity__setup(&affinity) < 0)
- return;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity saved_affinity, *affinity = NULL;
- evlist__for_each_cpu(evlist, i, cpu) {
- affinity__set(&affinity, cpu);
+ // See explanation in evlist__close()
+ if (!cpu_map__is_dummy(evlist->core.cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return;
+ affinity = &saved_affinity;
+ }
- evlist__for_each_entry(evlist, pos) {
- if (evsel__strcmp(pos, evsel_name))
- continue;
- if (evsel__cpu_iter_skip(pos, cpu))
- continue;
- if (!evsel__is_group_leader(pos) || !pos->core.fd)
- continue;
- evsel__enable_cpu(pos, pos->cpu_iter - 1);
- }
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
+ pos = evlist_cpu_itr.evsel;
+ if (evsel__strcmp(pos, evsel_name))
+ continue;
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
}
- affinity__cleanup(&affinity);
+ affinity__cleanup(affinity);
evlist__for_each_entry(evlist, pos) {
if (evsel__strcmp(pos, evsel_name))
continue;
@@ -800,7 +827,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp,
- int output, int cpu)
+ int output, struct perf_cpu cpu)
{
struct mmap *map = container_of(_map, struct mmap, core);
struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
@@ -1264,14 +1291,14 @@ void evlist__set_selected(struct evlist *evlist, struct evsel *evsel)
void evlist__close(struct evlist *evlist)
{
struct evsel *evsel;
+ struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity affinity;
- int cpu, i;
/*
* With perf record core.cpus is usually NULL.
* Use the old method to handle this for now.
*/
- if (!evlist->core.cpus) {
+ if (!evlist->core.cpus || cpu_map__is_dummy(evlist->core.cpus)) {
evlist__for_each_entry_reverse(evlist, evsel)
evsel__close(evsel);
return;
@@ -1279,15 +1306,12 @@ void evlist__close(struct evlist *evlist)
if (affinity__setup(&affinity) < 0)
return;
- evlist__for_each_cpu(evlist, i, cpu) {
- affinity__set(&affinity, cpu);
- evlist__for_each_entry_reverse(evlist, evsel) {
- if (evsel__cpu_iter_skip(evsel, cpu))
- continue;
- perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1);
- }
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) {
+ perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core,
+ evlist_cpu_itr.cpu_map_idx);
}
+
affinity__cleanup(&affinity);
evlist__for_each_entry_reverse(evlist, evsel) {
perf_evsel__free_fd(&evsel->core);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 97bfb8d0be4f..64cba56fbc74 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -64,6 +64,7 @@ struct evlist {
struct evsel *selected;
struct events_stats stats;
struct perf_env *env;
+ const char *hybrid_pmu_name;
void (*trace_event_sample_raw)(struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample);
@@ -110,6 +111,7 @@ int __evlist__add_default_attrs(struct evlist *evlist,
__evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
int arch_evlist__add_default_attrs(struct evlist *evlist);
+struct evsel *arch_evlist__leader(struct list_head *list);
int evlist__add_dummy(struct evlist *evlist);
@@ -325,17 +327,53 @@ void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel);
#define evlist__for_each_entry_safe(evlist, tmp, evsel) \
__evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel)
-#define evlist__for_each_cpu(evlist, index, cpu) \
- evlist__cpu_iter_start(evlist); \
- perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus)
+/** Iterator state for evlist__for_each_cpu */
+struct evlist_cpu_iterator {
+ /** The list being iterated through. */
+ struct evlist *container;
+ /** The current evsel of the iterator. */
+ struct evsel *evsel;
+ /** The CPU map index corresponding to evsel->core.cpus for the current CPU. */
+ int cpu_map_idx;
+ /**
+ * The CPU map index corresponding to evlist->core.all_cpus for the
+ * current CPU. Distinct from cpu_map_idx as the evsel's cpu map may
+ * contain fewer entries.
+ */
+ int evlist_cpu_map_idx;
+ /** The number of CPU map entries in evlist->core.all_cpus. */
+ int evlist_cpu_map_nr;
+ /** The current CPU of the iterator. */
+ struct perf_cpu cpu;
+ /** If present, used to set the affinity when switching between CPUs. */
+ struct affinity *affinity;
+};
+
+/**
+ * evlist__for_each_cpu - without affinity, iterate over the evlist. With
+ * affinity, iterate over all CPUs and, for each CPU,
+ * over the evsels whose cpu maps contain it. When
+ * switching between CPUs the affinity is set to that
+ * CPU to avoid IPIs during syscalls.
+ * @evlist_cpu_itr: the iterator instance.
+ * @evlist: evlist instance to iterate.
+ * @affinity: NULL or used to set the affinity to the current CPU.
+ */
+#define evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) \
+ for ((evlist_cpu_itr) = evlist__cpu_begin(evlist, affinity); \
+ !evlist_cpu_iterator__end(&evlist_cpu_itr); \
+ evlist_cpu_iterator__next(&evlist_cpu_itr))
+
+/** Returns an iterator set to the first CPU/evsel of evlist. */
+struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity);
+/** Move to next element in iterator, updating CPU, evsel and the affinity. */
+void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr);
+/** Returns true when iterator is at the end of the CPUs and evlist. */
+bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr);
struct evsel *evlist__get_tracking_event(struct evlist *evlist);
void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel);
-void evlist__cpu_iter_start(struct evlist *evlist);
-bool evsel__cpu_iter_skip(struct evsel *ev, int cpu);
-bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu);
-
struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event);
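(Annotation: a minimal caller sketch for the iterator API above, modeled on __evlist__disable() from this patch; the function name is hypothetical:)

static void disable_all(struct evlist *evlist)
{
	struct evlist_cpu_iterator itr;
	struct affinity affinity;

	if (affinity__setup(&affinity) < 0)
		return;
	/* Visits each CPU once, pinning affinity to it, then every
	 * evsel whose cpu map contains that CPU. */
	evlist__for_each_cpu(itr, evlist, &affinity) {
		evsel__disable_cpu(itr.evsel, itr.cpu_map_idx);
	}
	affinity__cleanup(&affinity);
}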
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ac0127be0459..22d3267ce294 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1064,6 +1064,17 @@ void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_un
{
}
+static void evsel__set_default_freq_period(struct record_opts *opts,
+ struct perf_event_attr *attr)
+{
+ if (opts->freq) {
+ attr->freq = 1;
+ attr->sample_freq = opts->freq;
+ } else {
+ attr->sample_period = opts->default_interval;
+ }
+}
+
/*
* The enable_on_exec/disabled value strategy:
*
@@ -1130,14 +1141,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
* We default some events to have a default interval. But keep
* it a weak assumption overridable by the user.
*/
- if (!attr->sample_period) {
- if (opts->freq) {
- attr->freq = 1;
- attr->sample_freq = opts->freq;
- } else {
- attr->sample_period = opts->default_interval;
- }
- }
+ if ((evsel->is_libpfm_event && !attr->sample_period) ||
+ (!evsel->is_libpfm_event && (!attr->sample_period ||
+ opts->user_freq != UINT_MAX ||
+ opts->user_interval != ULLONG_MAX)))
+ evsel__set_default_freq_period(opts, attr);
+
/*
* If attr->freq was set (here or earlier), ask for period
* to be sampled.
@@ -1372,9 +1381,9 @@ int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
}
/* Caller has to clear disabled after going through all CPUs. */
-int evsel__enable_cpu(struct evsel *evsel, int cpu)
+int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__enable_cpu(&evsel->core, cpu);
+ return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
}
int evsel__enable(struct evsel *evsel)
@@ -1387,9 +1396,9 @@ int evsel__enable(struct evsel *evsel)
}
/* Caller has to set disabled after going through all CPUs. */
-int evsel__disable_cpu(struct evsel *evsel, int cpu)
+int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__disable_cpu(&evsel->core, cpu);
+ return perf_evsel__disable_cpu(&evsel->core, cpu_map_idx);
}
int evsel__disable(struct evsel *evsel)
@@ -1455,7 +1464,7 @@ void evsel__delete(struct evsel *evsel)
free(evsel);
}
-void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
+void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
struct perf_counts_values tmp;
@@ -1463,12 +1472,12 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
if (!evsel->prev_raw_counts)
return;
- if (cpu == -1) {
+ if (cpu_map_idx == -1) {
tmp = evsel->prev_raw_counts->aggr;
evsel->prev_raw_counts->aggr = *count;
} else {
- tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread);
- *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
+ tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
+ *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;
}
count->val = count->val - tmp.val;
@@ -1476,46 +1485,28 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
count->run = count->run - tmp.run;
}
-void perf_counts_values__scale(struct perf_counts_values *count,
- bool scale, s8 *pscaled)
+static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
{
- s8 scaled = 0;
+ struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread);
- if (scale) {
- if (count->run == 0) {
- scaled = -1;
- count->val = 0;
- } else if (count->run < count->ena) {
- scaled = 1;
- count->val = (u64)((double) count->val * count->ena / count->run);
- }
- }
-
- if (pscaled)
- *pscaled = scaled;
-}
-
-static int evsel__read_one(struct evsel *evsel, int cpu, int thread)
-{
- struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);
-
- return perf_evsel__read(&evsel->core, cpu, thread, count);
+ return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
}
-static void evsel__set_count(struct evsel *counter, int cpu, int thread, u64 val, u64 ena, u64 run)
+static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
+ u64 val, u64 ena, u64 run)
{
struct perf_counts_values *count;
- count = perf_counts(counter->counts, cpu, thread);
+ count = perf_counts(counter->counts, cpu_map_idx, thread);
count->val = val;
count->ena = ena;
count->run = run;
- perf_counts__set_loaded(counter->counts, cpu, thread, true);
+ perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
}
-static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, u64 *data)
+static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
{
u64 read_format = leader->core.attr.read_format;
struct sample_read_value *v;
@@ -1534,7 +1525,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread,
v = (struct sample_read_value *) data;
- evsel__set_count(leader, cpu, thread, v[0].value, ena, run);
+ evsel__set_count(leader, cpu_map_idx, thread, v[0].value, ena, run);
for (i = 1; i < nr; i++) {
struct evsel *counter;
@@ -1543,13 +1534,13 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread,
if (!counter)
return -EINVAL;
- evsel__set_count(counter, cpu, thread, v[i].value, ena, run);
+ evsel__set_count(counter, cpu_map_idx, thread, v[i].value, ena, run);
}
return 0;
}
-static int evsel__read_group(struct evsel *leader, int cpu, int thread)
+static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
{
struct perf_stat_evsel *ps = leader->stats;
u64 read_format = leader->core.attr.read_format;
@@ -1570,67 +1561,67 @@ static int evsel__read_group(struct evsel *leader, int cpu, int thread)
ps->group_data = data;
}
- if (FD(leader, cpu, thread) < 0)
+ if (FD(leader, cpu_map_idx, thread) < 0)
return -EINVAL;
- if (readn(FD(leader, cpu, thread), data, size) <= 0)
+ if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0)
return -errno;
- return evsel__process_group_data(leader, cpu, thread, data);
+ return evsel__process_group_data(leader, cpu_map_idx, thread, data);
}
-int evsel__read_counter(struct evsel *evsel, int cpu, int thread)
+int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
{
u64 read_format = evsel->core.attr.read_format;
if (read_format & PERF_FORMAT_GROUP)
- return evsel__read_group(evsel, cpu, thread);
+ return evsel__read_group(evsel, cpu_map_idx, thread);
- return evsel__read_one(evsel, cpu, thread);
+ return evsel__read_one(evsel, cpu_map_idx, thread);
}
-int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale)
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale)
{
struct perf_counts_values count;
size_t nv = scale ? 3 : 1;
- if (FD(evsel, cpu, thread) < 0)
+ if (FD(evsel, cpu_map_idx, thread) < 0)
return -EINVAL;
- if (evsel->counts == NULL && evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
+ if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0)
return -ENOMEM;
- if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
+ if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0)
return -errno;
- evsel__compute_deltas(evsel, cpu, thread, &count);
+ evsel__compute_deltas(evsel, cpu_map_idx, thread, &count);
perf_counts_values__scale(&count, scale, NULL);
- *perf_counts(evsel->counts, cpu, thread) = count;
+ *perf_counts(evsel->counts, cpu_map_idx, thread) = count;
return 0;
}
static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other,
- int cpu)
+ int cpu_map_idx)
{
- int cpuid;
+ struct perf_cpu cpu;
- cpuid = perf_cpu_map__cpu(evsel->core.cpus, cpu);
- return perf_cpu_map__idx(other->core.cpus, cpuid);
+ cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
+ return perf_cpu_map__idx(other->core.cpus, cpu);
}
-static int evsel__hybrid_group_cpu(struct evsel *evsel, int cpu)
+static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx)
{
struct evsel *leader = evsel__leader(evsel);
if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) ||
(!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) {
- return evsel__match_other_cpu(evsel, leader, cpu);
+ return evsel__match_other_cpu(evsel, leader, cpu_map_idx);
}
- return cpu;
+ return cpu_map_idx;
}
-static int get_group_fd(struct evsel *evsel, int cpu, int thread)
+static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
{
struct evsel *leader = evsel__leader(evsel);
int fd;
@@ -1644,11 +1635,11 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread)
*/
BUG_ON(!leader->core.fd);
- cpu = evsel__hybrid_group_cpu(evsel, cpu);
- if (cpu == -1)
+ cpu_map_idx = evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx);
+ if (cpu_map_idx == -1)
return -1;
- fd = FD(leader, cpu, thread);
+ fd = FD(leader, cpu_map_idx, thread);
BUG_ON(fd == -1);
return fd;
@@ -1662,16 +1653,16 @@ static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int
}
static int update_fds(struct evsel *evsel,
- int nr_cpus, int cpu_idx,
+ int nr_cpus, int cpu_map_idx,
int nr_threads, int thread_idx)
{
struct evsel *pos;
- if (cpu_idx >= nr_cpus || thread_idx >= nr_threads)
+ if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads)
return -EINVAL;
evlist__for_each_entry(evsel->evlist, pos) {
- nr_cpus = pos != evsel ? nr_cpus : cpu_idx;
+ nr_cpus = pos != evsel ? nr_cpus : cpu_map_idx;
evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
@@ -1685,10 +1676,10 @@ static int update_fds(struct evsel *evsel,
return 0;
}
-bool evsel__ignore_missing_thread(struct evsel *evsel,
- int nr_cpus, int cpu,
- struct perf_thread_map *threads,
- int thread, int err)
+static bool evsel__ignore_missing_thread(struct evsel *evsel,
+ int nr_cpus, int cpu_map_idx,
+ struct perf_thread_map *threads,
+ int thread, int err)
{
pid_t ignore_pid = perf_thread_map__pid(threads, thread);
@@ -1711,7 +1702,7 @@ bool evsel__ignore_missing_thread(struct evsel *evsel,
* We should remove fd for missing_thread first
* because thread_map__remove() will decrease threads->nr.
*/
- if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread))
+ if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread))
return false;
if (thread_map__remove(threads, thread))
@@ -1800,7 +1791,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
nthreads = threads->nr;
if (evsel->core.fd == NULL &&
- perf_evsel__alloc_fd(&evsel->core, cpus->nr, nthreads) < 0)
+ perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
return -ENOMEM;
evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
@@ -1993,9 +1984,9 @@ bool evsel__increase_rlimit(enum rlimit_action *set_rlimit)
static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads,
- int start_cpu, int end_cpu)
+ int start_cpu_map_idx, int end_cpu_map_idx)
{
- int cpu, thread, nthreads;
+ int idx, thread, nthreads;
int pid = -1, err, old_errno;
enum rlimit_action set_rlimit = NO_CHANGE;
@@ -2022,7 +2013,7 @@ fallback_missing_features:
display_attr(&evsel->core.attr);
- for (cpu = start_cpu; cpu < end_cpu; cpu++) {
+ for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
for (thread = 0; thread < nthreads; thread++) {
int fd, group_fd;
@@ -2033,17 +2024,18 @@ retry_open:
if (!evsel->cgrp && !evsel->core.system_wide)
pid = perf_thread_map__pid(threads, thread);
- group_fd = get_group_fd(evsel, cpu, thread);
+ group_fd = get_group_fd(evsel, idx, thread);
test_attr__ready();
pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
- pid, cpus->map[cpu], group_fd, evsel->open_flags);
+ pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[cpu],
+ fd = sys_perf_event_open(&evsel->core.attr, pid,
+ perf_cpu_map__cpu(cpus, idx).cpu,
group_fd, evsel->open_flags);
- FD(evsel, cpu, thread) = fd;
+ FD(evsel, idx, thread) = fd;
if (fd < 0) {
err = -errno;
@@ -2053,10 +2045,11 @@ retry_open:
goto try_fallback;
}
- bpf_counter__install_pe(evsel, cpu, fd);
+ bpf_counter__install_pe(evsel, idx, fd);
if (unlikely(test_attr__enabled)) {
- test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
+ test_attr__open(&evsel->core.attr, pid,
+ perf_cpu_map__cpu(cpus, idx),
fd, group_fd, evsel->open_flags);
}
@@ -2097,7 +2090,8 @@ try_fallback:
if (evsel__precise_ip_fallback(evsel))
goto retry_open;
- if (evsel__ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
+ if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
+ idx, threads, thread, err)) {
/* We just removed 1 thread, so lower the upper nthreads limit. */
nthreads--;
@@ -2112,7 +2106,7 @@ try_fallback:
if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit))
goto retry_open;
- if (err != -EINVAL || cpu > 0 || thread > 0)
+ if (err != -EINVAL || idx > 0 || thread > 0)
goto out_close;
if (evsel__detect_missing_features(evsel))
@@ -2124,12 +2118,12 @@ out_close:
old_errno = errno;
do {
while (--thread >= 0) {
- if (FD(evsel, cpu, thread) >= 0)
- close(FD(evsel, cpu, thread));
- FD(evsel, cpu, thread) = -1;
+ if (FD(evsel, idx, thread) >= 0)
+ close(FD(evsel, idx, thread));
+ FD(evsel, idx, thread) = -1;
}
thread = nthreads;
- } while (--cpu >= 0);
+ } while (--idx >= 0);
errno = old_errno;
return err;
}
@@ -2137,7 +2131,7 @@ out_close:
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
- return evsel__open_cpu(evsel, cpus, threads, 0, cpus ? cpus->nr : 1);
+ return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus));
}
void evsel__close(struct evsel *evsel)
@@ -2146,13 +2140,12 @@ void evsel__close(struct evsel *evsel)
perf_evsel__free_id(&evsel->core);
}
-int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu)
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx)
{
- if (cpu == -1)
- return evsel__open_cpu(evsel, cpus, NULL, 0,
- cpus ? cpus->nr : 1);
+ if (cpu_map_idx == -1)
+ return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus));
- return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1);
+ return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1);
}
int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
@@ -2706,6 +2699,8 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char
if (field->flags & TEP_FIELD_IS_DYNAMIC) {
offset = *(int *)(sample->raw_data + field->offset);
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
return sample->raw_data + offset;
@@ -2950,6 +2945,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size, "wrong clockid (%d).", clockid);
if (perf_missing_features.aux_output)
return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
+ if (!target__has_cpu(target))
+ return scnprintf(msg, size,
+ "Invalid event (%s) in per-thread mode, enable system wide with '-a'.",
+ evsel__name(evsel));
break;
case ENODATA:
return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. "
@@ -2973,15 +2972,15 @@ struct perf_env *evsel__env(struct evsel *evsel)
static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
{
- int cpu, thread;
+ int cpu_map_idx, thread;
- for (cpu = 0; cpu < xyarray__max_x(evsel->core.fd); cpu++) {
+ for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
thread++) {
- int fd = FD(evsel, cpu, thread);
+ int fd = FD(evsel, cpu_map_idx, thread);
if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
- cpu, thread, fd) < 0)
+ cpu_map_idx, thread, fd) < 0)
return -1;
}
}
@@ -2994,7 +2993,7 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
struct perf_cpu_map *cpus = evsel->core.cpus;
struct perf_thread_map *threads = evsel->core.threads;
- if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr))
+ if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr))
return -ENOMEM;
return store_evsel_ids(evsel, evlist);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 29d49a8c1e92..041b42d33bf5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -11,6 +11,7 @@
#include <perf/evsel.h>
#include "symbol_conf.h"
#include <internal/cpumap.h>
+#include <perf/cpumap.h>
struct bpf_object;
struct cgroup;
@@ -121,7 +122,6 @@ struct evsel {
bool errored;
struct hashmap *per_pkg_mask;
int err;
- int cpu_iter;
struct {
evsel__sb_cb_t *cb;
void *data;
@@ -192,12 +192,9 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel)
static inline int evsel__nr_cpus(struct evsel *evsel)
{
- return evsel__cpus(evsel)->nr;
+ return perf_cpu_map__nr(evsel__cpus(evsel));
}
-void perf_counts_values__scale(struct perf_counts_values *count,
- bool scale, s8 *pscaled);
-
void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
@@ -288,12 +285,12 @@ void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr);
int evsel__set_filter(struct evsel *evsel, const char *filter);
int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
int evsel__append_addr_filter(struct evsel *evsel, const char *filter);
-int evsel__enable_cpu(struct evsel *evsel, int cpu);
+int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx);
int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
-int evsel__disable_cpu(struct evsel *evsel, int cpu);
+int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx);
-int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu);
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx);
int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads);
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
@@ -305,10 +302,6 @@ bool evsel__detect_missing_features(struct evsel *evsel);
enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX };
bool evsel__increase_rlimit(enum rlimit_action *set_rlimit);
-bool evsel__ignore_missing_thread(struct evsel *evsel,
- int nr_cpus, int cpu,
- struct perf_thread_map *threads,
- int thread, int err);
bool evsel__precise_ip_fallback(struct evsel *evsel);
struct perf_sample;
@@ -337,32 +330,32 @@ static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
(e1->core.attr.config == e2->core.attr.config);
}
-int evsel__read_counter(struct evsel *evsel, int cpu, int thread);
+int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread);
-int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale);
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale);
/**
* evsel__read_on_cpu - Read out the results on a CPU and thread
*
* @evsel - event selector to read value
- * @cpu - CPU of interest
+ * @cpu_map_idx - CPU of interest
* @thread - thread of interest
*/
-static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread)
+static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread)
{
- return __evsel__read_on_cpu(evsel, cpu, thread, false);
+ return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, false);
}
/**
* evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
*
* @evsel - event selector to read value
- * @cpu - CPU of interest
+ * @cpu_map_idx - CPU of interest
* @thread - thread of interest
*/
-static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread)
+static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx, int thread)
{
- return __evsel__read_on_cpu(evsel, cpu, thread, true);
+ return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true);
}
int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
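(Annotation: to make the renames above concrete, the argument is an index into the evsel's cpu map rather than a CPU number; a minimal sketch with a hypothetical helper name:)

/* Read thread 0's counter for the first entry of the evsel's cpu map,
 * whatever CPU number that index resolves to. */
static int read_first_map_entry(struct evsel *evsel)
{
	return evsel__read_on_cpu(evsel, /*cpu_map_idx=*/0, /*thread=*/0);
}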
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index 1d532b9fed29..675f318ce7c1 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -12,6 +12,7 @@
#include "expr-bison.h"
#include "expr-flex.h"
#include "smt.h"
+#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <ctype.h>
@@ -65,7 +66,12 @@ static bool key_equal(const void *key1, const void *key2,
struct hashmap *ids__new(void)
{
- return hashmap__new(key_hash, key_equal, NULL);
+ struct hashmap *hash;
+
+ hash = hashmap__new(key_hash, key_equal, NULL);
+ if (IS_ERR(hash))
+ return NULL;
+ return hash;
}
void ids__free(struct hashmap *ids)
@@ -299,6 +305,10 @@ struct expr_parse_ctx *expr__ctx_new(void)
return NULL;
ctx->ids = hashmap__new(key_hash, key_equal, NULL);
+ if (IS_ERR(ctx->ids)) {
+ free(ctx);
+ return NULL;
+ }
ctx->runtime = 0;
return ctx;
@@ -395,12 +405,17 @@ double expr_id_data__source_count(const struct expr_id_data *data)
double expr__get_literal(const char *literal)
{
static struct cpu_topology *topology;
+ double result = NAN;
- if (!strcmp("#smt_on", literal))
- return smt_on() > 0 ? 1.0 : 0.0;
+ if (!strcasecmp("#smt_on", literal)) {
+ result = smt_on() > 0 ? 1.0 : 0.0;
+ goto out;
+ }
- if (!strcmp("#num_cpus", literal))
- return cpu__max_present_cpu();
+ if (!strcmp("#num_cpus", literal)) {
+ result = cpu__max_present_cpu().cpu;
+ goto out;
+ }
/*
* Assume that topology strings are consistent, such as CPUs "0-1"
@@ -412,16 +427,24 @@ double expr__get_literal(const char *literal)
topology = cpu_topology__new();
if (!topology) {
pr_err("Error creating CPU topology");
- return NAN;
+ goto out;
}
}
- if (!strcmp("#num_packages", literal))
- return topology->package_cpus_lists;
- if (!strcmp("#num_dies", literal))
- return topology->die_cpus_lists;
- if (!strcmp("#num_cores", literal))
- return topology->core_cpus_lists;
+ if (!strcmp("#num_packages", literal)) {
+ result = topology->package_cpus_lists;
+ goto out;
+ }
+ if (!strcmp("#num_dies", literal)) {
+ result = topology->die_cpus_lists;
+ goto out;
+ }
+ if (!strcmp("#num_cores", literal)) {
+ result = topology->core_cpus_lists;
+ goto out;
+ }
pr_err("Unrecognized literal '%s'", literal);
- return NAN;
+out:
+ pr_debug2("literal: %s = %f\n", literal, result);
+ return result;
}
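(Annotation: a usage sketch for the literal handling above, assuming expr__get_literal() is exported via util/expr.h as in this series:)

#include <stdio.h>

static void show_literals(void)
{
	double ncpus = expr__get_literal("#num_cpus"); /* cpu__max_present_cpu().cpu */
	double smt = expr__get_literal("#smt_on");     /* 1.0 when SMT is enabled */

	printf("cpus=%.0f smt=%.0f\n", ncpus, smt);    /* NAN on unknown literals */
}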
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
new file mode 100644
index 000000000000..887f68a185f7
--- /dev/null
+++ b/tools/perf/util/ftrace.h
@@ -0,0 +1,81 @@
+#ifndef __PERF_FTRACE_H__
+#define __PERF_FTRACE_H__
+
+#include <linux/list.h>
+
+#include "target.h"
+
+struct evlist;
+
+struct perf_ftrace {
+ struct evlist *evlist;
+ struct target target;
+ const char *tracer;
+ struct list_head filters;
+ struct list_head notrace;
+ struct list_head graph_funcs;
+ struct list_head nograph_funcs;
+ unsigned long percpu_buffer_size;
+ bool inherit;
+ int graph_depth;
+ int func_stack_trace;
+ int func_irq_info;
+ int graph_nosleep_time;
+ int graph_noirqs;
+ int graph_verbose;
+ int graph_thresh;
+ unsigned int initial_delay;
+};
+
+struct filter_entry {
+ struct list_head list;
+ char name[];
+};
+
+#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both directions) */
+
+#ifdef HAVE_BPF_SKEL
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
+ int buckets[]);
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int
+perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+ int buckets[] __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* __PERF_FTRACE_H__ */
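(Annotation: the five entry points above form one fixed lifecycle; with HAVE_BPF_SKEL unset each stub returns -1 so callers can fall back. A hypothetical caller sketch:)

static int run_latency(struct perf_ftrace *ftrace)
{
	int buckets[NUM_BUCKET] = { 0 };

	if (perf_ftrace__latency_prepare_bpf(ftrace) < 0)
		return -1; /* no BPF skeleton support compiled in */
	perf_ftrace__latency_start_bpf(ftrace);
	/* ... run the workload ... */
	perf_ftrace__latency_stop_bpf(ftrace);
	perf_ftrace__latency_read_bpf(ftrace, buckets);
	return perf_ftrace__latency_cleanup_bpf(ftrace);
}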
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 79cce216727e..6da12e522edc 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -472,7 +472,7 @@ static int write_nrcpus(struct feat_fd *ff,
u32 nrc, nra;
int ret;
- nrc = cpu__max_present_cpu();
+ nrc = cpu__max_present_cpu().cpu;
nr = sysconf(_SC_NPROCESSORS_ONLN);
if (nr < 0)
@@ -1163,7 +1163,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp)
u32 nr, cpu;
u16 level;
- nr = cpu__max_cpu();
+ nr = cpu__max_cpu().cpu;
for (cpu = 0; cpu < nr; cpu++) {
for (level = 0; level < MAX_CACHE_LVL; level++) {
@@ -1195,7 +1195,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp)
static int write_cache(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
- u32 max_caches = cpu__max_cpu() * MAX_CACHE_LVL;
+ u32 max_caches = cpu__max_cpu().cpu * MAX_CACHE_LVL;
struct cpu_cache_level caches[max_caches];
u32 cnt = 0, i, version = 1;
int ret;
@@ -2321,6 +2321,7 @@ out:
#define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \
static int process_##__feat(struct feat_fd *ff, void *data __maybe_unused) \
{\
+ free(ff->ph->env.__feat_env); \
ff->ph->env.__feat_env = do_read_string(ff); \
return ff->ph->env.__feat_env ? 0 : -ENOMEM; \
}
@@ -4124,6 +4125,7 @@ int perf_event__process_feature(struct perf_session *session,
struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event;
int type = fe->header.type;
u64 feat = fe->feat_id;
+ int ret = 0;
if (type < 0 || type >= PERF_RECORD_HEADER_MAX) {
pr_warning("invalid record type %d in pipe-mode\n", type);
@@ -4141,11 +4143,13 @@ int perf_event__process_feature(struct perf_session *session,
ff.size = event->header.size - sizeof(*fe);
ff.ph = &session->header;
- if (feat_ops[feat].process(&ff, NULL))
- return -1;
+ if (feat_ops[feat].process(&ff, NULL)) {
+ ret = -1;
+ goto out;
+ }
if (!feat_ops[feat].print || !tool->show_feat_hdr)
- return 0;
+ goto out;
if (!feat_ops[feat].full_only ||
tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) {
@@ -4154,8 +4158,9 @@ int perf_event__process_feature(struct perf_session *session,
fprintf(stdout, "# %s info available, use -I to display\n",
feat_ops[feat].name);
}
-
- return 0;
+out:
+ free_event_desc(ff.events);
+ return ret;
}
size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b776465e04ef..0a8033b09e28 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
- hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
+ hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
+ hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
+
if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16);
else
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 621f35ae1efa..2a15e22fb89c 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -75,7 +75,8 @@ enum hist_column {
HISTC_MEM_BLOCKED,
HISTC_LOCAL_INS_LAT,
HISTC_GLOBAL_INS_LAT,
- HISTC_P_STAGE_CYC,
+ HISTC_LOCAL_P_STAGE_CYC,
+ HISTC_GLOBAL_P_STAGE_CYC,
HISTC_NR_COLS, /* Last entry */
};
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 5f83937bf8f3..0e013c2d9eb4 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -1205,61 +1205,69 @@ out_no_progress:
static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
{
+ enum intel_pt_sample_type type = decoder->state.type;
bool ret = false;
+ decoder->state.type &= ~INTEL_PT_BRANCH;
+
if (decoder->set_fup_tx_flags) {
decoder->set_fup_tx_flags = false;
decoder->tx_flags = decoder->fup_tx_flags;
- decoder->state.type = INTEL_PT_TRANSACTION;
+ decoder->state.type |= INTEL_PT_TRANSACTION;
if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
decoder->state.type |= INTEL_PT_BRANCH;
- decoder->state.from_ip = decoder->ip;
- decoder->state.to_ip = 0;
decoder->state.flags = decoder->fup_tx_flags;
- return true;
+ ret = true;
}
if (decoder->set_fup_ptw) {
decoder->set_fup_ptw = false;
- decoder->state.type = INTEL_PT_PTW;
+ decoder->state.type |= INTEL_PT_PTW;
decoder->state.flags |= INTEL_PT_FUP_IP;
- decoder->state.from_ip = decoder->ip;
- decoder->state.to_ip = 0;
decoder->state.ptw_payload = decoder->fup_ptw_payload;
- return true;
+ ret = true;
}
if (decoder->set_fup_mwait) {
decoder->set_fup_mwait = false;
- decoder->state.type = INTEL_PT_MWAIT_OP;
- decoder->state.from_ip = decoder->ip;
- decoder->state.to_ip = 0;
+ decoder->state.type |= INTEL_PT_MWAIT_OP;
decoder->state.mwait_payload = decoder->fup_mwait_payload;
ret = true;
}
if (decoder->set_fup_pwre) {
decoder->set_fup_pwre = false;
decoder->state.type |= INTEL_PT_PWR_ENTRY;
- decoder->state.type &= ~INTEL_PT_BRANCH;
- decoder->state.from_ip = decoder->ip;
- decoder->state.to_ip = 0;
decoder->state.pwre_payload = decoder->fup_pwre_payload;
ret = true;
}
if (decoder->set_fup_exstop) {
decoder->set_fup_exstop = false;
decoder->state.type |= INTEL_PT_EX_STOP;
- decoder->state.type &= ~INTEL_PT_BRANCH;
decoder->state.flags |= INTEL_PT_FUP_IP;
- decoder->state.from_ip = decoder->ip;
- decoder->state.to_ip = 0;
ret = true;
}
if (decoder->set_fup_bep) {
decoder->set_fup_bep = false;
decoder->state.type |= INTEL_PT_BLK_ITEMS;
- decoder->state.type &= ~INTEL_PT_BRANCH;
+ ret = true;
+ }
+ if (decoder->overflow) {
+ decoder->overflow = false;
+ if (!ret && !decoder->pge) {
+ if (decoder->hop) {
+ decoder->state.type = 0;
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ }
+ decoder->pge = true;
+ decoder->state.type |= INTEL_PT_BRANCH | INTEL_PT_TRACE_BEGIN;
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ return true;
+ }
+ }
+ if (ret) {
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
- ret = true;
+ } else {
+ decoder->state.type = type;
}
return ret;
}
@@ -1608,7 +1616,16 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
intel_pt_clear_tx_flags(decoder);
intel_pt_set_nr(decoder);
decoder->timestamp_insn_cnt = 0;
- decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip = 0;
+ decoder->pge = false;
+ decoder->set_fup_tx_flags = false;
+ decoder->set_fup_ptw = false;
+ decoder->set_fup_mwait = false;
+ decoder->set_fup_pwre = false;
+ decoder->set_fup_exstop = false;
+ decoder->set_fup_bep = false;
decoder->overflow = true;
return -EOVERFLOW;
}
@@ -2666,6 +2683,8 @@ static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder);
/* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */
static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
{
+ *err = 0;
+
/* Leap from PSB to PSB, getting ip from FUP within PSB+ */
if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) {
*err = intel_pt_scan_for_psb(decoder);
@@ -2678,6 +2697,7 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
return HOP_IGNORE;
case INTEL_PT_TIP_PGD:
+ decoder->pge = false;
if (!decoder->packet.count) {
intel_pt_set_nr(decoder);
return HOP_IGNORE;
@@ -2705,18 +2725,21 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
if (!decoder->packet.count)
return HOP_IGNORE;
intel_pt_set_ip(decoder);
- if (intel_pt_fup_event(decoder))
- return HOP_RETURN;
- if (!decoder->branch_enable)
+ if (decoder->set_fup_mwait || decoder->set_fup_pwre)
+ *no_tip = true;
+ if (!decoder->branch_enable || !decoder->pge)
*no_tip = true;
if (*no_tip) {
decoder->state.type = INTEL_PT_INSTRUCTION;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
+ intel_pt_fup_event(decoder);
return HOP_RETURN;
}
+ intel_pt_fup_event(decoder);
+ decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH;
*err = intel_pt_walk_fup_tip(decoder);
- if (!*err)
+ if (!*err && decoder->state.to_ip)
decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
return HOP_RETURN;
@@ -2897,7 +2920,7 @@ static bool intel_pt_psb_with_fup(struct intel_pt_decoder *decoder, int *err)
{
struct intel_pt_psb_info data = { .fup = false };
- if (!decoder->branch_enable || !decoder->pge)
+ if (!decoder->branch_enable)
return false;
intel_pt_pkt_lookahead(decoder, intel_pt_psb_lookahead_cb, &data);
@@ -2924,6 +2947,7 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
if (err)
return err;
next:
+ err = 0;
if (decoder->cyc_threshold) {
if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC)
decoder->sample_cyc = false;
@@ -2962,6 +2986,7 @@ next:
case INTEL_PT_TIP_PGE: {
decoder->pge = true;
+ decoder->overflow = false;
intel_pt_mtc_cyc_cnt_pge(decoder);
intel_pt_set_nr(decoder);
if (decoder->packet.count == 0) {
@@ -2999,7 +3024,7 @@ next:
break;
}
intel_pt_set_last_ip(decoder);
- if (!decoder->branch_enable) {
+ if (!decoder->branch_enable || !decoder->pge) {
decoder->ip = decoder->last_ip;
if (intel_pt_fup_event(decoder))
return 0;
@@ -3467,10 +3492,10 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
decoder->set_fup_pwre = false;
decoder->set_fup_exstop = false;
decoder->set_fup_bep = false;
+ decoder->overflow = false;
if (!decoder->branch_enable) {
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
- decoder->overflow = false;
decoder->state.type = 0; /* Do not have a sample */
return 0;
}
@@ -3485,7 +3510,6 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
else
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
- decoder->overflow = false;
decoder->state.from_ip = 0;
decoder->state.to_ip = decoder->ip;
@@ -3607,7 +3631,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
}
decoder->have_last_ip = true;
- decoder->pkt_state = INTEL_PT_STATE_NO_IP;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
err = intel_pt_walk_psb(decoder);
if (err)
@@ -3704,7 +3728,8 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
if (err) {
decoder->state.err = intel_pt_ext_err(err);
- decoder->state.from_ip = decoder->ip;
+ if (err != -EOVERFLOW)
+ decoder->state.from_ip = decoder->ip;
intel_pt_update_sample_time(decoder);
decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
intel_pt_set_nr(decoder);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 556a893508da..e8613cbda331 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -2565,6 +2565,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
ptq->sync_switch = false;
intel_pt_next_tid(pt, ptq);
}
+ ptq->timestamp = state->est_timestamp;
if (pt->synth_opts.errors) {
err = intel_ptq_synth_error(ptq, state);
if (err)
@@ -3624,6 +3625,7 @@ static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args)
*args = p;
return 0;
}
+ p += 1;
while (1) {
vmcs = strtoull(p, &p, 0);
if (errno)
diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c
index c397be0c2e32..15f60fd09424 100644
--- a/tools/perf/util/libunwind/arm64.c
+++ b/tools/perf/util/libunwind/arm64.c
@@ -23,7 +23,9 @@
#include "unwind.h"
#include "libunwind-aarch64.h"
+#define perf_event_arm_regs perf_event_arm64_regs
#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#undef perf_event_arm_regs
#include "../../arch/arm64/util/unwind-libunwind.c"
/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index fb8496df8432..f70ba56912d4 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -16,6 +16,7 @@
#include "map_symbol.h"
#include "branch.h"
#include "mem-events.h"
+#include "path.h"
#include "srcline.h"
#include "symbol.h"
#include "sort.h"
@@ -34,6 +35,7 @@
#include "bpf-event.h"
#include <internal/lib.h> // page_size
#include "cgroup.h"
+#include "arm64-frame-pointer-unwind-support.h"
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
@@ -1415,7 +1417,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
struct stat st;
/*sshfs might return bad dent->d_type, so we have to stat*/
- snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
+ path__join(path, sizeof(path), dir_name, dent->d_name);
if (stat(path, &st))
continue;
@@ -2710,6 +2712,15 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
return err;
}
+static u64 get_leaf_frame_caller(struct perf_sample *sample,
+ struct thread *thread, int usr_idx)
+{
+ if (machine__normalized_is(thread->maps->machine, "arm64"))
+ return get_leaf_frame_caller_aarch64(sample, thread, usr_idx);
+ else
+ return 0;
+}
+
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
@@ -2723,9 +2734,10 @@ static int thread__resolve_callchain_sample(struct thread *thread,
struct ip_callchain *chain = sample->callchain;
int chain_nr = 0;
u8 cpumode = PERF_RECORD_MISC_USER;
- int i, j, err, nr_entries;
+ int i, j, err, nr_entries, usr_idx;
int skip_idx = -1;
int first_call = 0;
+ u64 leaf_frame_caller;
if (chain)
chain_nr = chain->nr;
@@ -2850,6 +2862,34 @@ check_calls:
continue;
}
+ /*
+ * PERF_CONTEXT_USER allows us to locate where the user stack ends.
+ * Depending on callchain_param.order and the position of PERF_CONTEXT_USER,
+ * the index at which the missing frame is added differs.
+ */
+
+ usr_idx = callchain_param.order == ORDER_CALLEE ? j-2 : j-1;
+
+ if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) {
+
+ leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx);
+
+ /*
+ * Check that leaf_frame_caller != ip so we do not add the same
+ * value twice.
+ */
+
+ if (leaf_frame_caller && leaf_frame_caller != ip) {
+
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, leaf_frame_caller,
+ false, NULL, NULL, 0);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+ }
+
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
false, NULL, NULL, 0);
@@ -3079,14 +3119,19 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
}
/*
- * Compares the raw arch string. N.B. see instead perf_env__arch() if a
- * normalized arch is needed.
+ * Compares the raw arch string. N.B. see instead perf_env__arch() or
+ * machine__normalized_is() if a normalized arch is needed.
*/
bool machine__is(struct machine *machine, const char *arch)
{
return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
}
+bool machine__normalized_is(struct machine *machine, const char *arch)
+{
+ return machine && !strcmp(perf_env__arch(machine->env), arch);
+}
+
int machine__nr_cpus_avail(struct machine *machine)
{
return machine ? perf_env__nr_cpus_avail(machine->env) : 0;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index a143087eeb47..c5a45dc8df4c 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -208,6 +208,7 @@ static inline bool machine__is_host(struct machine *machine)
}
bool machine__is(struct machine *machine, const char *arch);
+bool machine__normalized_is(struct machine *machine, const char *arch);
int machine__nr_cpus_avail(struct machine *machine);
struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 3167b4628b6d..ed0ab838bcc5 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -309,6 +309,9 @@ static const char * const mem_hops[] = {
* to be set with mem_hops field.
*/
"core, same node",
+ "node, same socket",
+ "socket, same board",
+ "board",
};
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
@@ -316,7 +319,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
size_t i, l = 0;
u64 m = PERF_MEM_LVL_NA;
u64 hit, miss;
- int printed;
+ int printed = 0;
if (mem_info)
m = mem_info->data_src.mem_lvl;
@@ -335,18 +338,22 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
l += 7;
}
- if (mem_info && mem_info->data_src.mem_hops)
+ /*
+ * If the mem_hops field is set, we can skip printing the data source
+ * via the PERF_MEM_LVL namespace.
+ */
+ if (mem_info && mem_info->data_src.mem_hops) {
l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
-
- printed = 0;
- for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
- if (!(m & 0x1))
- continue;
- if (printed++) {
- strcat(out, " or ");
- l += 4;
+ } else {
+ for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
+ if (!(m & 0x1))
+ continue;
+ if (printed++) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ l += scnprintf(out + l, sz - l, mem_lvl[i]);
}
- l += scnprintf(out + l, sz - l, mem_lvl[i]);
}
if (mem_info && mem_info->data_src.mem_lvl_num) {
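(Annotation: with mem_hops set, the printed data source now comes from the mem_hops[] table instead of the PERF_MEM_LVL names. Illustrative mapping, assuming the table's unshown first entry is a placeholder for field value 0:)

/* mem_hops field value -> printed prefix (illustrative):
 *   1 -> "core, same node"
 *   2 -> "node, same socket"
 *   3 -> "socket, same board"
 *   4 -> "board"
 */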
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index fffe02aae3ed..d8492e339521 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -209,8 +209,8 @@ static struct metric *metric__new(const struct pmu_event *pe,
m->metric_name = pe->metric_name;
m->modifier = modifier ? strdup(modifier) : NULL;
if (modifier && !m->modifier) {
- free(m);
expr__ctx_free(m->pctx);
+ free(m);
return NULL;
}
m->metric_expr = pe->metric_expr;
@@ -314,7 +314,7 @@ static int setup_metric_events(struct hashmap *ids,
*/
metric_id = evsel__metric_id(ev);
evlist__for_each_entry_continue(metric_evlist, ev) {
- if (!strcmp(evsel__metric_id(metric_events[i]), metric_id))
+ if (!strcmp(evsel__metric_id(ev), metric_id))
ev->metric_leader = metric_events[i];
}
}
@@ -1115,13 +1115,27 @@ out:
return ret;
}
+/**
+ * metric_list_cmp - list_sort comparator that sorts metrics with more events to
+ * the front. duration_time is excluded from the count.
+ */
static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
const struct list_head *r)
{
const struct metric *left = container_of(l, struct metric, nd);
const struct metric *right = container_of(r, struct metric, nd);
+ struct expr_id_data *data;
+ int left_count, right_count;
+
+ left_count = hashmap__size(left->pctx->ids);
+ if (!expr__get_id(left->pctx, "duration_time", &data))
+ left_count--;
+
+ right_count = hashmap__size(right->pctx->ids);
+ if (!expr__get_id(right->pctx, "duration_time", &data))
+ right_count--;
- return hashmap__size(right->pctx->ids) - hashmap__size(left->pctx->ids);
+ return right_count - left_count;
}
/**
@@ -1299,14 +1313,16 @@ err_out:
/**
* parse_ids - Build the event string for the ids and parse them creating an
* evlist. The encoded metric_ids are decoded.
+ * @metric_no_merge: whether metric sharing is explicitly disabled.
* @fake_pmu: used when testing metrics not supported by the current CPU.
* @ids: the event identifiers parsed from a metric.
* @modifier: any modifiers added to the events.
* @has_constraint: false if events should be placed in a weak group.
* @out_evlist: the created list of events.
*/
-static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids,
- const char *modifier, bool has_constraint, struct evlist **out_evlist)
+static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
+ struct expr_parse_ctx *ids, const char *modifier,
+ bool has_constraint, struct evlist **out_evlist)
{
struct parse_events_error parse_error;
struct evlist *parsed_evlist;
@@ -1314,12 +1330,19 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids,
int ret;
*out_evlist = NULL;
- if (hashmap__size(ids->ids) == 0) {
+ if (!metric_no_merge || hashmap__size(ids->ids) == 0) {
char *tmp;
/*
- * No ids/events in the expression parsing context. Events may
- * have been removed because of constant evaluation, e.g.:
- * event1 if #smt_on else 0
+ * We may fail to share events between metrics because
+ * duration_time isn't present in one metric. For example, a
+ * ratio of cache misses doesn't need duration_time but the same
+ * events may be used for a misses-per-second metric. Unshared
+ * events imply multiplexing, which is best avoided, so place
+ * duration_time in every group.
+ *
+ * Also, there may be no ids/events in the expression parsing
+ * context because of constant evaluation, e.g.:
+ * event1 if #smt_on else 0
* Add a duration_time event to avoid a parse error on an empty
* string.
*/
@@ -1387,7 +1410,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
ret = build_combined_expr_ctx(&metric_list, &combined);
if (!ret && combined && hashmap__size(combined->ids)) {
- ret = parse_ids(fake_pmu, combined, /*modifier=*/NULL,
+ ret = parse_ids(metric_no_merge, fake_pmu, combined,
+ /*modifier=*/NULL,
/*has_constraint=*/true,
&combined_evlist);
}
@@ -1435,7 +1459,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
}
}
if (!metric_evlist) {
- ret = parse_ids(fake_pmu, m->pctx, m->modifier,
+ ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier,
m->has_constraint, &m->evlist);
if (ret)
goto out;
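(Annotation: a worked instance of the adjusted comparator; the metric IDs below are hypothetical:)

/* M1 ids = {inst, cyc, duration_time} -> left_count  = 3 - 1 = 2
 * M2 ids = {cyc, duration_time}       -> right_count = 2 - 1 = 1
 * metric_list_cmp() returns 1 - 2 = -1, so list_sort() places M1,
 * the metric with more real events, ahead of M2; duration_time no
 * longer pads the count. */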
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 23ecdba9e670..0e8ff8d1e206 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -94,7 +94,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx)
}
}
-static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
+static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, int affinity)
{
void *data;
size_t mmap_len;
@@ -138,7 +138,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx)
}
static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused,
- int cpu __maybe_unused, int affinity __maybe_unused)
+ struct perf_cpu cpu __maybe_unused, int affinity __maybe_unused)
{
return 0;
}
@@ -240,7 +240,8 @@ void mmap__munmap(struct mmap *map)
static void build_node_mask(int node, struct mmap_cpu_mask *mask)
{
- int c, cpu, nr_cpus;
+ int idx, nr_cpus;
+ struct perf_cpu cpu;
const struct perf_cpu_map *cpu_map = NULL;
cpu_map = cpu_map__online();
@@ -248,16 +249,16 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
return;
nr_cpus = perf_cpu_map__nr(cpu_map);
- for (c = 0; c < nr_cpus; c++) {
- cpu = cpu_map->map[c]; /* map c index to online cpu index */
+ for (idx = 0; idx < nr_cpus; idx++) {
+ cpu = perf_cpu_map__cpu(cpu_map, idx); /* map idx to online cpu */
if (cpu__get_node(cpu) == node)
- set_bit(cpu, mask->bits);
+ set_bit(cpu.cpu, mask->bits);
}
}
static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
{
- map->affinity_mask.nbits = cpu__max_cpu();
+ map->affinity_mask.nbits = cpu__max_cpu().cpu;
map->affinity_mask.bits = bitmap_zalloc(map->affinity_mask.nbits);
if (!map->affinity_mask.bits)
return -1;
@@ -265,12 +266,12 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *
if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
else if (mp->affinity == PERF_AFFINITY_CPU)
- set_bit(map->core.cpu, map->affinity_mask.bits);
+ set_bit(map->core.cpu.cpu, map->affinity_mask.bits);
return 0;
}
-int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
+int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu)
{
if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) {
pr_debug2("failed to mmap perf event ring buffer, error %d\n",
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 8e259b9610f8..83f6bd4d4082 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -7,6 +7,7 @@
#include <linux/types.h>
#include <linux/ring_buffer.h>
#include <linux/bitops.h>
+#include <perf/cpumap.h>
#include <stdbool.h>
#include <pthread.h> // for cpu_set_t
#ifdef HAVE_AIO_SUPPORT
@@ -52,7 +53,7 @@ struct mmap_params {
struct auxtrace_mmap_params auxtrace_mp;
};
-int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu);
+int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu);
void mmap__munmap(struct mmap *map);
union perf_event *perf_mmap__read_forward(struct mmap *map);
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index 608b20c72a5c..48aa3217300b 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -60,17 +60,49 @@ void namespaces__free(struct namespaces *namespaces)
free(namespaces);
}
+static int nsinfo__get_nspid(struct nsinfo *nsi, const char *path)
+{
+ FILE *f = NULL;
+ char *statln = NULL;
+ size_t linesz = 0;
+ char *nspid;
+
+ f = fopen(path, "r");
+ if (f == NULL)
+ return -1;
+
+ while (getline(&statln, &linesz, f) != -1) {
+ /* Use tgid if CONFIG_PID_NS is not defined. */
+ if (strstr(statln, "Tgid:") != NULL) {
+ nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'),
+ NULL, 10);
+ nsi->nstgid = nsi->tgid;
+ }
+
+ if (strstr(statln, "NStgid:") != NULL) {
+ nspid = strrchr(statln, '\t');
+ nsi->nstgid = (pid_t)strtol(nspid, NULL, 10);
+ /*
+ * If innermost tgid is not the first, process is in a different
+ * PID namespace.
+ */
+ nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid;
+ break;
+ }
+ }
+
+ fclose(f);
+ free(statln);
+ return 0;
+}
+
int nsinfo__init(struct nsinfo *nsi)
{
char oldns[PATH_MAX];
char spath[PATH_MAX];
char *newns = NULL;
- char *statln = NULL;
- char *nspid;
struct stat old_stat;
struct stat new_stat;
- FILE *f = NULL;
- size_t linesz = 0;
int rv = -1;
if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
@@ -100,34 +132,9 @@ int nsinfo__init(struct nsinfo *nsi)
if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX)
goto out;
- f = fopen(spath, "r");
- if (f == NULL)
- goto out;
-
- while (getline(&statln, &linesz, f) != -1) {
- /* Use tgid if CONFIG_PID_NS is not defined. */
- if (strstr(statln, "Tgid:") != NULL) {
- nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'),
- NULL, 10);
- nsi->nstgid = nsi->tgid;
- }
-
- if (strstr(statln, "NStgid:") != NULL) {
- nspid = strrchr(statln, '\t');
- nsi->nstgid = (pid_t)strtol(nspid, NULL, 10);
- /* If innermost tgid is not the first, process is in a different
- * PID namespace.
- */
- nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid;
- break;
- }
- }
- rv = 0;
+ rv = nsinfo__get_nspid(nsi, spath);
out:
- if (f != NULL)
- (void) fclose(f);
- free(statln);
free(newns);
return rv;
}
@@ -299,3 +306,12 @@ int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi)
return ret;
}
+
+bool nsinfo__is_in_root_namespace(void)
+{
+ struct nsinfo nsi;
+
+ memset(&nsi, 0x0, sizeof(nsi));
+ nsinfo__get_nspid(&nsi, "/proc/self/status");
+ return !nsi.in_pidns;
+}
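
A short usage sketch for the new helper; the caller and debug string are hypothetical:

	/* nsinfo__get_nspid() fills in_pidns from /proc/self/status, so the
	 * helper answers "is perf itself running in the root PID namespace?" */
	if (!nsinfo__is_in_root_namespace())
		pr_debug("perf is running inside a PID namespace\n");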
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
index ad9775db7b9c..9ceea9643507 100644
--- a/tools/perf/util/namespaces.h
+++ b/tools/perf/util/namespaces.h
@@ -59,6 +59,8 @@ void nsinfo__mountns_exit(struct nscookie *nc);
char *nsinfo__realpath(const char *path, struct nsinfo *nsi);
int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi);
+bool nsinfo__is_in_root_namespace(void);
+
static inline void __nsinfo__zput(struct nsinfo **nsip)
{
if (nsip) {
diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c
index 9fc86971027b..284f8eabd3b9 100644
--- a/tools/perf/util/parse-events-hybrid.c
+++ b/tools/perf/util/parse-events-hybrid.c
@@ -63,10 +63,13 @@ static int create_event_hybrid(__u32 config_type, int *idx,
static int pmu_cmp(struct parse_events_state *parse_state,
struct perf_pmu *pmu)
{
- if (!parse_state->hybrid_pmu_name)
- return 0;
+ if (parse_state->evlist && parse_state->evlist->hybrid_pmu_name)
+ return strcmp(parse_state->evlist->hybrid_pmu_name, pmu->name);
+
+ if (parse_state->hybrid_pmu_name)
+ return strcmp(parse_state->hybrid_pmu_name, pmu->name);
- return strcmp(parse_state->hybrid_pmu_name, pmu->name);
+ return 0;
}
static int add_hw_hybrid(struct parse_events_state *parse_state,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index ba74fdf74af9..9739b05b999e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1697,6 +1697,15 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
}
}
}
+
+ if (parse_state->fake_pmu) {
+ if (!parse_events_add_pmu(parse_state, list, str, head,
+ true, true)) {
+ pr_debug("%s -> %s/%s/\n", str, "fake_pmu", str);
+ ok++;
+ }
+ }
+
out_err:
if (ok)
*listp = list;
@@ -1824,6 +1833,11 @@ out:
return ret;
}
+__weak struct evsel *arch_evlist__leader(struct list_head *list)
+{
+ return list_first_entry(list, struct evsel, core.node);
+}
+
void parse_events__set_leader(char *name, struct list_head *list,
struct parse_events_state *parse_state)
{
@@ -1837,9 +1851,10 @@ void parse_events__set_leader(char *name, struct list_head *list,
if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
return;
- __perf_evlist__set_leader(list);
- leader = list_entry(list->next, struct evsel, core.node);
+ leader = arch_evlist__leader(list);
+ __perf_evlist__set_leader(list, &leader->core);
leader->group_name = name ? strdup(name) : NULL;
+ list_move(&leader->core.node, list);
}
/* list_event is assumed to point to malloc'ed memory */
@@ -2092,8 +2107,17 @@ static void perf_pmu__parse_init(void)
pmu = NULL;
while ((pmu = perf_pmu__scan(pmu)) != NULL) {
list_for_each_entry(alias, &pmu->aliases, list) {
- if (strchr(alias->name, '-'))
+ char *tmp = strchr(alias->name, '-');
+
+ if (tmp) {
+ char *tmp2 = NULL;
+
+ tmp2 = strchr(tmp + 1, '-');
len++;
+ if (tmp2)
+ len++;
+ }
+
len++;
}
}
@@ -2113,8 +2137,20 @@ static void perf_pmu__parse_init(void)
list_for_each_entry(alias, &pmu->aliases, list) {
struct perf_pmu_event_symbol *p = perf_pmu_events_list + len;
char *tmp = strchr(alias->name, '-');
+ char *tmp2 = NULL;
- if (tmp != NULL) {
+ if (tmp)
+ tmp2 = strchr(tmp + 1, '-');
+ if (tmp2) {
+ SET_SYMBOL(strndup(alias->name, tmp - alias->name),
+ PMU_EVENT_SYMBOL_PREFIX);
+ p++;
+ tmp++;
+ SET_SYMBOL(strndup(tmp, tmp2 - tmp), PMU_EVENT_SYMBOL_SUFFIX);
+ p++;
+ SET_SYMBOL(strdup(++tmp2), PMU_EVENT_SYMBOL_SUFFIX2);
+ len += 3;
+ } else if (tmp) {
SET_SYMBOL(strndup(alias->name, tmp - alias->name),
PMU_EVENT_SYMBOL_PREFIX);
p++;
@@ -2141,23 +2177,38 @@ err:
*/
int perf_pmu__test_parse_init(void)
{
- struct perf_pmu_event_symbol *list;
+ struct perf_pmu_event_symbol *list, *tmp, symbols[] = {
+ {(char *)"read", PMU_EVENT_SYMBOL},
+ {(char *)"event", PMU_EVENT_SYMBOL_PREFIX},
+ {(char *)"two", PMU_EVENT_SYMBOL_SUFFIX},
+ {(char *)"hyphen", PMU_EVENT_SYMBOL_SUFFIX},
+ {(char *)"hyph", PMU_EVENT_SYMBOL_SUFFIX2},
+ };
+ unsigned long i, j;
- list = malloc(sizeof(*list) * 1);
+ tmp = list = malloc(sizeof(*list) * ARRAY_SIZE(symbols));
if (!list)
return -ENOMEM;
- list->type = PMU_EVENT_SYMBOL;
- list->symbol = strdup("read");
-
- if (!list->symbol) {
- free(list);
- return -ENOMEM;
+ for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) {
+ tmp->type = symbols[i].type;
+ tmp->symbol = strdup(symbols[i].symbol);
+ if (!tmp->symbol)
+ goto err_free;
}
perf_pmu_events_list = list;
- perf_pmu_events_list_num = 1;
+ perf_pmu_events_list_num = ARRAY_SIZE(symbols);
+
+ qsort(perf_pmu_events_list, ARRAY_SIZE(symbols),
+ sizeof(struct perf_pmu_event_symbol), comp_pmu);
return 0;
+
+err_free:
+ for (j = 0, tmp = list; j < i; j++, tmp++)
+ free(tmp->symbol);
+ free(list);
+ return -ENOMEM;
}
enum perf_pmu_event_symbol_type
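
A worked example of the new SUFFIX2 classification, built from the symbols in the test table above: a two-hyphen alias is split into three tokens and reassembled by the grammar.

	/*
	 * perf_pmu__parse_init() classifies "event-two-hyph" as:
	 *
	 *   "event" -> PMU_EVENT_SYMBOL_PREFIX  (PE_PMU_EVENT_PRE)
	 *   "two"   -> PMU_EVENT_SYMBOL_SUFFIX  (PE_PMU_EVENT_SUF)
	 *   "hyph"  -> PMU_EVENT_SYMBOL_SUFFIX2 (PE_PMU_EVENT_SUF2)
	 *
	 * The PRE '-' SUF '-' SUF2 grammar rule then rebuilds the full name
	 * before calling parse_events_multi_pmu_add().
	 */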
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index c7fc93f54577..a38b8b160e80 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -53,6 +53,7 @@ enum perf_pmu_event_symbol_type {
PMU_EVENT_SYMBOL, /* normal style PMU event */
PMU_EVENT_SYMBOL_PREFIX, /* prefix of pre-suf style event */
PMU_EVENT_SYMBOL_SUFFIX, /* suffix of pre-suf style event */
+ PMU_EVENT_SYMBOL_SUFFIX2, /* suffix of pre-suf2 style event */
};
struct perf_pmu_event_symbol {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 4efe9872c667..5b6e4b5249cf 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -149,6 +149,8 @@ static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_stat
return PE_PMU_EVENT_PRE;
case PMU_EVENT_SYMBOL_SUFFIX:
return PE_PMU_EVENT_SUF;
+ case PMU_EVENT_SYMBOL_SUFFIX2:
+ return PE_PMU_EVENT_SUF2;
case PMU_EVENT_SYMBOL:
return parse_state->fake_pmu
? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT;
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 174158982fae..be8c51770051 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list,
%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
%token PE_ERROR
-%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
+%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%token PE_ARRAY_ALL PE_ARRAY_RANGE
%token PE_DRV_CFG_TERM
%type <num> PE_VALUE
@@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list,
%type <str> PE_MODIFIER_EVENT
%type <str> PE_MODIFIER_BP
%type <str> PE_EVENT_NAME
-%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
+%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%type <str> PE_DRV_CFG_TERM
%type <str> event_pmu_name
%destructor { free ($$); } <str>
@@ -372,6 +372,19 @@ PE_KERNEL_PMU_EVENT opt_pmu_config
$$ = list;
}
|
+PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF '-' PE_PMU_EVENT_SUF2 sep_dc
+{
+ struct list_head *list;
+ char pmu_name[128];
+ snprintf(pmu_name, sizeof(pmu_name), "%s-%s-%s", $1, $3, $5);
+ free($1);
+ free($3);
+ free($5);
+ if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0)
+ YYABORT;
+ $$ = list;
+}
+|
PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
{
struct list_head *list;
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 020411682a3c..c28dd50bd571 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -11,7 +11,7 @@
typedef void (*setup_probe_fn_t)(struct evsel *evsel);
-static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
+static int perf_do_probe_api(setup_probe_fn_t fn, struct perf_cpu cpu, const char *str)
{
struct evlist *evlist;
struct evsel *evsel;
@@ -29,7 +29,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
evsel = evlist__first(evlist);
while (1) {
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags);
if (fd < 0) {
if (pid == -1 && errno == EACCES) {
pid = 0;
@@ -43,7 +43,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
fn(evsel);
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags);
if (fd < 0) {
if (errno == EINVAL)
err = -EINVAL;
@@ -61,12 +61,13 @@ static bool perf_probe_api(setup_probe_fn_t fn)
{
const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
struct perf_cpu_map *cpus;
- int cpu, ret, i = 0;
+ struct perf_cpu cpu;
+ int ret, i = 0;
cpus = perf_cpu_map__new(NULL);
if (!cpus)
return false;
- cpu = cpus->map[0];
+ cpu = perf_cpu_map__cpu(cpus, 0);
perf_cpu_map__put(cpus);
do {
@@ -136,15 +137,17 @@ bool perf_can_record_cpu_wide(void)
.exclude_kernel = 1,
};
struct perf_cpu_map *cpus;
- int cpu, fd;
+ struct perf_cpu cpu;
+ int fd;
cpus = perf_cpu_map__new(NULL);
if (!cpus)
return false;
- cpu = cpus->map[0];
+
+ cpu = perf_cpu_map__cpu(cpus, 0);
perf_cpu_map__put(cpus);
- fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+ fd = sys_perf_event_open(&attr, -1, cpu.cpu, -1, 0);
if (fd < 0)
return false;
close(fd);
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 5ee47ae1509c..a982e40ee5a9 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
+#include <string.h>
#include "perf_regs.h"
#include "event.h"
@@ -20,11 +21,679 @@ uint64_t __weak arch__user_reg_mask(void)
}
#ifdef HAVE_PERF_REGS_SUPPORT
+
+#define perf_event_arm_regs perf_event_arm64_regs
+#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
+#undef perf_event_arm_regs
+
+#include "../../arch/arm/include/uapi/asm/perf_regs.h"
+#include "../../arch/csky/include/uapi/asm/perf_regs.h"
+#include "../../arch/mips/include/uapi/asm/perf_regs.h"
+#include "../../arch/powerpc/include/uapi/asm/perf_regs.h"
+#include "../../arch/riscv/include/uapi/asm/perf_regs.h"
+#include "../../arch/s390/include/uapi/asm/perf_regs.h"
+#include "../../arch/x86/include/uapi/asm/perf_regs.h"
+
+static const char *__perf_reg_name_arm64(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM64_X0:
+ return "x0";
+ case PERF_REG_ARM64_X1:
+ return "x1";
+ case PERF_REG_ARM64_X2:
+ return "x2";
+ case PERF_REG_ARM64_X3:
+ return "x3";
+ case PERF_REG_ARM64_X4:
+ return "x4";
+ case PERF_REG_ARM64_X5:
+ return "x5";
+ case PERF_REG_ARM64_X6:
+ return "x6";
+ case PERF_REG_ARM64_X7:
+ return "x7";
+ case PERF_REG_ARM64_X8:
+ return "x8";
+ case PERF_REG_ARM64_X9:
+ return "x9";
+ case PERF_REG_ARM64_X10:
+ return "x10";
+ case PERF_REG_ARM64_X11:
+ return "x11";
+ case PERF_REG_ARM64_X12:
+ return "x12";
+ case PERF_REG_ARM64_X13:
+ return "x13";
+ case PERF_REG_ARM64_X14:
+ return "x14";
+ case PERF_REG_ARM64_X15:
+ return "x15";
+ case PERF_REG_ARM64_X16:
+ return "x16";
+ case PERF_REG_ARM64_X17:
+ return "x17";
+ case PERF_REG_ARM64_X18:
+ return "x18";
+ case PERF_REG_ARM64_X19:
+ return "x19";
+ case PERF_REG_ARM64_X20:
+ return "x20";
+ case PERF_REG_ARM64_X21:
+ return "x21";
+ case PERF_REG_ARM64_X22:
+ return "x22";
+ case PERF_REG_ARM64_X23:
+ return "x23";
+ case PERF_REG_ARM64_X24:
+ return "x24";
+ case PERF_REG_ARM64_X25:
+ return "x25";
+ case PERF_REG_ARM64_X26:
+ return "x26";
+ case PERF_REG_ARM64_X27:
+ return "x27";
+ case PERF_REG_ARM64_X28:
+ return "x28";
+ case PERF_REG_ARM64_X29:
+ return "x29";
+ case PERF_REG_ARM64_SP:
+ return "sp";
+ case PERF_REG_ARM64_LR:
+ return "lr";
+ case PERF_REG_ARM64_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_arm(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM_R0:
+ return "r0";
+ case PERF_REG_ARM_R1:
+ return "r1";
+ case PERF_REG_ARM_R2:
+ return "r2";
+ case PERF_REG_ARM_R3:
+ return "r3";
+ case PERF_REG_ARM_R4:
+ return "r4";
+ case PERF_REG_ARM_R5:
+ return "r5";
+ case PERF_REG_ARM_R6:
+ return "r6";
+ case PERF_REG_ARM_R7:
+ return "r7";
+ case PERF_REG_ARM_R8:
+ return "r8";
+ case PERF_REG_ARM_R9:
+ return "r9";
+ case PERF_REG_ARM_R10:
+ return "r10";
+ case PERF_REG_ARM_FP:
+ return "fp";
+ case PERF_REG_ARM_IP:
+ return "ip";
+ case PERF_REG_ARM_SP:
+ return "sp";
+ case PERF_REG_ARM_LR:
+ return "lr";
+ case PERF_REG_ARM_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_csky(int id)
+{
+ switch (id) {
+ case PERF_REG_CSKY_A0:
+ return "a0";
+ case PERF_REG_CSKY_A1:
+ return "a1";
+ case PERF_REG_CSKY_A2:
+ return "a2";
+ case PERF_REG_CSKY_A3:
+ return "a3";
+ case PERF_REG_CSKY_REGS0:
+ return "regs0";
+ case PERF_REG_CSKY_REGS1:
+ return "regs1";
+ case PERF_REG_CSKY_REGS2:
+ return "regs2";
+ case PERF_REG_CSKY_REGS3:
+ return "regs3";
+ case PERF_REG_CSKY_REGS4:
+ return "regs4";
+ case PERF_REG_CSKY_REGS5:
+ return "regs5";
+ case PERF_REG_CSKY_REGS6:
+ return "regs6";
+ case PERF_REG_CSKY_REGS7:
+ return "regs7";
+ case PERF_REG_CSKY_REGS8:
+ return "regs8";
+ case PERF_REG_CSKY_REGS9:
+ return "regs9";
+ case PERF_REG_CSKY_SP:
+ return "sp";
+ case PERF_REG_CSKY_LR:
+ return "lr";
+ case PERF_REG_CSKY_PC:
+ return "pc";
+#if defined(__CSKYABIV2__)
+ case PERF_REG_CSKY_EXREGS0:
+ return "exregs0";
+ case PERF_REG_CSKY_EXREGS1:
+ return "exregs1";
+ case PERF_REG_CSKY_EXREGS2:
+ return "exregs2";
+ case PERF_REG_CSKY_EXREGS3:
+ return "exregs3";
+ case PERF_REG_CSKY_EXREGS4:
+ return "exregs4";
+ case PERF_REG_CSKY_EXREGS5:
+ return "exregs5";
+ case PERF_REG_CSKY_EXREGS6:
+ return "exregs6";
+ case PERF_REG_CSKY_EXREGS7:
+ return "exregs7";
+ case PERF_REG_CSKY_EXREGS8:
+ return "exregs8";
+ case PERF_REG_CSKY_EXREGS9:
+ return "exregs9";
+ case PERF_REG_CSKY_EXREGS10:
+ return "exregs10";
+ case PERF_REG_CSKY_EXREGS11:
+ return "exregs11";
+ case PERF_REG_CSKY_EXREGS12:
+ return "exregs12";
+ case PERF_REG_CSKY_EXREGS13:
+ return "exregs13";
+ case PERF_REG_CSKY_EXREGS14:
+ return "exregs14";
+ case PERF_REG_CSKY_TLS:
+ return "tls";
+ case PERF_REG_CSKY_HI:
+ return "hi";
+ case PERF_REG_CSKY_LO:
+ return "lo";
+#endif
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_mips(int id)
+{
+ switch (id) {
+ case PERF_REG_MIPS_PC:
+ return "PC";
+ case PERF_REG_MIPS_R1:
+ return "$1";
+ case PERF_REG_MIPS_R2:
+ return "$2";
+ case PERF_REG_MIPS_R3:
+ return "$3";
+ case PERF_REG_MIPS_R4:
+ return "$4";
+ case PERF_REG_MIPS_R5:
+ return "$5";
+ case PERF_REG_MIPS_R6:
+ return "$6";
+ case PERF_REG_MIPS_R7:
+ return "$7";
+ case PERF_REG_MIPS_R8:
+ return "$8";
+ case PERF_REG_MIPS_R9:
+ return "$9";
+ case PERF_REG_MIPS_R10:
+ return "$10";
+ case PERF_REG_MIPS_R11:
+ return "$11";
+ case PERF_REG_MIPS_R12:
+ return "$12";
+ case PERF_REG_MIPS_R13:
+ return "$13";
+ case PERF_REG_MIPS_R14:
+ return "$14";
+ case PERF_REG_MIPS_R15:
+ return "$15";
+ case PERF_REG_MIPS_R16:
+ return "$16";
+ case PERF_REG_MIPS_R17:
+ return "$17";
+ case PERF_REG_MIPS_R18:
+ return "$18";
+ case PERF_REG_MIPS_R19:
+ return "$19";
+ case PERF_REG_MIPS_R20:
+ return "$20";
+ case PERF_REG_MIPS_R21:
+ return "$21";
+ case PERF_REG_MIPS_R22:
+ return "$22";
+ case PERF_REG_MIPS_R23:
+ return "$23";
+ case PERF_REG_MIPS_R24:
+ return "$24";
+ case PERF_REG_MIPS_R25:
+ return "$25";
+ case PERF_REG_MIPS_R28:
+ return "$28";
+ case PERF_REG_MIPS_R29:
+ return "$29";
+ case PERF_REG_MIPS_R30:
+ return "$30";
+ case PERF_REG_MIPS_R31:
+ return "$31";
+ default:
+ break;
+ }
+ return NULL;
+}
+
+static const char *__perf_reg_name_powerpc(int id)
+{
+ switch (id) {
+ case PERF_REG_POWERPC_R0:
+ return "r0";
+ case PERF_REG_POWERPC_R1:
+ return "r1";
+ case PERF_REG_POWERPC_R2:
+ return "r2";
+ case PERF_REG_POWERPC_R3:
+ return "r3";
+ case PERF_REG_POWERPC_R4:
+ return "r4";
+ case PERF_REG_POWERPC_R5:
+ return "r5";
+ case PERF_REG_POWERPC_R6:
+ return "r6";
+ case PERF_REG_POWERPC_R7:
+ return "r7";
+ case PERF_REG_POWERPC_R8:
+ return "r8";
+ case PERF_REG_POWERPC_R9:
+ return "r9";
+ case PERF_REG_POWERPC_R10:
+ return "r10";
+ case PERF_REG_POWERPC_R11:
+ return "r11";
+ case PERF_REG_POWERPC_R12:
+ return "r12";
+ case PERF_REG_POWERPC_R13:
+ return "r13";
+ case PERF_REG_POWERPC_R14:
+ return "r14";
+ case PERF_REG_POWERPC_R15:
+ return "r15";
+ case PERF_REG_POWERPC_R16:
+ return "r16";
+ case PERF_REG_POWERPC_R17:
+ return "r17";
+ case PERF_REG_POWERPC_R18:
+ return "r18";
+ case PERF_REG_POWERPC_R19:
+ return "r19";
+ case PERF_REG_POWERPC_R20:
+ return "r20";
+ case PERF_REG_POWERPC_R21:
+ return "r21";
+ case PERF_REG_POWERPC_R22:
+ return "r22";
+ case PERF_REG_POWERPC_R23:
+ return "r23";
+ case PERF_REG_POWERPC_R24:
+ return "r24";
+ case PERF_REG_POWERPC_R25:
+ return "r25";
+ case PERF_REG_POWERPC_R26:
+ return "r26";
+ case PERF_REG_POWERPC_R27:
+ return "r27";
+ case PERF_REG_POWERPC_R28:
+ return "r28";
+ case PERF_REG_POWERPC_R29:
+ return "r29";
+ case PERF_REG_POWERPC_R30:
+ return "r30";
+ case PERF_REG_POWERPC_R31:
+ return "r31";
+ case PERF_REG_POWERPC_NIP:
+ return "nip";
+ case PERF_REG_POWERPC_MSR:
+ return "msr";
+ case PERF_REG_POWERPC_ORIG_R3:
+ return "orig_r3";
+ case PERF_REG_POWERPC_CTR:
+ return "ctr";
+ case PERF_REG_POWERPC_LINK:
+ return "link";
+ case PERF_REG_POWERPC_XER:
+ return "xer";
+ case PERF_REG_POWERPC_CCR:
+ return "ccr";
+ case PERF_REG_POWERPC_SOFTE:
+ return "softe";
+ case PERF_REG_POWERPC_TRAP:
+ return "trap";
+ case PERF_REG_POWERPC_DAR:
+ return "dar";
+ case PERF_REG_POWERPC_DSISR:
+ return "dsisr";
+ case PERF_REG_POWERPC_SIER:
+ return "sier";
+ case PERF_REG_POWERPC_MMCRA:
+ return "mmcra";
+ case PERF_REG_POWERPC_MMCR0:
+ return "mmcr0";
+ case PERF_REG_POWERPC_MMCR1:
+ return "mmcr1";
+ case PERF_REG_POWERPC_MMCR2:
+ return "mmcr2";
+ case PERF_REG_POWERPC_MMCR3:
+ return "mmcr3";
+ case PERF_REG_POWERPC_SIER2:
+ return "sier2";
+ case PERF_REG_POWERPC_SIER3:
+ return "sier3";
+ case PERF_REG_POWERPC_PMC1:
+ return "pmc1";
+ case PERF_REG_POWERPC_PMC2:
+ return "pmc2";
+ case PERF_REG_POWERPC_PMC3:
+ return "pmc3";
+ case PERF_REG_POWERPC_PMC4:
+ return "pmc4";
+ case PERF_REG_POWERPC_PMC5:
+ return "pmc5";
+ case PERF_REG_POWERPC_PMC6:
+ return "pmc6";
+ case PERF_REG_POWERPC_SDAR:
+ return "sdar";
+ case PERF_REG_POWERPC_SIAR:
+ return "siar";
+ default:
+ break;
+ }
+ return NULL;
+}
+
+static const char *__perf_reg_name_riscv(int id)
+{
+ switch (id) {
+ case PERF_REG_RISCV_PC:
+ return "pc";
+ case PERF_REG_RISCV_RA:
+ return "ra";
+ case PERF_REG_RISCV_SP:
+ return "sp";
+ case PERF_REG_RISCV_GP:
+ return "gp";
+ case PERF_REG_RISCV_TP:
+ return "tp";
+ case PERF_REG_RISCV_T0:
+ return "t0";
+ case PERF_REG_RISCV_T1:
+ return "t1";
+ case PERF_REG_RISCV_T2:
+ return "t2";
+ case PERF_REG_RISCV_S0:
+ return "s0";
+ case PERF_REG_RISCV_S1:
+ return "s1";
+ case PERF_REG_RISCV_A0:
+ return "a0";
+ case PERF_REG_RISCV_A1:
+ return "a1";
+ case PERF_REG_RISCV_A2:
+ return "a2";
+ case PERF_REG_RISCV_A3:
+ return "a3";
+ case PERF_REG_RISCV_A4:
+ return "a4";
+ case PERF_REG_RISCV_A5:
+ return "a5";
+ case PERF_REG_RISCV_A6:
+ return "a6";
+ case PERF_REG_RISCV_A7:
+ return "a7";
+ case PERF_REG_RISCV_S2:
+ return "s2";
+ case PERF_REG_RISCV_S3:
+ return "s3";
+ case PERF_REG_RISCV_S4:
+ return "s4";
+ case PERF_REG_RISCV_S5:
+ return "s5";
+ case PERF_REG_RISCV_S6:
+ return "s6";
+ case PERF_REG_RISCV_S7:
+ return "s7";
+ case PERF_REG_RISCV_S8:
+ return "s8";
+ case PERF_REG_RISCV_S9:
+ return "s9";
+ case PERF_REG_RISCV_S10:
+ return "s10";
+ case PERF_REG_RISCV_S11:
+ return "s11";
+ case PERF_REG_RISCV_T3:
+ return "t3";
+ case PERF_REG_RISCV_T4:
+ return "t4";
+ case PERF_REG_RISCV_T5:
+ return "t5";
+ case PERF_REG_RISCV_T6:
+ return "t6";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_s390(int id)
+{
+ switch (id) {
+ case PERF_REG_S390_R0:
+ return "R0";
+ case PERF_REG_S390_R1:
+ return "R1";
+ case PERF_REG_S390_R2:
+ return "R2";
+ case PERF_REG_S390_R3:
+ return "R3";
+ case PERF_REG_S390_R4:
+ return "R4";
+ case PERF_REG_S390_R5:
+ return "R5";
+ case PERF_REG_S390_R6:
+ return "R6";
+ case PERF_REG_S390_R7:
+ return "R7";
+ case PERF_REG_S390_R8:
+ return "R8";
+ case PERF_REG_S390_R9:
+ return "R9";
+ case PERF_REG_S390_R10:
+ return "R10";
+ case PERF_REG_S390_R11:
+ return "R11";
+ case PERF_REG_S390_R12:
+ return "R12";
+ case PERF_REG_S390_R13:
+ return "R13";
+ case PERF_REG_S390_R14:
+ return "R14";
+ case PERF_REG_S390_R15:
+ return "R15";
+ case PERF_REG_S390_FP0:
+ return "FP0";
+ case PERF_REG_S390_FP1:
+ return "FP1";
+ case PERF_REG_S390_FP2:
+ return "FP2";
+ case PERF_REG_S390_FP3:
+ return "FP3";
+ case PERF_REG_S390_FP4:
+ return "FP4";
+ case PERF_REG_S390_FP5:
+ return "FP5";
+ case PERF_REG_S390_FP6:
+ return "FP6";
+ case PERF_REG_S390_FP7:
+ return "FP7";
+ case PERF_REG_S390_FP8:
+ return "FP8";
+ case PERF_REG_S390_FP9:
+ return "FP9";
+ case PERF_REG_S390_FP10:
+ return "FP10";
+ case PERF_REG_S390_FP11:
+ return "FP11";
+ case PERF_REG_S390_FP12:
+ return "FP12";
+ case PERF_REG_S390_FP13:
+ return "FP13";
+ case PERF_REG_S390_FP14:
+ return "FP14";
+ case PERF_REG_S390_FP15:
+ return "FP15";
+ case PERF_REG_S390_MASK:
+ return "MASK";
+ case PERF_REG_S390_PC:
+ return "PC";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_x86(int id)
+{
+ switch (id) {
+ case PERF_REG_X86_AX:
+ return "AX";
+ case PERF_REG_X86_BX:
+ return "BX";
+ case PERF_REG_X86_CX:
+ return "CX";
+ case PERF_REG_X86_DX:
+ return "DX";
+ case PERF_REG_X86_SI:
+ return "SI";
+ case PERF_REG_X86_DI:
+ return "DI";
+ case PERF_REG_X86_BP:
+ return "BP";
+ case PERF_REG_X86_SP:
+ return "SP";
+ case PERF_REG_X86_IP:
+ return "IP";
+ case PERF_REG_X86_FLAGS:
+ return "FLAGS";
+ case PERF_REG_X86_CS:
+ return "CS";
+ case PERF_REG_X86_SS:
+ return "SS";
+ case PERF_REG_X86_DS:
+ return "DS";
+ case PERF_REG_X86_ES:
+ return "ES";
+ case PERF_REG_X86_FS:
+ return "FS";
+ case PERF_REG_X86_GS:
+ return "GS";
+ case PERF_REG_X86_R8:
+ return "R8";
+ case PERF_REG_X86_R9:
+ return "R9";
+ case PERF_REG_X86_R10:
+ return "R10";
+ case PERF_REG_X86_R11:
+ return "R11";
+ case PERF_REG_X86_R12:
+ return "R12";
+ case PERF_REG_X86_R13:
+ return "R13";
+ case PERF_REG_X86_R14:
+ return "R14";
+ case PERF_REG_X86_R15:
+ return "R15";
+
+#define XMM(x) \
+ case PERF_REG_X86_XMM ## x: \
+ case PERF_REG_X86_XMM ## x + 1: \
+ return "XMM" #x;
+ XMM(0)
+ XMM(1)
+ XMM(2)
+ XMM(3)
+ XMM(4)
+ XMM(5)
+ XMM(6)
+ XMM(7)
+ XMM(8)
+ XMM(9)
+ XMM(10)
+ XMM(11)
+ XMM(12)
+ XMM(13)
+ XMM(14)
+ XMM(15)
+#undef XMM
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+const char *perf_reg_name(int id, const char *arch)
+{
+ const char *reg_name = NULL;
+
+ if (!strcmp(arch, "csky"))
+ reg_name = __perf_reg_name_csky(id);
+ else if (!strcmp(arch, "mips"))
+ reg_name = __perf_reg_name_mips(id);
+ else if (!strcmp(arch, "powerpc"))
+ reg_name = __perf_reg_name_powerpc(id);
+ else if (!strcmp(arch, "riscv"))
+ reg_name = __perf_reg_name_riscv(id);
+ else if (!strcmp(arch, "s390"))
+ reg_name = __perf_reg_name_s390(id);
+ else if (!strcmp(arch, "x86"))
+ reg_name = __perf_reg_name_x86(id);
+ else if (!strcmp(arch, "arm"))
+ reg_name = __perf_reg_name_arm(id);
+ else if (!strcmp(arch, "arm64"))
+ reg_name = __perf_reg_name_arm64(id);
+
+ return reg_name ?: "unknown";
+}
+
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
{
int i, idx = 0;
u64 mask = regs->mask;
+ if ((u64)id >= PERF_SAMPLE_REGS_CACHE_SIZE)
+ return -EINVAL;
+
if (regs->cache_mask & (1ULL << id))
goto out;
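
With the lookup now keyed on the recorded architecture instead of the build architecture, callers pass the env string through; a minimal sketch, with arch, rid and val assumed to be in scope (compare the session.c hunk below):

	const char *arch = perf_env__arch(machine->env);	/* e.g. "x86" */

	/* Unknown ids or arches degrade to "unknown" instead of failing. */
	printf("%-5s 0x%016" PRIx64 "\n", perf_reg_name(rid, arch), val);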
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index eeac181ebccf..ce1127af05e4 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -11,8 +11,11 @@ struct sample_reg {
const char *name;
uint64_t mask;
};
-#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) }
-#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) }
+
+#define SMPL_REG_MASK(b) (1ULL << (b))
+#define SMPL_REG(n, b) { .name = #n, .mask = SMPL_REG_MASK(b) }
+#define SMPL_REG2_MASK(b) (3ULL << (b))
+#define SMPL_REG2(n, b) { .name = #n, .mask = SMPL_REG2_MASK(b) }
#define SMPL_REG_END { .name = NULL }
enum {
@@ -31,22 +34,16 @@ extern const struct sample_reg sample_reg_masks[];
#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP))
+const char *perf_reg_name(int id, const char *arch);
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
-static inline const char *perf_reg_name(int id)
-{
- const char *reg_name = __perf_reg_name(id);
-
- return reg_name ?: "unknown";
-}
-
#else
#define PERF_REGS_MASK 0
#define PERF_REGS_MAX 0
#define DWARF_MINIMAL_REGS PERF_REGS_MASK
-static inline const char *perf_reg_name(int id __maybe_unused)
+static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused)
{
return "unknown";
}
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 6ae58406f4fc..8dfbba15aeb8 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1659,6 +1659,21 @@ bool is_pmu_core(const char *name)
return !strcmp(name, "cpu") || is_arm_pmu_core(name);
}
+static bool pmu_alias_is_duplicate(struct sevent *alias_a,
+ struct sevent *alias_b)
+{
+ /* Different names -> never duplicates */
+ if (strcmp(alias_a->name, alias_b->name))
+ return false;
+
+ /* Don't remove duplicates for hybrid PMUs */
+ if (perf_pmu__is_hybrid(alias_a->pmu) &&
+ perf_pmu__is_hybrid(alias_b->pmu))
+ return false;
+
+ return true;
+}
+
void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
bool long_desc, bool details_flag, bool deprecated,
const char *pmu_name)
@@ -1744,12 +1759,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
for (j = 0; j < len; j++) {
/* Skip duplicates */
- if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name)) {
- if (!aliases[j].pmu || !aliases[j - 1].pmu ||
- !strcmp(aliases[j].pmu, aliases[j - 1].pmu)) {
- continue;
- }
- }
+ if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1]))
+ continue;
if (name_only) {
printf("%s ", aliases[j].name);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index b2a02c9ab8ea..a834918a0a0d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -3083,6 +3083,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
for (j = 0; j < num_matched_functions; j++) {
sym = syms[j];
+ if (sym->type != STT_FUNC)
+ continue;
+
/* There can be duplicated symbols in the map */
for (i = 0; i < j; i++)
if (sym->start == syms[i]->start) {
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 563a9ba8954f..52d8995cfd73 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -428,6 +428,8 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
offset = val;
len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
@@ -461,7 +463,7 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
struct tep_event *tp_format;
tp_format = trace_event__tp_format_id(evsel->core.attr.config);
- if (!tp_format)
+ if (IS_ERR_OR_NULL(tp_format))
return NULL;
evsel->tp_format = tp_format;
@@ -636,17 +638,17 @@ static Py_ssize_t pyrf_cpu_map__length(PyObject *obj)
{
struct pyrf_cpu_map *pcpus = (void *)obj;
- return pcpus->cpus->nr;
+ return perf_cpu_map__nr(pcpus->cpus);
}
static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
{
struct pyrf_cpu_map *pcpus = (void *)obj;
- if (i >= pcpus->cpus->nr)
+ if (i >= perf_cpu_map__nr(pcpus->cpus))
return NULL;
- return Py_BuildValue("i", pcpus->cpus->map[i]);
+ return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu);
}
static PySequenceMethods pyrf_cpu_map__sequence_methods = {
@@ -1057,7 +1059,7 @@ static struct mmap *get_md(struct evlist *evlist, int cpu)
for (i = 0; i < evlist->core.nr_mmaps; i++) {
struct mmap *md = &evlist->mmap[i];
- if (md->core.cpu == cpu)
+ if (md->core.cpu.cpu == cpu)
return md;
}
@@ -1443,7 +1445,7 @@ error:
* Dummy, to avoid dragging all the test_attr infrastructure in the python
* binding.
*/
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
int fd, int group_fd, unsigned long flags)
{
}
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index bff669b615ee..007a64681416 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -106,7 +106,7 @@ void evlist__config(struct evlist *evlist, struct record_opts *opts, struct call
if (opts->group)
evlist__set_leader(evlist);
- if (evlist->core.cpus->map[0] < 0)
+ if (perf_cpu_map__cpu(evlist->core.cpus, 0).cpu < 0)
opts->no_inherit = true;
use_comm_exec = perf_can_comm_exec();
@@ -229,7 +229,8 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
{
struct evlist *temp_evlist;
struct evsel *evsel;
- int err, fd, cpu;
+ int err, fd;
+ struct perf_cpu cpu = { .cpu = 0 };
bool ret = false;
pid_t pid = -1;
@@ -246,14 +247,16 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) {
struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
- cpu = cpus ? cpus->map[0] : 0;
+ if (cpus)
+ cpu = perf_cpu_map__cpu(cpus, 0);
+
perf_cpu_map__put(cpus);
} else {
- cpu = evlist->core.cpus->map[0];
+ cpu = perf_cpu_map__cpu(evlist->core.cpus, 0);
}
while (1) {
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1,
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1,
perf_event_open_cloexec_flag());
if (fd < 0) {
if (pid == -1 && errno == EACCES) {
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 32a721b3e9a5..a5d945415bbc 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -392,6 +392,8 @@ static void perl_process_tracepoint(struct perf_sample *sample,
if (field->flags & TEP_FIELD_IS_DYNAMIC) {
offset = *(int *)(data + field->offset);
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
} else
offset = field->offset;
XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
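
The same TEP_FIELD_IS_RELATIVE fixup recurs in every scripting engine in this series; the computation, isolated:

	/* A relative dynamic field's 16-bit offset is measured from the end
	 * of the field's own descriptor, so rebase it before reading data. */
	offset = *(int *)(data + field->offset);
	offset &= 0xffff;
	if (field->flags & TEP_FIELD_IS_RELATIVE)
		offset += field->offset + field->size;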
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index c0c010350bc2..e752e1f4a5f0 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -36,6 +36,7 @@
#include "../debug.h"
#include "../dso.h"
#include "../callchain.h"
+#include "../env.h"
#include "../evsel.h"
#include "../event.h"
#include "../thread.h"
@@ -687,7 +688,7 @@ static void set_sample_datasrc_in_dict(PyObject *dict,
_PyUnicode_FromString(decode));
}
-static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
+static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, char *bf, int size)
{
unsigned int i = 0, r;
int printed = 0;
@@ -702,7 +703,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
printed += scnprintf(bf + printed, size - printed,
"%5s:0x%" PRIx64 " ",
- perf_reg_name(r), val);
+ perf_reg_name(r, arch), val);
}
}
@@ -711,6 +712,7 @@ static void set_regs_in_dict(PyObject *dict,
struct evsel *evsel)
{
struct perf_event_attr *attr = &evsel->core.attr;
+ const char *arch = perf_env__arch(evsel__env(evsel));
/*
* Here value 28 is a constant size which can be used to print
@@ -722,12 +724,12 @@ static void set_regs_in_dict(PyObject *dict,
int size = __sw_hweight64(attr->sample_regs_intr) * 28;
char bf[size];
- regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf));
+ regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, sizeof(bf));
pydict_set_item_string_decref(dict, "iregs",
_PyUnicode_FromString(bf));
- regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf));
+ regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, sizeof(bf));
pydict_set_item_string_decref(dict, "uregs",
_PyUnicode_FromString(bf));
@@ -942,6 +944,8 @@ static void python_process_tracepoint(struct perf_sample *sample,
offset = val;
len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
@@ -1553,7 +1557,7 @@ static void get_handler_name(char *str, size_t size,
}
static void
-process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp,
+process_stat(struct evsel *counter, struct perf_cpu cpu, int thread, u64 tstamp,
struct perf_counts_values *count)
{
PyObject *handler, *t;
@@ -1573,7 +1577,7 @@ process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp,
return;
}
- PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu.cpu));
PyTuple_SetItem(t, n++, _PyLong_FromLong(thread));
tuple_set_u64(t, n++, tstamp);
@@ -1597,14 +1601,14 @@ static void python_process_stat(struct perf_stat_config *config,
int cpu, thread;
if (config->aggr_mode == AGGR_GLOBAL) {
- process_stat(counter, -1, -1, tstamp,
+ process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, tstamp,
&counter->counts->aggr);
return;
}
for (thread = 0; thread < threads->nr; thread++) {
- for (cpu = 0; cpu < cpus->nr; cpu++) {
- process_stat(counter, cpus->map[cpu],
+ for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
+ process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
perf_thread_map__pid(threads, thread), tstamp,
perf_counts(counter->counts, cpu, thread));
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d8857d1b6d7c..2c0d30f08e78 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -15,6 +15,7 @@
#include "map_symbol.h"
#include "branch.h"
#include "debug.h"
+#include "env.h"
#include "evlist.h"
#include "evsel.h"
#include "memswap.h"
@@ -1168,7 +1169,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
}
}
-static void regs_dump__printf(u64 mask, u64 *regs)
+static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
{
unsigned rid, i = 0;
@@ -1176,7 +1177,7 @@ static void regs_dump__printf(u64 mask, u64 *regs)
u64 val = regs[i++];
printf(".... %-5s 0x%016" PRIx64 "\n",
- perf_reg_name(rid), val);
+ perf_reg_name(rid, arch), val);
}
}
@@ -1194,7 +1195,7 @@ static inline const char *regs_dump_abi(struct regs_dump *d)
return regs_abi[d->abi];
}
-static void regs__printf(const char *type, struct regs_dump *regs)
+static void regs__printf(const char *type, struct regs_dump *regs, const char *arch)
{
u64 mask = regs->mask;
@@ -1203,23 +1204,23 @@ static void regs__printf(const char *type, struct regs_dump *regs)
mask,
regs_dump_abi(regs));
- regs_dump__printf(mask, regs->regs);
+ regs_dump__printf(mask, regs->regs, arch);
}
-static void regs_user__printf(struct perf_sample *sample)
+static void regs_user__printf(struct perf_sample *sample, const char *arch)
{
struct regs_dump *user_regs = &sample->user_regs;
if (user_regs->regs)
- regs__printf("user", user_regs);
+ regs__printf("user", user_regs, arch);
}
-static void regs_intr__printf(struct perf_sample *sample)
+static void regs_intr__printf(struct perf_sample *sample, const char *arch)
{
struct regs_dump *intr_regs = &sample->intr_regs;
if (intr_regs->regs)
- regs__printf("intr", intr_regs);
+ regs__printf("intr", intr_regs, arch);
}
static void stack_user__printf(struct stack_dump *dump)
@@ -1304,7 +1305,7 @@ char *get_page_size_name(u64 size, char *str)
}
static void dump_sample(struct evsel *evsel, union perf_event *event,
- struct perf_sample *sample)
+ struct perf_sample *sample, const char *arch)
{
u64 sample_type;
char str[PAGE_SIZE_NAME_LEN];
@@ -1325,10 +1326,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
if (sample_type & PERF_SAMPLE_REGS_USER)
- regs_user__printf(sample);
+ regs_user__printf(sample, arch);
if (sample_type & PERF_SAMPLE_REGS_INTR)
- regs_intr__printf(sample);
+ regs_intr__printf(sample, arch);
if (sample_type & PERF_SAMPLE_STACK_USER)
stack_user__printf(&sample->user_stack);
@@ -1502,7 +1503,7 @@ static int machines__deliver_event(struct machines *machines,
++evlist->stats.nr_unknown_id;
return 0;
}
- dump_sample(evsel, event, sample);
+ dump_sample(evsel, event, sample, perf_env__arch(machine->env));
if (machine == NULL) {
++evlist->stats.nr_unprocessable_samples;
return 0;
@@ -2536,16 +2537,16 @@ int perf_session__cpu_bitmap(struct perf_session *session,
return -1;
}
- for (i = 0; i < map->nr; i++) {
- int cpu = map->map[i];
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
- if (cpu >= nr_cpus) {
+ if (cpu.cpu >= nr_cpus) {
pr_err("Requested CPU %d too large. "
- "Consider raising MAX_NR_CPUS\n", cpu);
+ "Consider raising MAX_NR_CPUS\n", cpu.cpu);
goto out_delete_map;
}
- set_bit(cpu, cpu_bitmap);
+ set_bit(cpu.cpu, cpu_bitmap);
}
err = 0;
@@ -2597,7 +2598,7 @@ int perf_event__process_id_index(struct perf_session *session,
if (!sid)
return -ENOENT;
sid->idx = e->idx;
- sid->cpu = e->cpu;
+ sid->cpu.cpu = e->cpu;
sid->tid = e->tid;
}
return 0;
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
index 20bacd5972ad..2b0a36ebf27a 100644
--- a/tools/perf/util/smt.c
+++ b/tools/perf/util/smt.c
@@ -5,6 +5,56 @@
#include "api/fs/fs.h"
#include "smt.h"
+/**
+ * hweight_str - Return the number of bits set in str. Commas are skipped;
+ * parsing stops at the first character that is neither hex nor ','.
+ */
+static int hweight_str(char *str)
+{
+ int result = 0;
+
+ while (*str) {
+ switch (*str++) {
+ case '0':
+ case ',':
+ break;
+ case '1':
+ case '2':
+ case '4':
+ case '8':
+ result++;
+ break;
+ case '3':
+ case '5':
+ case '6':
+ case '9':
+ case 'a':
+ case 'A':
+ case 'c':
+ case 'C':
+ result += 2;
+ break;
+ case '7':
+ case 'b':
+ case 'B':
+ case 'd':
+ case 'D':
+ case 'e':
+ case 'E':
+ result += 3;
+ break;
+ case 'f':
+ case 'F':
+ result += 4;
+ break;
+ default:
+ goto done;
+ }
+ }
+done:
+ return result;
+}
+
int smt_on(void)
{
static bool cached;
@@ -15,9 +65,12 @@ int smt_on(void)
if (cached)
return cached_result;
- if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) > 0)
- goto done;
+ if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) {
+ cached = true;
+ return cached_result;
+ }
+ cached_result = 0;
ncpu = sysconf(_SC_NPROCESSORS_CONF);
for (cpu = 0; cpu < ncpu; cpu++) {
unsigned long long siblings;
@@ -26,27 +79,21 @@ int smt_on(void)
char fn[256];
snprintf(fn, sizeof fn,
- "devices/system/cpu/cpu%d/topology/core_cpus", cpu);
+ "devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
if (sysfs__read_str(fn, &str, &strlen) < 0) {
snprintf(fn, sizeof fn,
- "devices/system/cpu/cpu%d/topology/thread_siblings",
- cpu);
+ "devices/system/cpu/cpu%d/topology/core_cpus", cpu);
if (sysfs__read_str(fn, &str, &strlen) < 0)
continue;
}
/* Entry is hex, but does not have 0x, so need custom parser */
- siblings = strtoull(str, NULL, 16);
+ siblings = hweight_str(str);
free(str);
- if (hweight64(siblings) > 1) {
+ if (siblings > 1) {
cached_result = 1;
- cached = true;
break;
}
}
- if (!cached) {
- cached_result = 0;
-done:
- cached = true;
- }
+ cached = true;
return cached_result;
}
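
A usage sketch for hweight_str(); the mask strings are example sysfs thread_siblings contents, and the asserts (with <assert.h>) stand in for a test, were the static function exported:

	char one[] = "1";		/* one sibling: SMT off for this CPU */
	char two[] = "3";		/* two siblings: SMT on */
	char wide[] = "ff,00000003";	/* 8 + 2 bits across two words */

	assert(hweight_str(one) == 1);
	assert(hweight_str(two) == 2);
	assert(hweight_str(wide) == 10);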
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index a111065b484e..cfba8c337783 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -37,7 +37,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault";
const char *parent_pattern = default_parent_pattern;
const char *default_sort_order = "comm,dso,symbol";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
-const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
+const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol";
const char default_tracepoint_sort_order[] = "trace";
@@ -46,8 +46,8 @@ const char *field_order;
regex_t ignore_callees_regex;
int have_ignore_callees = 0;
enum sort_mode sort__mode = SORT_MODE__NORMAL;
-const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
-const char *arch_specific_sort_keys[] = {"p_stage_cyc"};
+static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
+static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
/*
* Replaces all occurrences of a char used with the:
@@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
};
static int64_t
-sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
+sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
{
return left->p_stage_cyc - right->p_stage_cyc;
}
+static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width,
+ he->p_stage_cyc * he->stat.nr_events);
+}
+
static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width)
{
return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
}
-struct sort_entry sort_p_stage_cyc = {
- .se_header = "Pipeline Stage Cycle",
- .se_cmp = sort__global_p_stage_cyc_cmp,
+struct sort_entry sort_local_p_stage_cyc = {
+ .se_header = "Local Pipeline Stage Cycle",
+ .se_cmp = sort__p_stage_cyc_cmp,
.se_snprintf = hist_entry__p_stage_cyc_snprintf,
- .se_width_idx = HISTC_P_STAGE_CYC,
+ .se_width_idx = HISTC_LOCAL_P_STAGE_CYC,
+};
+
+struct sort_entry sort_global_p_stage_cyc = {
+ .se_header = "Pipeline Stage Cycle",
+ .se_cmp = sort__p_stage_cyc_cmp,
+ .se_snprintf = hist_entry__global_p_stage_cyc_snprintf,
+ .se_width_idx = HISTC_GLOBAL_P_STAGE_CYC,
};
struct sort_entry sort_mem_daddr_sym = {
@@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
- DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
+ DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
+ DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
};
#undef DIM
@@ -2365,6 +2381,8 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
tep_read_number_field(field, a->raw_data, &dyn);
offset = dyn & 0xffff;
size = (dyn >> 16) & 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
/* record max width for output */
if (size > hde->dynamic_len)
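
The split sort keys differ only in aggregation; a worked example for a hypothetical hist entry:

	/* p_stage_cyc = 10, stat.nr_events = 3:
	 *   local_p_stage_cyc prints 10
	 *   p_stage_cyc       prints 10 * 3 = 30
	 */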
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 7b7145501933..f994261888e1 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -235,7 +235,8 @@ enum sort_type {
SORT_CODE_PAGE_SIZE,
SORT_LOCAL_INS_LAT,
SORT_GLOBAL_INS_LAT,
- SORT_PIPELINE_STAGE_CYC,
+ SORT_LOCAL_PIPELINE_STAGE_CYC,
+ SORT_GLOBAL_PIPELINE_STAGE_CYC,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 588601000f3f..5db83e51ceef 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -4,6 +4,7 @@
#include <linux/string.h>
#include <linux/time64.h>
#include <math.h>
+#include <perf/cpumap.h>
#include "color.h"
#include "counts.h"
#include "evlist.h"
@@ -120,11 +121,10 @@ static void aggr_printout(struct perf_stat_config *config,
id.die,
config->csv_output ? 0 : -3,
id.core, config->csv_sep);
- } else if (id.core > -1) {
+ } else if (id.cpu.cpu > -1) {
fprintf(config->output, "CPU%*d%s",
config->csv_output ? 0 : -7,
- evsel__cpus(evsel)->map[id.core],
- config->csv_sep);
+ id.cpu.cpu, config->csv_sep);
}
break;
case AGGR_THREAD:
@@ -327,26 +327,24 @@ static void print_metric_header(struct perf_stat_config *config,
fprintf(os->fh, "%*s ", config->metric_only_len, unit);
}
-static int first_shadow_cpu(struct perf_stat_config *config,
- struct evsel *evsel, struct aggr_cpu_id id)
+static int first_shadow_cpu_map_idx(struct perf_stat_config *config,
+ struct evsel *evsel, const struct aggr_cpu_id *id)
{
- struct evlist *evlist = evsel->evlist;
- int i;
+ struct perf_cpu_map *cpus = evsel__cpus(evsel);
+ struct perf_cpu cpu;
+ int idx;
if (config->aggr_mode == AGGR_NONE)
- return id.core;
+ return perf_cpu_map__idx(cpus, id->cpu);
if (!config->aggr_get_id)
return 0;
- for (i = 0; i < evsel__nr_cpus(evsel); i++) {
- int cpu2 = evsel__cpus(evsel)->map[i];
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu);
- if (cpu_map__compare_aggr_cpu_id(
- config->aggr_get_id(config, evlist->core.cpus, cpu2),
- id)) {
- return cpu2;
- }
+ if (aggr_cpu_id__equal(&cpu_id, id))
+ return idx;
}
return 0;
}
@@ -505,7 +503,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
}
perf_stat__print_shadow_stats(config, counter, uval,
- first_shadow_cpu(config, counter, id),
+ first_shadow_cpu_map_idx(config, counter, &id),
&out, &config->metric_events, st);
if (!config->csv_output && !config->metric_only) {
print_noise(config, counter, noise);
@@ -516,23 +514,26 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
static void aggr_update_shadow(struct perf_stat_config *config,
struct evlist *evlist)
{
- int cpu, s;
+ int idx, s;
+ struct perf_cpu cpu;
struct aggr_cpu_id s2, id;
u64 val;
struct evsel *counter;
+ struct perf_cpu_map *cpus;
for (s = 0; s < config->aggr_map->nr; s++) {
id = config->aggr_map->map[s];
evlist__for_each_entry(evlist, counter) {
+ cpus = evsel__cpus(counter);
val = 0;
- for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
- s2 = config->aggr_get_id(config, evlist->core.cpus, cpu);
- if (!cpu_map__compare_aggr_cpu_id(s2, id))
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ s2 = config->aggr_get_id(config, cpu);
+ if (!aggr_cpu_id__equal(&s2, &id))
continue;
- val += perf_counts(counter->counts, cpu, 0)->val;
+ val += perf_counts(counter->counts, idx, 0)->val;
}
perf_stat__update_shadow_stats(counter, val,
- first_shadow_cpu(config, counter, id),
+ first_shadow_cpu_map_idx(config, counter, &id),
&rt_stat);
}
}
@@ -627,25 +628,28 @@ struct aggr_data {
u64 ena, run, val;
struct aggr_cpu_id id;
int nr;
- int cpu;
+ int cpu_map_idx;
};
static void aggr_cb(struct perf_stat_config *config,
struct evsel *counter, void *data, bool first)
{
struct aggr_data *ad = data;
- int cpu;
+ int idx;
+ struct perf_cpu cpu;
+ struct perf_cpu_map *cpus;
struct aggr_cpu_id s2;
- for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
+ cpus = evsel__cpus(counter);
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
struct perf_counts_values *counts;
- s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
- if (!cpu_map__compare_aggr_cpu_id(s2, ad->id))
+ s2 = config->aggr_get_id(config, cpu);
+ if (!aggr_cpu_id__equal(&s2, &ad->id))
continue;
if (first)
ad->nr++;
- counts = perf_counts(counter->counts, cpu, 0);
+ counts = perf_counts(counter->counts, idx, 0);
/*
* When any result is bad, mark them all bad to give
* consistent output in interval mode.
@@ -665,7 +669,7 @@ static void aggr_cb(struct perf_stat_config *config,
static void print_counter_aggrdata(struct perf_stat_config *config,
struct evsel *counter, int s,
char *prefix, bool metric_only,
- bool *first, int cpu)
+ bool *first, struct perf_cpu cpu)
{
struct aggr_data ad;
FILE *output = config->output;
@@ -695,10 +699,9 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- if (cpu != -1) {
- id = cpu_map__empty_aggr_cpu_id();
- id.core = cpu;
- }
+ if (cpu.cpu != -1)
+ id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+
printout(config, id, nr, counter, uval,
prefix, run, ena, 1.0, &rt_stat);
if (!metric_only)
@@ -731,8 +734,8 @@ static void print_aggr(struct perf_stat_config *config,
first = true;
evlist__for_each_entry(evlist, counter) {
print_counter_aggrdata(config, counter, s,
- prefix, metric_only,
- &first, -1);
+ prefix, metric_only,
+ &first, (struct perf_cpu){ .cpu = -1 });
}
if (metric_only)
fputc('\n', output);
@@ -778,7 +781,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread(
continue;
buf[i].counter = counter;
- buf[i].id = cpu_map__empty_aggr_cpu_id();
+ buf[i].id = aggr_cpu_id__empty();
buf[i].id.thread = thread;
buf[i].uval = uval;
buf[i].val = val;
@@ -866,7 +869,7 @@ static void print_counter_aggr(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = cd.avg * counter->scale;
- printout(config, cpu_map__empty_aggr_cpu_id(), 0, counter, uval, prefix, cd.avg_running,
+ printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running,
cd.avg_enabled, cd.avg, &rt_stat);
if (!metric_only)
fprintf(output, "\n");
@@ -878,9 +881,9 @@ static void counter_cb(struct perf_stat_config *config __maybe_unused,
{
struct aggr_data *ad = data;
- ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
- ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
- ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
+ ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val;
+ ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena;
+ ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run;
}
/*
@@ -893,11 +896,12 @@ static void print_counter(struct perf_stat_config *config,
FILE *output = config->output;
u64 ena, run, val;
double uval;
- int cpu;
+ int idx;
+ struct perf_cpu cpu;
struct aggr_cpu_id id;
- for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
- struct aggr_data ad = { .cpu = cpu };
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) {
+ struct aggr_data ad = { .cpu_map_idx = idx };
if (!collect_data(config, counter, counter_cb, &ad))
return;
@@ -909,8 +913,7 @@ static void print_counter(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- id = cpu_map__empty_aggr_cpu_id();
- id.core = cpu;
+ id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
printout(config, id, 0, counter, uval, prefix,
run, ena, 1.0, &rt_stat);
@@ -922,29 +925,32 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
struct evlist *evlist,
char *prefix)
{
- int cpu;
- int nrcpus = 0;
- struct evsel *counter;
- u64 ena, run, val;
- double uval;
- struct aggr_cpu_id id;
+ int all_idx;
+ struct perf_cpu cpu;
- nrcpus = evlist->core.cpus->nr;
- for (cpu = 0; cpu < nrcpus; cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.cpus) {
+ struct evsel *counter;
bool first = true;
if (prefix)
fputs(prefix, config->output);
evlist__for_each_entry(evlist, counter) {
- id = cpu_map__empty_aggr_cpu_id();
- id.core = cpu;
+ u64 ena, run, val;
+ double uval;
+ struct aggr_cpu_id id;
+ int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
+
+ if (counter_idx < 0)
+ continue;
+
+ id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
if (first) {
aggr_printout(config, counter, id, 0);
first = false;
}
- val = perf_counts(counter->counts, cpu, 0)->val;
- ena = perf_counts(counter->counts, cpu, 0)->ena;
- run = perf_counts(counter->counts, cpu, 0)->run;
+ val = perf_counts(counter->counts, counter_idx, 0)->val;
+ ena = perf_counts(counter->counts, counter_idx, 0)->ena;
+ run = perf_counts(counter->counts, counter_idx, 0)->run;
uval = val * counter->scale;
printout(config, id, 0, counter, uval, prefix,
@@ -1208,19 +1214,23 @@ static void print_percore_thread(struct perf_stat_config *config,
{
int s;
struct aggr_cpu_id s2, id;
+ struct perf_cpu_map *cpus;
bool first = true;
+ int idx;
+ struct perf_cpu cpu;
- for (int i = 0; i < evsel__nr_cpus(counter); i++) {
- s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
+ cpus = evsel__cpus(counter);
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ s2 = config->aggr_get_id(config, cpu);
for (s = 0; s < config->aggr_map->nr; s++) {
id = config->aggr_map->map[s];
- if (cpu_map__compare_aggr_cpu_id(s2, id))
+ if (aggr_cpu_id__equal(&s2, &id))
break;
}
print_counter_aggrdata(config, counter, s,
prefix, false,
- &first, i);
+ &first, cpu);
}
}
@@ -1243,8 +1253,8 @@ static void print_percore(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
print_counter_aggrdata(config, counter, s,
- prefix, metric_only,
- &first, -1);
+ prefix, metric_only,
+ &first, (struct perf_cpu){ .cpu = -1 });
}
if (metric_only)
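Aside: the two aggr_cpu_id constructors used above replace the open-coded initialization; a minimal sketch of both idioms, assuming only the signatures visible in this diff (cpu is a struct perf_cpu, thread an int):

	/* Empty aggregation id: every field is "don't care" until set. */
	struct aggr_cpu_id id = aggr_cpu_id__empty();
	id.thread = thread;

	/* Id describing one logical CPU; the second argument is unused
	 * callback data, hence NULL here. */
	struct aggr_cpu_id cid = aggr_cpu_id__cpu(cpu, /*data=*/NULL);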
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 5c7308efa768..10af7804e482 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -32,7 +32,7 @@ struct saved_value {
struct evsel *evsel;
enum stat_type type;
int ctx;
- int cpu;
+ int cpu_map_idx;
struct cgroup *cgrp;
struct runtime_stat *stat;
struct stats stats;
@@ -47,8 +47,8 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
rb_node);
const struct saved_value *b = entry;
- if (a->cpu != b->cpu)
- return a->cpu - b->cpu;
+ if (a->cpu_map_idx != b->cpu_map_idx)
+ return a->cpu_map_idx - b->cpu_map_idx;
/*
* Previously the rbtree was used to link generic metrics.
@@ -105,7 +105,7 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused,
}
static struct saved_value *saved_value_lookup(struct evsel *evsel,
- int cpu,
+ int cpu_map_idx,
bool create,
enum stat_type type,
int ctx,
@@ -115,7 +115,7 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
struct rblist *rblist;
struct rb_node *nd;
struct saved_value dm = {
- .cpu = cpu,
+ .cpu_map_idx = cpu_map_idx,
.evsel = evsel,
.type = type,
.ctx = ctx,
@@ -213,10 +213,10 @@ struct runtime_stat_data {
static void update_runtime_stat(struct runtime_stat *st,
enum stat_type type,
- int cpu, u64 count,
+ int cpu_map_idx, u64 count,
struct runtime_stat_data *rsd)
{
- struct saved_value *v = saved_value_lookup(NULL, cpu, true, type,
+ struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type,
rsd->ctx, st, rsd->cgrp);
if (v)
@@ -229,7 +229,7 @@ static void update_runtime_stat(struct runtime_stat *st,
* instruction rates, etc:
*/
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
- int cpu, struct runtime_stat *st)
+ int cpu_map_idx, struct runtime_stat *st)
{
u64 count_ns = count;
struct saved_value *v;
@@ -241,88 +241,88 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
count *= counter->scale;
if (evsel__is_clock(counter))
- update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd);
+ update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd);
else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
- update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
- update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TRANSACTION_START))
- update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, ELISION_START))
- update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS))
update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT))
update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT))
update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
- update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
- update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
- update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
- update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
- update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
- update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
- update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, SMI_NUM))
- update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, APERF))
- update_runtime_stat(st, STAT_APERF, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd);
if (counter->collect_stat) {
- v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st,
+ v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st,
rsd.cgrp);
update_stats(&v->stats, count);
if (counter->metric_leader)
v->metric_total += count;
} else if (counter->metric_leader) {
v = saved_value_lookup(counter->metric_leader,
- cpu, true, STAT_NONE, 0, st, rsd.cgrp);
+ cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
v->metric_total += count;
v->metric_other++;
}
@@ -464,12 +464,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
}
static double runtime_stat_avg(struct runtime_stat *st,
- enum stat_type type, int cpu,
+ enum stat_type type, int cpu_map_idx,
struct runtime_stat_data *rsd)
{
struct saved_value *v;
- v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
+ v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
if (!v)
return 0.0;
@@ -477,12 +477,12 @@ static double runtime_stat_avg(struct runtime_stat *st,
}
static double runtime_stat_n(struct runtime_stat *st,
- enum stat_type type, int cpu,
+ enum stat_type type, int cpu_map_idx,
struct runtime_stat_data *rsd)
{
struct saved_value *v;
- v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
+ v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
if (!v)
return 0.0;
@@ -490,7 +490,7 @@ static double runtime_stat_n(struct runtime_stat *st,
}
static void print_stalled_cycles_frontend(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -498,7 +498,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -513,7 +513,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
}
static void print_stalled_cycles_backend(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -521,7 +521,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -532,7 +532,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
}
static void print_branch_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -540,7 +540,7 @@ static void print_branch_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -551,7 +551,7 @@ static void print_branch_misses(struct perf_stat_config *config,
}
static void print_l1_dcache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -559,7 +559,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -570,7 +570,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
}
static void print_l1_icache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -578,7 +578,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -588,7 +588,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
}
static void print_dtlb_cache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -596,7 +596,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -606,7 +606,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
}
static void print_itlb_cache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -614,7 +614,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -624,7 +624,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
}
static void print_ll_cache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -632,7 +632,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -690,61 +690,61 @@ static double sanitize_val(double x)
return x;
}
-static double td_total_slots(int cpu, struct runtime_stat *st,
+static double td_total_slots(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
- return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd);
+ return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd);
}
-static double td_bad_spec(int cpu, struct runtime_stat *st,
+static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double bad_spec = 0;
double total_slots;
double total;
- total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) -
- runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) +
- runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) -
+ runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) +
+ runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd);
- total_slots = td_total_slots(cpu, st, rsd);
+ total_slots = td_total_slots(cpu_map_idx, st, rsd);
if (total_slots)
bad_spec = total / total_slots;
return sanitize_val(bad_spec);
}
-static double td_retiring(int cpu, struct runtime_stat *st,
+static double td_retiring(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double retiring = 0;
- double total_slots = td_total_slots(cpu, st, rsd);
+ double total_slots = td_total_slots(cpu_map_idx, st, rsd);
double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
- cpu, rsd);
+ cpu_map_idx, rsd);
if (total_slots)
retiring = ret_slots / total_slots;
return retiring;
}
-static double td_fe_bound(int cpu, struct runtime_stat *st,
+static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double fe_bound = 0;
- double total_slots = td_total_slots(cpu, st, rsd);
+ double total_slots = td_total_slots(cpu_map_idx, st, rsd);
double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
- cpu, rsd);
+ cpu_map_idx, rsd);
if (total_slots)
fe_bound = fetch_bub / total_slots;
return fe_bound;
}
-static double td_be_bound(int cpu, struct runtime_stat *st,
+static double td_be_bound(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
- double sum = (td_fe_bound(cpu, st, rsd) +
- td_bad_spec(cpu, st, rsd) +
- td_retiring(cpu, st, rsd));
+ double sum = (td_fe_bound(cpu_map_idx, st, rsd) +
+ td_bad_spec(cpu_map_idx, st, rsd) +
+ td_retiring(cpu_map_idx, st, rsd));
if (sum == 0)
return 0;
return sanitize_val(1.0 - sum);
@@ -755,15 +755,15 @@ static double td_be_bound(int cpu, struct runtime_stat *st,
* the ratios we need to recreate the sum.
*/
-static double td_metric_ratio(int cpu, enum stat_type type,
+static double td_metric_ratio(int cpu_map_idx, enum stat_type type,
struct runtime_stat *stat,
struct runtime_stat_data *rsd)
{
- double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd);
- double d = runtime_stat_avg(stat, type, cpu, rsd);
+ double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd);
+ double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd);
if (sum)
return d / sum;
@@ -775,23 +775,23 @@ static double td_metric_ratio(int cpu, enum stat_type type,
* We allow two missing.
*/
-static bool full_td(int cpu, struct runtime_stat *stat,
+static bool full_td(int cpu_map_idx, struct runtime_stat *stat,
struct runtime_stat_data *rsd)
{
int c = 0;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0)
c++;
return c >= 2;
}
-static void print_smi_cost(struct perf_stat_config *config, int cpu,
+static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -799,9 +799,9 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu,
double smi_num, aperf, cycles, cost = 0.0;
const char *color = NULL;
- smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd);
- aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd);
- cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
+ smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd);
+ aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd);
+ cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
if ((cycles == 0) || (aperf == 0))
return;
@@ -818,7 +818,7 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu,
static int prepare_metric(struct evsel **metric_events,
struct metric_ref *metric_refs,
struct expr_parse_ctx *pctx,
- int cpu,
+ int cpu_map_idx,
struct runtime_stat *st)
{
double scale;
@@ -836,7 +836,7 @@ static int prepare_metric(struct evsel **metric_events,
scale = 1e-9;
source_count = 1;
} else {
- v = saved_value_lookup(metric_events[i], cpu, false,
+ v = saved_value_lookup(metric_events[i], cpu_map_idx, false,
STAT_NONE, 0, st,
metric_events[i]->cgrp);
if (!v)
@@ -874,7 +874,7 @@ static void generic_metric(struct perf_stat_config *config,
const char *metric_name,
const char *metric_unit,
int runtime,
- int cpu,
+ int cpu_map_idx,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
{
@@ -889,7 +889,7 @@ static void generic_metric(struct perf_stat_config *config,
return;
pctx->runtime = runtime;
- i = prepare_metric(metric_events, metric_refs, pctx, cpu, st);
+ i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st);
if (i < 0) {
expr__ctx_free(pctx);
return;
@@ -934,7 +934,7 @@ static void generic_metric(struct perf_stat_config *config,
expr__ctx_free(pctx);
}
-double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
+double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st)
{
struct expr_parse_ctx *pctx;
double ratio = 0.0;
@@ -943,7 +943,7 @@ double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_sta
if (!pctx)
return NAN;
- if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu, st) < 0)
+ if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0)
goto out;
if (expr__parse(&ratio, pctx, mexp->metric_expr))
@@ -956,7 +956,7 @@ out:
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
- double avg, int cpu,
+ double avg, int cpu_map_idx,
struct perf_stat_output_ctx *out,
struct rblist *metric_events,
struct runtime_stat *st)
@@ -975,7 +975,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (config->iostat_run) {
iostat_print_metric(config, evsel, out);
} else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
if (total) {
ratio = avg / total;
@@ -985,11 +985,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
}
- total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd);
total = max(total, runtime_stat_avg(st,
STAT_STALLED_CYCLES_BACK,
- cpu, &rsd));
+ cpu_map_idx, &rsd));
if (total && avg) {
out->new_line(config, ctxp);
@@ -999,8 +999,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
ratio);
}
} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
- if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0)
- print_branch_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0)
+ print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
} else if (
@@ -1009,8 +1009,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0)
- print_l1_dcache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0)
+ print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
} else if (
@@ -1019,8 +1019,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0)
- print_l1_icache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0)
+ print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
} else if (
@@ -1029,8 +1029,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0)
- print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0)
+ print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
} else if (
@@ -1039,8 +1039,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0)
- print_itlb_cache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0)
+ print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
} else if (
@@ -1049,27 +1049,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0)
- print_ll_cache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0)
+ print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
- total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd);
if (total)
ratio = avg * 100 / total;
- if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0)
+ if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0)
print_metric(config, ctxp, NULL, "%8.3f %%",
"of all cache refs", ratio);
else
print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
- print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd);
+ print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd);
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
- print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd);
+ print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd);
} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
- total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
if (total) {
ratio = avg / total;
@@ -1078,7 +1078,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
}
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
if (total)
print_metric(config, ctxp, NULL,
@@ -1088,8 +1088,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "transactional cycles",
0);
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
- total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
+ total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
if (total2 < avg)
total2 = avg;
@@ -1099,19 +1099,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
- total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
if (avg)
ratio = total / avg;
- if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0)
+ if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0)
print_metric(config, ctxp, NULL, "%8.0f",
"cycles / transaction", ratio);
else
print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
0);
} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
- total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
if (avg)
ratio = total / avg;
@@ -1124,28 +1124,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
- double fe_bound = td_fe_bound(cpu, st, &rsd);
+ double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd);
if (fe_bound > 0.2)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
- double retiring = td_retiring(cpu, st, &rsd);
+ double retiring = td_retiring(cpu_map_idx, st, &rsd);
if (retiring > 0.7)
color = PERF_COLOR_GREEN;
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
- double bad_spec = td_bad_spec(cpu, st, &rsd);
+ double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd);
if (bad_spec > 0.1)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
- double be_bound = td_be_bound(cpu, st, &rsd);
+ double be_bound = td_be_bound(cpu_map_idx, st, &rsd);
const char *name = "backend bound";
static int have_recovery_bubbles = -1;
@@ -1158,14 +1158,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (be_bound > 0.2)
color = PERF_COLOR_RED;
- if (td_total_slots(cpu, st, &rsd) > 0)
+ if (td_total_slots(cpu_map_idx, st, &rsd) > 0)
print_metric(config, ctxp, color, "%8.1f%%", name,
be_bound * 100.);
else
print_metric(config, ctxp, NULL, NULL, name, 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
- full_td(cpu, st, &rsd)) {
- double retiring = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd)) {
+ double retiring = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_RETIRING, st,
&rsd);
if (retiring > 0.7)
@@ -1173,8 +1173,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
- full_td(cpu, st, &rsd)) {
- double fe_bound = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd)) {
+ double fe_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_FE_BOUND, st,
&rsd);
if (fe_bound > 0.2)
@@ -1182,8 +1182,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
- full_td(cpu, st, &rsd)) {
- double be_bound = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd)) {
+ double be_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BE_BOUND, st,
&rsd);
if (be_bound > 0.2)
@@ -1191,8 +1191,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
be_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
- full_td(cpu, st, &rsd)) {
- double bad_spec = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd)) {
+ double bad_spec = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BAD_SPEC, st,
&rsd);
if (bad_spec > 0.1)
@@ -1200,11 +1200,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
- full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
- double retiring = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double retiring = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_RETIRING, st,
&rsd);
- double heavy_ops = td_metric_ratio(cpu,
+ double heavy_ops = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_HEAVY_OPS, st,
&rsd);
double light_ops = retiring - heavy_ops;
@@ -1220,11 +1220,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "light operations",
light_ops * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
- full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
- double bad_spec = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double bad_spec = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BAD_SPEC, st,
&rsd);
- double br_mis = td_metric_ratio(cpu,
+ double br_mis = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BR_MISPREDICT, st,
&rsd);
double m_clears = bad_spec - br_mis;
@@ -1240,11 +1240,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "machine clears",
m_clears * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
- full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
- double fe_bound = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double fe_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_FE_BOUND, st,
&rsd);
- double fetch_lat = td_metric_ratio(cpu,
+ double fetch_lat = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_FETCH_LAT, st,
&rsd);
double fetch_bw = fe_bound - fetch_lat;
@@ -1260,11 +1260,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth",
fetch_bw * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
- full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
- double be_bound = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double be_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BE_BOUND, st,
&rsd);
- double mem_bound = td_metric_ratio(cpu,
+ double mem_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_MEM_BOUND, st,
&rsd);
double core_bound = be_bound - mem_bound;
@@ -1281,12 +1281,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
core_bound * 100.);
} else if (evsel->metric_expr) {
generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
- evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
- } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
+ evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st);
+ } else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) {
char unit = ' ';
char unit_buf[10] = "/sec";
- total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
if (total)
ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
@@ -1294,7 +1294,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
- print_smi_cost(config, cpu, out, st, &rsd);
+ print_smi_cost(config, cpu_map_idx, out, st, &rsd);
} else {
num = 0;
}
@@ -1307,7 +1307,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
out->new_line(config, ctxp);
generic_metric(config, mexp->metric_expr, mexp->metric_events,
mexp->metric_refs, evsel->name, mexp->metric_name,
- mexp->metric_unit, mexp->runtime, cpu, out, st);
+ mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st);
}
}
if (num == 0)
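Aside: the cpu -> cpu_map_idx rename throughout stat-shadow.c encodes that counts and saved_value entries are indexed by position in the evsel's CPU map, not by logical CPU number. A sketch of the distinction, using only accessors that appear in this diff (counter is an evsel as above):

	struct perf_cpu cpu;	/* logical CPU number lives in cpu.cpu */
	int idx;		/* dense index into the evsel's CPU map */

	perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) {
		/* Counter storage is addressed by the map index... */
		u64 val = perf_counts(counter->counts, idx, 0)->val;
		/* ...while topology helpers take the logical CPU. */
		int socket = cpu__get_socket_id(cpu);
	}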
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 09ea334586f2..ee6f03481215 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -152,11 +152,13 @@ static void evsel__free_stat_priv(struct evsel *evsel)
zfree(&evsel->stats);
}
-static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads)
+static int evsel__alloc_prev_raw_counts(struct evsel *evsel)
{
+ int cpu_map_nr = evsel__nr_cpus(evsel);
+ int nthreads = perf_thread_map__nr(evsel->core.threads);
struct perf_counts *counts;
- counts = perf_counts__new(ncpus, nthreads);
+ counts = perf_counts__new(cpu_map_nr, nthreads);
if (counts)
evsel->prev_raw_counts = counts;
@@ -177,12 +179,9 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel)
static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
- int ncpus = evsel__nr_cpus(evsel);
- int nthreads = perf_thread_map__nr(evsel->core.threads);
-
if (evsel__alloc_stat_priv(evsel) < 0 ||
- evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
- (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
+ evsel__alloc_counts(evsel) < 0 ||
+ (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0))
return -ENOMEM;
return 0;
@@ -293,11 +292,12 @@ static bool pkg_id_equal(const void *__key1, const void *__key2,
return *key1 == *key2;
}
-static int check_per_pkg(struct evsel *counter,
- struct perf_counts_values *vals, int cpu, bool *skip)
+static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
+ int cpu_map_idx, bool *skip)
{
struct hashmap *mask = counter->per_pkg_mask;
struct perf_cpu_map *cpus = evsel__cpus(counter);
+ struct perf_cpu cpu = perf_cpu_map__cpu(cpus, cpu_map_idx);
int s, d, ret = 0;
uint64_t *key;
@@ -328,7 +328,7 @@ static int check_per_pkg(struct evsel *counter,
if (!(vals->run && vals->ena))
return 0;
- s = cpu_map__get_socket(cpus, cpu, NULL).socket;
+ s = cpu__get_socket_id(cpu);
if (s < 0)
return -1;
@@ -336,7 +336,7 @@ static int check_per_pkg(struct evsel *counter,
* On multi-die system, die_id > 0. On no-die system, die_id = 0.
* We use hashmap(socket, die) to check the used socket+die pair.
*/
- d = cpu_map__get_die(cpus, cpu, NULL).die;
+ d = cpu__get_die_id(cpu);
if (d < 0)
return -1;
@@ -345,9 +345,10 @@ static int check_per_pkg(struct evsel *counter,
return -ENOMEM;
*key = (uint64_t)d << 32 | s;
- if (hashmap__find(mask, (void *)key, NULL))
+ if (hashmap__find(mask, (void *)key, NULL)) {
*skip = true;
- else
+ free(key);
+ } else
ret = hashmap__add(mask, (void *)key, (void *)1);
return ret;
@@ -355,14 +356,14 @@ static int check_per_pkg(struct evsel *counter,
static int
process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
- int cpu, int thread,
+ int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
struct perf_counts_values *aggr = &evsel->counts->aggr;
static struct perf_counts_values zero;
bool skip = false;
- if (check_per_pkg(evsel, count, cpu, &skip)) {
+ if (check_per_pkg(evsel, count, cpu_map_idx, &skip)) {
pr_err("failed to read per-pkg counter\n");
return -1;
}
@@ -378,11 +379,11 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
case AGGR_NODE:
case AGGR_NONE:
if (!evsel->snapshot)
- evsel__compute_deltas(evsel, cpu, thread, count);
+ evsel__compute_deltas(evsel, cpu_map_idx, thread, count);
perf_counts_values__scale(count, config->scale, NULL);
if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
perf_stat__update_shadow_stats(evsel, count->val,
- cpu, &rt_stat);
+ cpu_map_idx, &rt_stat);
}
if (config->aggr_mode == AGGR_THREAD) {
@@ -411,15 +412,15 @@ static int process_counter_maps(struct perf_stat_config *config,
{
int nthreads = perf_thread_map__nr(counter->core.threads);
int ncpus = evsel__nr_cpus(counter);
- int cpu, thread;
+ int idx, thread;
if (counter->core.system_wide)
nthreads = 1;
for (thread = 0; thread < nthreads; thread++) {
- for (cpu = 0; cpu < ncpus; cpu++) {
- if (process_counter_values(config, counter, cpu, thread,
- perf_counts(counter->counts, cpu, thread)))
+ for (idx = 0; idx < ncpus; idx++) {
+ if (process_counter_values(config, counter, idx, thread,
+ perf_counts(counter->counts, idx, thread)))
return -1;
}
}
@@ -531,7 +532,7 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
struct target *target,
- int cpu)
+ int cpu_map_idx)
{
struct perf_event_attr *attr = &evsel->core.attr;
struct evsel *leader = evsel__leader(evsel);
@@ -585,7 +586,7 @@ int create_perf_stat_counter(struct evsel *evsel,
}
if (target__has_cpu(target) && !target__has_per_thread(target))
- return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
+ return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx);
return evsel__open_per_thread(evsel, evsel->core.threads);
}
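Aside: check_per_pkg() above de-duplicates uncore counts so each package is counted once. A sketch of its hashmap key, with die_id/socket_id standing in for the cpu__get_die_id()/cpu__get_socket_id() results:

	/* One hashmap entry per (die, socket) pair. A lookup hit means
	 * the package was already counted, so *skip is set to true (and,
	 * per the fix above, the freshly allocated key is freed). */
	uint64_t key = (uint64_t)die_id << 32 | socket_id;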
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 32c8527de347..335d19cc3063 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -108,8 +108,7 @@ struct runtime_stat {
struct rblist value_list;
};
-typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config,
- struct perf_cpu_map *m, int cpu);
+typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu);
struct perf_stat_config {
enum aggr_mode aggr_mode;
@@ -209,7 +208,7 @@ void perf_stat__init_shadow_stats(void);
void perf_stat__reset_shadow_stats(void);
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
- int cpu, struct runtime_stat *st);
+ int cpu_map_idx, struct runtime_stat *st);
struct perf_stat_output_ctx {
void *ctx;
print_metric_t print_metric;
@@ -249,10 +248,10 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
struct target *target,
- int cpu);
+ int cpu_map_idx);
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
struct target *_target, struct timespec *ts, int argc, const char **argv);
struct metric_expr;
-double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);
+double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st);
#endif
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 96f941e01681..1e0c731fc539 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -728,20 +728,20 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
int i;
int ret = 0;
struct perf_cpu_map *m;
- int c;
+ struct perf_cpu c;
m = perf_cpu_map__new(s);
if (!m)
return -1;
- for (i = 0; i < m->nr; i++) {
- c = m->map[i];
- if (c >= nr_cpus) {
+ for (i = 0; i < perf_cpu_map__nr(m); i++) {
+ c = perf_cpu_map__cpu(m, i);
+ if (c.cpu >= nr_cpus) {
ret = -1;
break;
}
- set_bit(c, cpumask_bits(b));
+ set_bit(c.cpu, cpumask_bits(b));
}
perf_cpu_map__put(m);
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 198982109f0f..70f095624a0b 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1186,12 +1186,12 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool,
static void synthesize_cpus(struct cpu_map_entries *cpus,
struct perf_cpu_map *map)
{
- int i;
+ int i, map_nr = perf_cpu_map__nr(map);
- cpus->nr = map->nr;
+ cpus->nr = map_nr;
- for (i = 0; i < map->nr; i++)
- cpus->cpu[i] = map->map[i];
+ for (i = 0; i < map_nr; i++)
+ cpus->cpu[i] = perf_cpu_map__cpu(map, i).cpu;
}
static void synthesize_mask(struct perf_record_record_cpu_map *mask,
@@ -1202,13 +1202,13 @@ static void synthesize_mask(struct perf_record_record_cpu_map *mask,
mask->nr = BITS_TO_LONGS(max);
mask->long_size = sizeof(long);
- for (i = 0; i < map->nr; i++)
- set_bit(map->map[i], mask->mask);
+ for (i = 0; i < perf_cpu_map__nr(map); i++)
+ set_bit(perf_cpu_map__cpu(map, i).cpu, mask->mask);
}
static size_t cpus_size(struct perf_cpu_map *map)
{
- return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16);
+ return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16);
}
static size_t mask_size(struct perf_cpu_map *map, int *max)
@@ -1217,9 +1217,9 @@ static size_t mask_size(struct perf_cpu_map *map, int *max)
*max = 0;
- for (i = 0; i < map->nr; i++) {
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
/* bit position of the cpu is + 1 */
- int bit = map->map[i] + 1;
+ int bit = perf_cpu_map__cpu(map, i).cpu + 1;
if (bit > *max)
*max = bit;
@@ -1354,7 +1354,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool,
}
int perf_event__synthesize_stat(struct perf_tool *tool,
- u32 cpu, u32 thread, u64 id,
+ struct perf_cpu cpu, u32 thread, u64 id,
struct perf_counts_values *count,
perf_event__handler_t process,
struct machine *machine)
@@ -1366,7 +1366,7 @@ int perf_event__synthesize_stat(struct perf_tool *tool,
event.header.misc = 0;
event.id = id;
- event.cpu = cpu;
+ event.cpu = cpu.cpu;
event.thread = thread;
event.val = count->val;
event.ena = count->ena;
@@ -1763,7 +1763,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_
}
e->idx = sid->idx;
- e->cpu = sid->cpu;
+ e->cpu = sid->cpu.cpu;
e->tid = sid->tid;
}
}
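Aside on mask_size() above: BITS_TO_LONGS() takes a bit count, so the highest CPU number (a 0-based bit index) is converted to a count by adding 1. For example, with CPUs {0, 15} the loop yields *max = 15 + 1 = 16, and the mask occupies BITS_TO_LONGS(16) longs.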
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index c931433bacbf..78a0450db164 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -6,6 +6,7 @@
#include <sys/types.h> // pid_t
#include <linux/compiler.h>
#include <linux/types.h>
+#include <perf/cpumap.h>
struct auxtrace_record;
struct dso;
@@ -63,7 +64,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat(struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data);
int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize);
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 27945eeb0cb5..c1ebfc5d2e0c 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -95,15 +95,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
if (target->cpu_list)
ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
- top->evlist->core.cpus->nr > 1 ? "s" : "",
+ perf_cpu_map__nr(top->evlist->core.cpus) > 1 ? "s" : "",
target->cpu_list);
else {
if (target->tid)
ret += SNPRINTF(bf + ret, size - ret, ")");
else
ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
- top->evlist->core.cpus->nr,
- top->evlist->core.cpus->nr > 1 ? "s" : "");
+ perf_cpu_map__nr(top->evlist->core.cpus),
+ perf_cpu_map__nr(top->evlist->core.cpus) > 1 ? "s" : "");
}
perf_top__reset_sample_counters(top);
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index df3c4671be72..fb4f6616b5fa 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -416,3 +416,18 @@ char *perf_exe(char *buf, int len)
}
return strcpy(buf, "perf");
}
+
+void perf_debuginfod_setup(struct perf_debuginfod *di)
+{
+ /*
+ * By default (!di->set) we clear DEBUGINFOD_URLS, so debuginfod
+ * processing is not triggered; otherwise we set it to the 'di->urls'
+ * value. If 'di->urls' is "system", we keep the inherited
+ * DEBUGINFOD_URLS value untouched.
+ */
+ if (!di->set)
+ setenv("DEBUGINFOD_URLS", "", 1);
+ else if (di->urls && strcmp(di->urls, "system"))
+ setenv("DEBUGINFOD_URLS", di->urls, 1);
+
+ pr_debug("DEBUGINFOD_URLS=%s\n", getenv("DEBUGINFOD_URLS"));
+}
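Aside: a hypothetical caller of the new perf_debuginfod_setup() helper; the URL is illustrative only, as in perf the values come from the command line or config (not shown in this hunk):

	struct perf_debuginfod di = {
		.urls = "https://debuginfod.example.org",	/* assumed value */
		.set  = true,
	};
	perf_debuginfod_setup(&di);	/* exports DEBUGINFOD_URLS */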
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 9f0d36ba77f2..7b625cbd2dd8 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -11,6 +11,9 @@
#include <stddef.h>
#include <linux/compiler.h>
#include <sys/types.h>
+#ifndef __cplusplus
+#include <internal/cpumap.h>
+#endif
/* General helper functions */
void usage(const char *err) __noreturn;
@@ -66,6 +69,12 @@ extern bool test_attr__enabled;
void test_attr__ready(void);
void test_attr__init(void);
struct perf_event_attr;
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
int fd, int group_fd, unsigned long flags);
+
+struct perf_debuginfod {
+ const char *urls;
+ bool set;
+};
+void perf_debuginfod_setup(struct perf_debuginfod *di);
#endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore
index 0b319fc8bb17..eada0297ef88 100644
--- a/tools/power/acpi/.gitignore
+++ b/tools/power/acpi/.gitignore
@@ -2,4 +2,5 @@
/acpidbg
/acpidump
/ec
+/pfrut
/include/
diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile
index a249c50ebf55..5ff1d9c864d0 100644
--- a/tools/power/acpi/Makefile
+++ b/tools/power/acpi/Makefile
@@ -9,18 +9,18 @@ include ../../scripts/Makefile.include
.NOTPARALLEL:
-all: acpidbg acpidump ec
-clean: acpidbg_clean acpidump_clean ec_clean
-install: acpidbg_install acpidump_install ec_install
-uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall
+all: acpidbg acpidump ec pfrut
+clean: acpidbg_clean acpidump_clean ec_clean pfrut_clean
+install: acpidbg_install acpidump_install ec_install pfrut_install
+uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall pfrut_uninstall
-acpidbg acpidump ec: FORCE
+acpidbg acpidump ec pfrut: FORCE
$(call descend,tools/$@,all)
-acpidbg_clean acpidump_clean ec_clean:
+acpidbg_clean acpidump_clean ec_clean pfrut_clean:
$(call descend,tools/$(@:_clean=),clean)
-acpidbg_install acpidump_install ec_install:
+acpidbg_install acpidump_install ec_install pfrut_install:
$(call descend,tools/$(@:_install=),install)
-acpidbg_uninstall acpidump_uninstall ec_uninstall:
+acpidbg_uninstall acpidump_uninstall ec_uninstall pfrut_uninstall:
$(call descend,tools/$(@:_uninstall=),uninstall)
.PHONY: FORCE
diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config
index 331f6d30f472..cd7106876a5f 100644
--- a/tools/power/acpi/Makefile.config
+++ b/tools/power/acpi/Makefile.config
@@ -69,6 +69,7 @@ KERNEL_INCLUDE := $(OUTPUT)include
ACPICA_INCLUDE := $(srctree)/../../../drivers/acpi/acpica
CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE)
CFLAGS += $(WARNINGS)
+MKDIR = mkdir
ifeq ($(strip $(V)),false)
QUIET=@
diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules
index 2a6c170b57cd..b71aada77688 100644
--- a/tools/power/acpi/Makefile.rules
+++ b/tools/power/acpi/Makefile.rules
@@ -9,7 +9,7 @@ objdir := $(OUTPUT)tools/$(TOOL)/
toolobjs := $(addprefix $(objdir),$(TOOL_OBJS))
$(OUTPUT)$(TOOL): $(toolobjs) FORCE
$(ECHO) " LD " $(subst $(OUTPUT),,$@)
- $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(toolobjs) -L$(OUTPUT) -o $@
+ $(QUIET) $(LD) $(CFLAGS) $(toolobjs) $(LDFLAGS) -L$(OUTPUT) -o $@
$(ECHO) " STRIP " $(subst $(OUTPUT),,$@)
$(QUIET) $(STRIPCMD) $@
@@ -21,6 +21,7 @@ $(KERNEL_INCLUDE):
$(objdir)%.o: %.c $(KERNEL_INCLUDE)
$(ECHO) " CC " $(subst $(OUTPUT),,$@)
+ $(QUIET) $(MKDIR) -p $(objdir) 2>/dev/null
$(QUIET) $(CC) -c $(CFLAGS) -o $@ $<
all: $(OUTPUT)$(TOOL)
diff --git a/tools/power/acpi/man/pfrut.8 b/tools/power/acpi/man/pfrut.8
new file mode 100644
index 000000000000..3db574770e8d
--- /dev/null
+++ b/tools/power/acpi/man/pfrut.8
@@ -0,0 +1,137 @@
+.TH "PFRUT" "8" "October 2021" "pfrut 1.0" ""
+.hy
+.SH Name
+.PP
+pfrut \- Platform Firmware Runtime Update and Telemetry tool
+.SH SYNOPSIS
+.PP
+\f[B]pfrut\f[R] [\f[I]Options\f[R]]
+.SH DESCRIPTION
+.PP
+The PFRUT (Platform Firmware Runtime Update and Telemetry) kernel interface is designed
+to
+.PD 0
+.P
+.PD
+interact with the platform firmware interface defined in the
+.PD 0
+.P
+.PD
+Management Mode Firmware Runtime
+Update (https://uefi.org/sites/default/files/resources/Intel_MM_OS_Interface_Spec_Rev100.pdf)
+.PD 0
+.P
+.PD
+\f[B]pfrut\f[R] is the tool to interact with the kernel interface.
+.PD 0
+.P
+.PD
+.SH OPTIONS
+.TP
+.B \f[B]\-h\f[R], \f[B]\-\-help\f[R]
+Display help information.
+.TP
+.B \f[B]\-l\f[R], \f[B]\-\-load\f[R]
+Load the capsule file into the system.
+To be more specific, the capsule file will be copied to the
+communication buffer.
+.TP
+.B \f[B]\-s\f[R], \f[B]\-\-stage\f[R]
+Stage the capsule image from the communication buffer into Management Mode
+and perform authentication.
+.TP
+.B \f[B]\-a\f[R], \f[B]\-\-activate\f[R]
+Activate a previously staged capsule image.
+.TP
+.B \f[B]\-u\f[R], \f[B]\-\-update\f[R]
+Perform both stage and activation actions.
+.TP
+.B \f[B]\-q\f[R], \f[B]\-\-query\f[R]
+Query the update capability.
+.TP
+.B \f[B]\-d\f[R], \f[B]\-\-setrev\f[R]
+Set the revision ID of code injection/driver update.
+.TP
+.B \f[B]\-D\f[R], \f[B]\-\-setrevlog\f[R]
+Set the revision ID of telemetry.
+.TP
+.B \f[B]\-G\f[R], \f[B]\-\-getloginfo\f[R]
+Get telemetry log information and print it out.
+.TP
+.B \f[B]\-T\f[R], \f[B]\-\-type\f[R]
+Set the telemetry log data type.
+.TP
+.B \f[B]\-L\f[R], \f[B]\-\-level\f[R]
+Set the telemetry log level.
+.TP
+.B \f[B]\-R\f[R], \f[B]\-\-read\f[R]
+Read all the telemetry data and print it out.
+.SH EXAMPLES
+.PP
+\f[B]pfrut \-G\f[R]
+.PP
+log_level:4
+.PD 0
+.P
+.PD
+log_type:0
+.PD 0
+.P
+.PD
+log_revid:2
+.PD 0
+.P
+.PD
+max_data_size:65536
+.PD 0
+.P
+.PD
+chunk1_size:0
+.PD 0
+.P
+.PD
+chunk2_size:1401
+.PD 0
+.P
+.PD
+rollover_cnt:0
+.PD 0
+.P
+.PD
+reset_cnt:4
+.PP
+\f[B]pfrut \-q\f[R]
+.PP
+code injection image type:794bf8b2\-6e7b\-454e\-885f\-3fb9bb185402
+.PD 0
+.P
+.PD
+fw_version:0
+.PD 0
+.P
+.PD
+code_rt_version:1
+.PD 0
+.P
+.PD
+driver update image type:0e5f0b14\-f849\-7945\-ad81\-bc7b6d2bb245
+.PD 0
+.P
+.PD
+drv_rt_version:0
+.PD 0
+.P
+.PD
+drv_svn:0
+.PD 0
+.P
+.PD
+platform id:39214663\-b1a8\-4eaa\-9024\-f2bb53ea4723
+.PD 0
+.P
+.PD
+oem id:a36db54f\-ea2a\-e14e\-b7c4\-b5780e51ba3d
+.PP
+\f[B]pfrut \-l yours.cap \-u \-T 1 \-L 4\f[R]
+.SH AUTHORS
+Chen Yu.
diff --git a/tools/power/acpi/tools/pfrut/Makefile b/tools/power/acpi/tools/pfrut/Makefile
new file mode 100644
index 000000000000..61c1a96fd433
--- /dev/null
+++ b/tools/power/acpi/tools/pfrut/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+include ../../Makefile.config
+
+TOOL = pfrut
+EXTRA_INSTALL = install-man
+EXTRA_UNINSTALL = uninstall-man
+
+CFLAGS += -Wall -O2
+CFLAGS += -DPFRUT_HEADER='"../../../../../include/uapi/linux/pfrut.h"'
+LDFLAGS += -luuid
+
+TOOL_OBJS = \
+ pfrut.o
+
+include ../../Makefile.rules
+
+install-man: $(srctree)/man/pfrut.8
+ $(ECHO) " INST " pfrut.8
+ $(QUIET) $(INSTALL_DATA) -D $< $(DESTDIR)$(mandir)/man8/pfrut.8
+uninstall-man:
+ $(ECHO) " UNINST " pfrut.8
+ $(QUIET) rm -f $(DESTDIR)$(mandir)/man8/pfrut.8
diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c
new file mode 100644
index 000000000000..d79c335594b2
--- /dev/null
+++ b/tools/power/acpi/tools/pfrut/pfrut.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Platform Firmware Runtime Update tool to do Management
+ * Mode code injection/driver update and telemetry retrieval.
+ *
+ * This tool uses the interfaces provided by pfr_update and
+ * pfr_telemetry drivers. These interfaces are exposed via
+ * /dev/acpi_pfr_update0 and /dev/acpi_pfr_telemetry0. A write
+ * operation on /dev/acpi_pfr_update0 loads the EFI capsule into
+ * kernel space, while mmap/read operations on /dev/acpi_pfr_telemetry0
+ * read the telemetry data back to user space.
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <uuid/uuid.h>
+#include PFRUT_HEADER
+
+char *capsule_name;
+int action, query_cap, log_type, log_level, log_read, log_getinfo,
+ revid, log_revid;
+int set_log_level, set_log_type,
+ set_revid, set_log_revid;
+
+char *progname;
+
+#define LOG_ERR 0
+#define LOG_WARN 1
+#define LOG_INFO 2
+#define LOG_VERB 4
+#define LOG_EXEC_IDX 0
+#define LOG_HISTORY_IDX 1
+#define REVID_1 1
+#define REVID_2 2
+
+static int valid_log_level(int level)
+{
+ return level == LOG_ERR || level == LOG_WARN ||
+ level == LOG_INFO || level == LOG_VERB;
+}
+
+static int valid_log_type(int type)
+{
+ return type == LOG_EXEC_IDX || type == LOG_HISTORY_IDX;
+}
+
+static inline int valid_log_revid(int id)
+{
+ return id == REVID_1 || id == REVID_2;
+}
+
+static void help(void)
+{
+ fprintf(stderr,
+ "usage: %s [OPTIONS]\n"
+ " code injection:\n"
+ " -l, --load\n"
+ " -s, --stage\n"
+ " -a, --activate\n"
+ " -u, --update [stage and activate]\n"
+ " -q, --query\n"
+ " -d, --revid update\n"
+ " telemetry:\n"
+ " -G, --getloginfo\n"
+ " -T, --type(0:execution, 1:history)\n"
+ " -L, --level(0, 1, 2, 4)\n"
+ " -R, --read\n"
+ " -D, --setrevlog (set telemetry log revid)\n",
+ progname);
+}
+
+char *option_string = "l:sauqd:GT:L:RD:h";
+static struct option long_options[] = {
+ {"load", required_argument, 0, 'l'},
+ {"stage", no_argument, 0, 's'},
+ {"activate", no_argument, 0, 'a'},
+ {"update", no_argument, 0, 'u'},
+ {"query", no_argument, 0, 'q'},
+ {"getloginfo", no_argument, 0, 'G'},
+ {"type", required_argument, 0, 'T'},
+ {"level", required_argument, 0, 'L'},
+ {"read", no_argument, 0, 'R'},
+ {"setrev", required_argument, 0, 'd'},
+ {"setrevlog", required_argument, 0, 'D'},
+ {"help", no_argument, 0, 'h'},
+ {}
+};
+
+static void parse_options(int argc, char **argv)
+{
+ int option_index = 0;
+ char *pathname;
+ int opt;
+
+ pathname = strdup(argv[0]);
+ progname = basename(pathname);
+
+ while ((opt = getopt_long_only(argc, argv, option_string,
+ long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 'l':
+ capsule_name = optarg;
+ break;
+ case 's':
+ action = 1;
+ break;
+ case 'a':
+ action = 2;
+ break;
+ case 'u':
+ action = 3;
+ break;
+ case 'q':
+ query_cap = 1;
+ break;
+ case 'G':
+ log_getinfo = 1;
+ break;
+ case 'T':
+ log_type = atoi(optarg);
+ set_log_type = 1;
+ break;
+ case 'L':
+ log_level = atoi(optarg);
+ set_log_level = 1;
+ break;
+ case 'R':
+ log_read = 1;
+ break;
+ case 'd':
+ revid = atoi(optarg);
+ set_revid = 1;
+ break;
+ case 'D':
+ log_revid = atoi(optarg);
+ set_log_revid = 1;
+ break;
+ case 'h':
+ help();
+ exit(0);
+ default:
+ break;
+ }
+ }
+}
+
+void print_cap(struct pfru_update_cap_info *cap)
+{
+ char *uuid;
+
+ uuid = malloc(37);
+ if (!uuid) {
+ perror("Cannot allocate uuid buffer");
+ exit(1);
+ }
+
+ uuid_unparse(cap->code_type, uuid);
+ printf("code injection image type:%s\n", uuid);
+ printf("fw_version:%d\n", cap->fw_version);
+ printf("code_rt_version:%d\n", cap->code_rt_version);
+
+ uuid_unparse(cap->drv_type, uuid);
+ printf("driver update image type:%s\n", uuid);
+ printf("drv_rt_version:%d\n", cap->drv_rt_version);
+ printf("drv_svn:%d\n", cap->drv_svn);
+
+ uuid_unparse(cap->platform_id, uuid);
+ printf("platform id:%s\n", uuid);
+ uuid_unparse(cap->oem_id, uuid);
+ printf("oem id:%s\n", uuid);
+ printf("oem information length:%d\n", cap->oem_info_len);
+
+ free(uuid);
+}
+
+int main(int argc, char *argv[])
+{
+ int fd_update, fd_update_log, fd_capsule;
+ struct pfrt_log_data_info data_info;
+ struct pfrt_log_info info;
+ struct pfru_update_cap_info cap;
+ void *addr_map_capsule;
+ struct stat st;
+ char *log_buf;
+ int ret = 0;
+
+ if (getuid() != 0) {
+ printf("Please run the tool as root - Exiting.\n");
+ return 1;
+ }
+
+ parse_options(argc, argv);
+
+ fd_update = open("/dev/acpi_pfr_update0", O_RDWR);
+ if (fd_update < 0) {
+ printf("PFRU device not supported - Quit...\n");
+ return 1;
+ }
+
+ fd_update_log = open("/dev/acpi_pfr_telemetry0", O_RDWR);
+ if (fd_update_log < 0) {
+ printf("PFRT device not supported - Quit...\n");
+ return 1;
+ }
+
+ if (query_cap) {
+ ret = ioctl(fd_update, PFRU_IOC_QUERY_CAP, &cap);
+ if (ret)
+ perror("Query Update Capability info failed.");
+ else
+ print_cap(&cap);
+
+ close(fd_update);
+ close(fd_update_log);
+
+ return ret;
+ }
+
+ if (log_getinfo) {
+ ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_DATA_INFO, &data_info);
+ if (ret) {
+ perror("Get telemetry data info failed.");
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_INFO, &info);
+ if (ret) {
+ perror("Get telemetry info failed.");
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ printf("log_level:%d\n", info.log_level);
+ printf("log_type:%d\n", info.log_type);
+ printf("log_revid:%d\n", info.log_revid);
+ printf("max_data_size:%d\n", data_info.max_data_size);
+ printf("chunk1_size:%d\n", data_info.chunk1_size);
+ printf("chunk2_size:%d\n", data_info.chunk2_size);
+ printf("rollover_cnt:%d\n", data_info.rollover_cnt);
+ printf("reset_cnt:%d\n", data_info.reset_cnt);
+
+ close(fd_update);
+ close(fd_update_log);
+
+ return 0;
+ }
+
+ info.log_level = -1;
+ info.log_type = -1;
+ info.log_revid = -1;
+
+ if (set_log_level) {
+ if (!valid_log_level(log_level)) {
+ printf("Invalid log level %d\n",
+ log_level);
+ } else {
+ info.log_level = log_level;
+ }
+ }
+
+ if (set_log_type) {
+ if (!valid_log_type(log_type)) {
+ printf("Invalid log type %d\n",
+ log_type);
+ } else {
+ info.log_type = log_type;
+ }
+ }
+
+ if (set_log_revid) {
+ if (!valid_log_revid(log_revid)) {
+ printf("Invalid log revid %d, unchanged.\n",
+ log_revid);
+ } else {
+ info.log_revid = log_revid;
+ }
+ }
+
+ ret = ioctl(fd_update_log, PFRT_LOG_IOC_SET_INFO, &info);
+ if (ret) {
+ perror("Log information set failed (log_level, log_type, log_revid)");
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ if (set_revid) {
+ ret = ioctl(fd_update, PFRU_IOC_SET_REV, &revid);
+ if (ret) {
+ perror("pfru update revid set failed");
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ printf("pfru update revid set to %d\n", revid);
+ }
+
+ if (capsule_name) {
+ fd_capsule = open(capsule_name, O_RDONLY);
+ if (fd_capsule < 0) {
+ perror("Cannot open capsule file");
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ if (fstat(fd_capsule, &st) < 0) {
+ perror("Cannot fstat capsule file");
+ close(fd_capsule);
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ addr_map_capsule = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED,
+ fd_capsule, 0);
+ if (addr_map_capsule == MAP_FAILED) {
+ perror("Failed to mmap capsule file.");
+ close(fd_capsule);
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ ret = write(fd_update, (char *)addr_map_capsule, st.st_size);
+ if (ret == -1) {
+ perror("Failed to load capsule file");
+ munmap(addr_map_capsule, st.st_size);
+ close(fd_capsule);
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ printf("Loaded %d bytes of capsule file into the system\n", ret);
+
+ munmap(addr_map_capsule, st.st_size);
+ close(fd_capsule);
+ printf("Load done.\n");
+ }
+
+ if (action) {
+ if (action == 1) {
+ ret = ioctl(fd_update, PFRU_IOC_STAGE, NULL);
+ } else if (action == 2) {
+ ret = ioctl(fd_update, PFRU_IOC_ACTIVATE, NULL);
+ } else if (action == 3) {
+ ret = ioctl(fd_update, PFRU_IOC_STAGE_ACTIVATE, NULL);
+ } else {
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+ printf("Update finished, returned %d\n", ret);
+ }
+
+ close(fd_update);
+
+ if (log_read) {
+ void *p_mmap;
+ int max_data_sz;
+
+ ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_DATA_INFO, &data_info);
+ if (ret) {
+ perror("Get telemetry data info failed.");
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ max_data_sz = data_info.max_data_size;
+ if (!max_data_sz) {
+ printf("No telemetry data available.\n");
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ log_buf = malloc(max_data_sz + 1);
+ if (!log_buf) {
+ perror("log_buf allocation failed");
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ p_mmap = mmap(NULL, max_data_sz, PROT_READ, MAP_SHARED, fd_update_log, 0);
+ if (p_mmap == MAP_FAILED) {
+ perror("mmap error.");
+ free(log_buf);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ memcpy(log_buf, p_mmap, max_data_sz);
+ log_buf[max_data_sz] = '\0';
+ printf("%s\n", log_buf);
+ free(log_buf);
+
+ munmap(p_mmap, max_data_sz);
+ }
+
+ close(fd_update_log);
+
+ return 0;
+}
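A note on the telemetry path above: the tool first issues PFRT_LOG_IOC_GET_DATA_INFO to learn max_data_size, then mmaps /dev/acpi_pfr_telemetry0 and copies that many bytes out. A minimal Python sketch of the same sequence, not part of the patch; the ioctl request number and the field offset must be derived from the uapi header, and the zero values below are placeholders, not the real constants:

import fcntl, mmap, struct

PFRT_LOG_IOC_GET_DATA_INFO = 0x0  # placeholder; derive from the _IO*() macros in <linux/pfrut.h>
MAX_DATA_SIZE_OFFSET = 0          # placeholder offset of max_data_size in struct pfrt_log_data_info

with open('/dev/acpi_pfr_telemetry0', 'r+b', buffering=0) as dev:
    info = bytearray(64)  # buffer the kernel fills in via the ioctl
    fcntl.ioctl(dev, PFRT_LOG_IOC_GET_DATA_INFO, info)
    (max_data_size,) = struct.unpack_from('=I', info, MAX_DATA_SIZE_OFFSET)
    if max_data_size:
        with mmap.mmap(dev.fileno(), max_data_size, prot=mmap.PROT_READ) as m:
            print(m[:max_data_size].decode(errors='replace'))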
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index bf9fd3549a1d..efe72fa48224 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.10";
+static const char *version_str = "v1.11";
static const int supported_api_ver = 1;
static struct isst_if_platform_info isst_platform_info;
static char *progname;
@@ -1599,6 +1599,7 @@ static void set_scaling_min_to_cpuinfo_max(int cpu)
die_id != get_physical_die_id(i))
continue;
+ adjust_scaling_max_from_base_freq(i);
set_cpufreq_scaling_min_max_from_cpuinfo(i, 1, 0);
adjust_scaling_min_from_base_freq(i);
}
@@ -1615,6 +1616,7 @@ static void set_scaling_min_to_cpuinfo_min(int cpu)
die_id != get_physical_die_id(i))
continue;
+ adjust_scaling_max_from_base_freq(i);
set_cpufreq_scaling_min_max_from_cpuinfo(i, 0, 0);
}
}
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 071312f5eb92..b0be5f40a3f1 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -87,7 +87,18 @@ LLVM_STRIP ?= llvm-strip
ifeq ($(CC_NO_CLANG), 1)
EXTRA_WARNINGS += -Wstrict-aliasing=3
-endif
+
+else ifneq ($(CROSS_COMPILE),)
+CLANG_CROSS_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%))
+GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc))
+ifneq ($(GCC_TOOLCHAIN_DIR),)
+CLANG_CROSS_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE))
+CLANG_CROSS_FLAGS += --sysroot=$(shell $(CROSS_COMPILE)gcc -print-sysroot)
+CLANG_CROSS_FLAGS += --gcc-toolchain=$(realpath $(GCC_TOOLCHAIN_DIR)/..)
+endif # GCC_TOOLCHAIN_DIR
+CFLAGS += $(CLANG_CROSS_FLAGS)
+AFLAGS += $(CLANG_CROSS_FLAGS)
+endif # CROSS_COMPILE
# Hack to avoid type-punned warnings on old systems such as RHEL5:
# We should be changing CFLAGS and checking gcc version, but this
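The make substitutions above derive the clang --target triple from CROSS_COMPILE by stripping the trailing dash and the directory part. A quick sketch of those transforms in Python, using a hypothetical toolchain path (rstrip is only an approximation of the single-suffix substitution):

import os

cross_compile = '/opt/cross/bin/aarch64-linux-gnu-'    # hypothetical CROSS_COMPILE value
target = os.path.basename(cross_compile.rstrip('-'))   # ~ $(notdir $(CROSS_COMPILE:%-=%))
toolchain_dir = os.path.dirname(cross_compile)         # ~ $(dir $(shell which ...gcc))

print(f'--target={target}')   # --target=aarch64-linux-gnu
print(f'--prefix={os.path.join(toolchain_dir, os.path.basename(cross_compile))}')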
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 86deba8308a1..1acdf2fc31c5 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
+ldflags-y += --wrap=acpi_table_parse_cedt
ldflags-y += --wrap=is_acpi_device_node
-ldflags-y += --wrap=acpi_get_table
-ldflags-y += --wrap=acpi_put_table
ldflags-y += --wrap=acpi_evaluate_integer
ldflags-y += --wrap=acpi_pci_find_root
ldflags-y += --wrap=pci_walk_bus
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index cb32f9e27d5d..736d99006fb7 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -182,6 +182,13 @@ static struct {
},
};
+struct acpi_cedt_cfmws *mock_cfmws[4] = {
+ [0] = &mock_cedt.cfmws0.cfmws,
+ [1] = &mock_cedt.cfmws1.cfmws,
+ [2] = &mock_cedt.cfmws2.cfmws,
+ [3] = &mock_cedt.cfmws3.cfmws,
+};
+
struct cxl_mock_res {
struct list_head list;
struct range range;
@@ -232,12 +239,6 @@ static struct cxl_mock_res *alloc_mock_res(resource_size_t size)
static int populate_cedt(void)
{
- struct acpi_cedt_cfmws *cfmws[4] = {
- [0] = &mock_cedt.cfmws0.cfmws,
- [1] = &mock_cedt.cfmws1.cfmws,
- [2] = &mock_cedt.cfmws2.cfmws,
- [3] = &mock_cedt.cfmws3.cfmws,
- };
struct cxl_mock_res *res;
int i;
@@ -257,8 +258,8 @@ static int populate_cedt(void)
chbs->length = size;
}
- for (i = 0; i < ARRAY_SIZE(cfmws); i++) {
- struct acpi_cedt_cfmws *window = cfmws[i];
+ for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) {
+ struct acpi_cedt_cfmws *window = mock_cfmws[i];
res = alloc_mock_res(window->window_size);
if (!res)
@@ -269,21 +270,44 @@ static int populate_cedt(void)
return 0;
}
-static acpi_status mock_acpi_get_table(char *signature, u32 instance,
- struct acpi_table_header **out_table)
+/*
+ * WARNING: this hack assumes that 'struct cxl_cfmws_context' and
+ * 'struct cxl_chbs_context' share the property that their first
+ * struct member is the device being probed by the cxl_acpi driver.
+ */
+struct cxl_cedt_context {
+ struct device *dev;
+};
+
+static int mock_acpi_table_parse_cedt(enum acpi_cedt_type id,
+ acpi_tbl_entry_handler_arg handler_arg,
+ void *arg)
{
- if (instance < U32_MAX || strcmp(signature, ACPI_SIG_CEDT) != 0)
- return acpi_get_table(signature, instance, out_table);
+ struct cxl_cedt_context *ctx = arg;
+ struct device *dev = ctx->dev;
+ union acpi_subtable_headers *h;
+ unsigned long end;
+ int i;
- *out_table = (struct acpi_table_header *) &mock_cedt;
- return AE_OK;
-}
+ if (dev != &cxl_acpi->dev)
+ return acpi_table_parse_cedt(id, handler_arg, arg);
-static void mock_acpi_put_table(struct acpi_table_header *table)
-{
- if (table == (struct acpi_table_header *) &mock_cedt)
- return;
- acpi_put_table(table);
+ if (id == ACPI_CEDT_TYPE_CHBS)
+ for (i = 0; i < ARRAY_SIZE(mock_cedt.chbs); i++) {
+ h = (union acpi_subtable_headers *)&mock_cedt.chbs[i];
+ end = (unsigned long)&mock_cedt.chbs[i + 1];
+ handler_arg(h, arg, end);
+ }
+
+ if (id == ACPI_CEDT_TYPE_CFMWS)
+ for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) {
+ h = (union acpi_subtable_headers *) mock_cfmws[i];
+ end = (unsigned long) h + mock_cfmws[i]->header.length;
+ handler_arg(h, arg, end);
+ }
+
+ return 0;
}
static bool is_mock_bridge(struct device *dev)
@@ -388,8 +412,7 @@ static struct cxl_mock_ops cxl_mock_ops = {
.is_mock_port = is_mock_port,
.is_mock_dev = is_mock_dev,
.mock_port = mock_cxl_root_port,
- .acpi_get_table = mock_acpi_get_table,
- .acpi_put_table = mock_acpi_put_table,
+ .acpi_table_parse_cedt = mock_acpi_table_parse_cedt,
.acpi_evaluate_integer = mock_acpi_evaluate_integer,
.acpi_pci_find_root = mock_acpi_pci_find_root,
.list = LIST_HEAD_INIT(cxl_mock_ops.list),
@@ -574,3 +597,4 @@ static __exit void cxl_test_exit(void)
module_init(cxl_test_init);
module_exit(cxl_test_exit);
MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(ACPI);
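The mock above mirrors the shape of acpi_table_parse_cedt(): rather than handing back a whole table, it walks each subtable of the requested type and invokes the handler with the entry and its end address. An illustrative sketch of that dispatch pattern, with plain data standing in for the C structs:

def parse_cedt(subtables, wanted_type, handler, arg):
    for base, entry in subtables:       # (address, header-dict) pairs, illustrative only
        if entry['type'] != wanted_type:
            continue
        end = base + entry['length']    # mirrors `h + header.length` above
        handler(entry, arg, end)

seen = []
parse_cedt([(0x100, {'type': 'CFMWS', 'length': 0x2c})], 'CFMWS',
           lambda entry, arg, end: seen.append(end), None)
assert seen == [0x12c]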
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 12a8437a9ca0..8c2086c4caef 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -28,8 +28,24 @@ static struct cxl_cel_entry mock_cel[] = {
.opcode = cpu_to_le16(CXL_MBOX_OP_SET_LSA),
.effect = cpu_to_le16(EFFECT(1) | EFFECT(2)),
},
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_HEALTH_INFO),
+ .effect = cpu_to_le16(0),
+ },
};
+/* See CXL 2.0 Table 181 Get Health Info Output Payload */
+struct cxl_mbox_health_info {
+ u8 health_status;
+ u8 media_status;
+ u8 ext_status;
+ u8 life_used;
+ __le16 temperature;
+ __le32 dirty_shutdowns;
+ __le32 volatile_errors;
+ __le32 pmem_errors;
+} __packed;
+
static struct {
struct cxl_mbox_get_supported_logs gsl;
struct cxl_gsl_entry entry;
@@ -54,7 +70,7 @@ static int mock_gsl(struct cxl_mbox_cmd *cmd)
return 0;
}
-static int mock_get_log(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
+static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_get_log *gl = cmd->payload_in;
u32 offset = le32_to_cpu(gl->offset);
@@ -64,7 +80,7 @@ static int mock_get_log(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
if (cmd->size_in < sizeof(*gl))
return -EINVAL;
- if (length > cxlm->payload_size)
+ if (length > cxlds->payload_size)
return -EINVAL;
if (offset + length > sizeof(mock_cel))
return -EINVAL;
@@ -78,9 +94,9 @@ static int mock_get_log(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
return 0;
}
-static int mock_id(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
+static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
- struct platform_device *pdev = to_platform_device(cxlm->dev);
+ struct platform_device *pdev = to_platform_device(cxlds->dev);
struct cxl_mbox_identify id = {
.fw_revision = { "mock fw v1 " },
.lsa_size = cpu_to_le32(LSA_SIZE),
@@ -120,10 +136,10 @@ static int mock_id(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
return 0;
}
-static int mock_get_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
+static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_get_lsa *get_lsa = cmd->payload_in;
- void *lsa = dev_get_drvdata(cxlm->dev);
+ void *lsa = dev_get_drvdata(cxlds->dev);
u32 offset, length;
if (sizeof(*get_lsa) > cmd->size_in)
@@ -139,10 +155,10 @@ static int mock_get_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
return 0;
}
-static int mock_set_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
+static int mock_set_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_set_lsa *set_lsa = cmd->payload_in;
- void *lsa = dev_get_drvdata(cxlm->dev);
+ void *lsa = dev_get_drvdata(cxlds->dev);
u32 offset, length;
if (sizeof(*set_lsa) > cmd->size_in)
@@ -156,9 +172,39 @@ static int mock_set_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
return 0;
}
-static int cxl_mock_mbox_send(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
+static int mock_health_info(struct cxl_dev_state *cxlds,
+ struct cxl_mbox_cmd *cmd)
{
- struct device *dev = cxlm->dev;
+ struct cxl_mbox_health_info health_info = {
+ /* set flags for maint needed, perf degraded, hw replacement */
+ .health_status = 0x7,
+ /* set media status to "All Data Lost" */
+ .media_status = 0x3,
+ /*
+ * set ext_status flags for:
+ * ext_life_used: normal,
+ * ext_temperature: critical,
+ * ext_corrected_volatile: warning,
+ * ext_corrected_persistent: normal,
+ */
+ .ext_status = 0x18,
+ .life_used = 15,
+ .temperature = cpu_to_le16(25),
+ .dirty_shutdowns = cpu_to_le32(10),
+ .volatile_errors = cpu_to_le32(20),
+ .pmem_errors = cpu_to_le32(30),
+ };
+
+ if (cmd->size_out < sizeof(health_info))
+ return -EINVAL;
+
+ memcpy(cmd->payload_out, &health_info, sizeof(health_info));
+ return 0;
+}
+
+static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
+{
+ struct device *dev = cxlds->dev;
int rc = -EIO;
switch (cmd->opcode) {
@@ -166,16 +212,19 @@ static int cxl_mock_mbox_send(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
rc = mock_gsl(cmd);
break;
case CXL_MBOX_OP_GET_LOG:
- rc = mock_get_log(cxlm, cmd);
+ rc = mock_get_log(cxlds, cmd);
break;
case CXL_MBOX_OP_IDENTIFY:
- rc = mock_id(cxlm, cmd);
+ rc = mock_id(cxlds, cmd);
break;
case CXL_MBOX_OP_GET_LSA:
- rc = mock_get_lsa(cxlm, cmd);
+ rc = mock_get_lsa(cxlds, cmd);
break;
case CXL_MBOX_OP_SET_LSA:
- rc = mock_set_lsa(cxlm, cmd);
+ rc = mock_set_lsa(cxlds, cmd);
+ break;
+ case CXL_MBOX_OP_GET_HEALTH_INFO:
+ rc = mock_health_info(cxlds, cmd);
break;
default:
break;
@@ -196,7 +245,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct cxl_memdev *cxlmd;
- struct cxl_mem *cxlm;
+ struct cxl_dev_state *cxlds;
void *lsa;
int rc;
@@ -208,30 +257,30 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
return rc;
dev_set_drvdata(dev, lsa);
- cxlm = cxl_mem_create(dev);
- if (IS_ERR(cxlm))
- return PTR_ERR(cxlm);
+ cxlds = cxl_dev_state_create(dev);
+ if (IS_ERR(cxlds))
+ return PTR_ERR(cxlds);
- cxlm->mbox_send = cxl_mock_mbox_send;
- cxlm->payload_size = SZ_4K;
+ cxlds->mbox_send = cxl_mock_mbox_send;
+ cxlds->payload_size = SZ_4K;
- rc = cxl_mem_enumerate_cmds(cxlm);
+ rc = cxl_enumerate_cmds(cxlds);
if (rc)
return rc;
- rc = cxl_mem_identify(cxlm);
+ rc = cxl_dev_state_identify(cxlds);
if (rc)
return rc;
- rc = cxl_mem_create_range_info(cxlm);
+ rc = cxl_mem_create_range_info(cxlds);
if (rc)
return rc;
- cxlmd = devm_cxl_add_memdev(cxlm);
+ cxlmd = devm_cxl_add_memdev(cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
- if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
+ if (range_len(&cxlds->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
rc = devm_cxl_add_nvdimm(dev, cxlmd);
return 0;
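Since cxl_mbox_health_info is a __packed struct of little-endian fields, its 18 bytes can be reproduced exactly with struct.pack; a sketch for cross-checking the mock values above:

import struct

# '<' = little-endian, no padding: 4 x u8, 1 x le16, 3 x le32 -> 18 bytes
payload = struct.pack('<4BH3I',
                      0x7,   # health_status: maint needed, perf degraded, hw replacement
                      0x3,   # media_status: "All Data Lost"
                      0x18,  # ext_status flags
                      15,    # life_used (percent)
                      25,    # temperature
                      10,    # dirty_shutdowns
                      20,    # volatile_errors
                      30)    # pmem_errors
assert len(payload) == 18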
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index b8c108abcf07..17408f892df4 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -58,36 +58,23 @@ bool __wrap_is_acpi_device_node(const struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL(__wrap_is_acpi_device_node);
-acpi_status __wrap_acpi_get_table(char *signature, u32 instance,
- struct acpi_table_header **out_table)
+int __wrap_acpi_table_parse_cedt(enum acpi_cedt_type id,
+ acpi_tbl_entry_handler_arg handler_arg,
+ void *arg)
{
- int index;
+ int index, rc;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
- acpi_status status;
if (ops)
- status = ops->acpi_get_table(signature, instance, out_table);
+ rc = ops->acpi_table_parse_cedt(id, handler_arg, arg);
else
- status = acpi_get_table(signature, instance, out_table);
+ rc = acpi_table_parse_cedt(id, handler_arg, arg);
put_cxl_mock_ops(index);
- return status;
-}
-EXPORT_SYMBOL(__wrap_acpi_get_table);
-
-void __wrap_acpi_put_table(struct acpi_table_header *table)
-{
- int index;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops)
- ops->acpi_put_table(table);
- else
- acpi_put_table(table);
- put_cxl_mock_ops(index);
+ return rc;
}
-EXPORT_SYMBOL(__wrap_acpi_put_table);
+EXPORT_SYMBOL_NS_GPL(__wrap_acpi_table_parse_cedt, ACPI);
acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle,
acpi_string pathname,
@@ -169,3 +156,4 @@ __wrap_nvdimm_bus_register(struct device *dev,
EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register);
MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(ACPI);
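All of the __wrap_* functions in mock.c follow the same dispatch rule: if a mock ops table is registered, forward to it, otherwise call the real symbol. A generic sketch of that pattern (names are illustrative, not the kernel API):

def make_wrapper(real_fn, get_mock_ops, name):
    def wrapper(*args):
        ops = get_mock_ops()                        # None when no mock is registered
        fn = getattr(ops, name) if ops else real_fn
        return fn(*args)
    return wrapper

class FakeOps:
    def acpi_table_parse_cedt(self, *args):
        return 0                                    # mock path

wrapped = make_wrapper(lambda *a: 1, lambda: FakeOps(), 'acpi_table_parse_cedt')
assert wrapped() == 0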
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index 805a94cb3fbe..15ed0fd877e4 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -6,9 +6,9 @@
struct cxl_mock_ops {
struct list_head list;
bool (*is_mock_adev)(struct acpi_device *dev);
- acpi_status (*acpi_get_table)(char *signature, u32 instance,
- struct acpi_table_header **out_table);
- void (*acpi_put_table)(struct acpi_table_header *table);
+ int (*acpi_table_parse_cedt)(enum acpi_cedt_type id,
+ acpi_tbl_entry_handler_arg handler_arg,
+ void *arg);
bool (*is_mock_bridge)(struct device *dev);
acpi_status (*acpi_evaluate_integer)(acpi_handle handle,
acpi_string pathname,
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 68e6f461c758..7a706f96f68d 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -15,38 +15,57 @@ import time
assert sys.version_info >= (3, 7), "Python version is too old"
-from collections import namedtuple
+from dataclasses import dataclass
from enum import Enum, auto
-from typing import Iterable, Sequence, List
+from typing import Any, Iterable, Sequence, List, Optional
import kunit_json
import kunit_kernel
import kunit_parser
-KunitResult = namedtuple('KunitResult', ['status','result','elapsed_time'])
-
-KunitConfigRequest = namedtuple('KunitConfigRequest',
- ['build_dir', 'make_options'])
-KunitBuildRequest = namedtuple('KunitBuildRequest',
- ['jobs', 'build_dir', 'alltests',
- 'make_options'])
-KunitExecRequest = namedtuple('KunitExecRequest',
- ['timeout', 'build_dir', 'alltests',
- 'filter_glob', 'kernel_args', 'run_isolated'])
-KunitParseRequest = namedtuple('KunitParseRequest',
- ['raw_output', 'build_dir', 'json'])
-KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs',
- 'build_dir', 'alltests', 'filter_glob',
- 'kernel_args', 'run_isolated', 'json', 'make_options'])
-
-KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
-
class KunitStatus(Enum):
SUCCESS = auto()
CONFIG_FAILURE = auto()
BUILD_FAILURE = auto()
TEST_FAILURE = auto()
+@dataclass
+class KunitResult:
+ status: KunitStatus
+ result: Any
+ elapsed_time: float
+
+@dataclass
+class KunitConfigRequest:
+ build_dir: str
+ make_options: Optional[List[str]]
+
+@dataclass
+class KunitBuildRequest(KunitConfigRequest):
+ jobs: int
+ alltests: bool
+
+@dataclass
+class KunitParseRequest:
+ raw_output: Optional[str]
+ build_dir: str
+ json: Optional[str]
+
+@dataclass
+class KunitExecRequest(KunitParseRequest):
+ timeout: int
+ alltests: bool
+ filter_glob: str
+ kernel_args: Optional[List[str]]
+ run_isolated: Optional[str]
+
+@dataclass
+class KunitRequest(KunitExecRequest, KunitBuildRequest):
+ pass
+
+
+KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
+
def get_kernel_root_path() -> str:
path = sys.argv[0] if not __file__ else __file__
parts = os.path.realpath(path).split('tools/testing/kunit')
@@ -91,6 +110,14 @@ def build_tests(linux: kunit_kernel.LinuxSourceTree,
'built kernel successfully',
build_end - build_start)
+def config_and_build_tests(linux: kunit_kernel.LinuxSourceTree,
+ request: KunitBuildRequest) -> KunitResult:
+ config_result = config_tests(linux, request)
+ if config_result.status != KunitStatus.SUCCESS:
+ return config_result
+
+ return build_tests(linux, request)
+
def _list_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> List[str]:
args = ['kunit.action=list']
if request.kernel_args:
@@ -121,8 +148,7 @@ def _suites_from_test_list(tests: List[str]) -> List[str]:
-def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest,
- parse_request: KunitParseRequest) -> KunitResult:
+def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> KunitResult:
filter_globs = [request.filter_glob]
if request.run_isolated:
tests = _list_tests(linux, request)
@@ -147,17 +173,23 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest,
filter_glob=filter_glob,
build_dir=request.build_dir)
- result = parse_tests(parse_request, run_result)
+ result = parse_tests(request, run_result)
# run_kernel() doesn't block on the kernel exiting.
# That only happens after we get the last line of output from `run_result`.
# So exec_time here actually contains parsing + execution time, which is fine.
test_end = time.time()
exec_time += test_end - test_start
- test_counts.add_subtest_counts(result.result.test.counts)
+ test_counts.add_subtest_counts(result.result.counts)
+
+ if len(filter_globs) == 1 and test_counts.crashed > 0:
+ bd = request.build_dir
+ print('The kernel seems to have crashed; you can decode the stack traces with:')
+ print('$ scripts/decode_stacktrace.sh {}/vmlinux {} < {} | tee {}/decoded.log | {} parse'.format(
+ bd, bd, kunit_kernel.get_outfile_path(bd), bd, sys.argv[0]))
kunit_status = _map_to_overall_status(test_counts.get_status())
- return KunitResult(status=kunit_status, result=result.result, elapsed_time=exec_time)
+ return KunitResult(status=kunit_status, result=result, elapsed_time=exec_time)
def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus:
if test_status in (kunit_parser.TestStatus.SUCCESS, kunit_parser.TestStatus.SKIPPED):
@@ -168,14 +200,12 @@ def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus:
def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> KunitResult:
parse_start = time.time()
- test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS,
- kunit_parser.Test(),
- 'Tests not Parsed.')
+ test_result = kunit_parser.Test()
if request.raw_output:
# Treat unparsed results as one passing test.
- test_result.test.status = kunit_parser.TestStatus.SUCCESS
- test_result.test.counts.passed = 1
+ test_result.status = kunit_parser.TestStatus.SUCCESS
+ test_result.counts.passed = 1
output: Iterable[str] = input_data
if request.raw_output == 'all':
@@ -193,7 +223,7 @@ def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> KunitR
if request.json:
json_obj = kunit_json.get_json_result(
- test_result=test_result,
+ test=test_result,
def_config='kunit_defconfig',
build_dir=request.build_dir,
json_path=request.json)
@@ -211,27 +241,15 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree,
request: KunitRequest) -> KunitResult:
run_start = time.time()
- config_request = KunitConfigRequest(request.build_dir,
- request.make_options)
- config_result = config_tests(linux, config_request)
+ config_result = config_tests(linux, request)
if config_result.status != KunitStatus.SUCCESS:
return config_result
- build_request = KunitBuildRequest(request.jobs, request.build_dir,
- request.alltests,
- request.make_options)
- build_result = build_tests(linux, build_request)
+ build_result = build_tests(linux, request)
if build_result.status != KunitStatus.SUCCESS:
return build_result
- exec_request = KunitExecRequest(request.timeout, request.build_dir,
- request.alltests, request.filter_glob,
- request.kernel_args, request.run_isolated)
- parse_request = KunitParseRequest(request.raw_output,
- request.build_dir,
- request.json)
-
- exec_result = exec_tests(linux, exec_request, parse_request)
+ exec_result = exec_tests(linux, request)
run_end = time.time()
@@ -264,6 +282,9 @@ def massage_argv(argv: Sequence[str]) -> Sequence[str]:
return f'{arg}={pseudo_bool_flag_defaults[arg]}'
return list(map(massage_arg, argv))
+def get_default_jobs() -> int:
+ return len(os.sched_getaffinity(0))
+
def add_common_opts(parser) -> None:
parser.add_argument('--build_dir',
help='As in the make command, it specifies the build '
@@ -280,6 +301,10 @@ def add_common_opts(parser) -> None:
' If given a directory, (e.g. lib/kunit), "/.kunitconfig" '
'will get automatically appended.',
metavar='kunitconfig')
+ parser.add_argument('--kconfig_add',
+ help='Additional Kconfig options to append to the '
+ '.kunitconfig, e.g. CONFIG_KASAN=y. Can be repeated.',
+ action='append')
parser.add_argument('--arch',
help=('Specifies the architecture to run tests under. '
@@ -310,7 +335,7 @@ def add_build_opts(parser) -> None:
parser.add_argument('--jobs',
help='As in the make command, "Specifies the number of '
'jobs (commands) to run simultaneously."',
- type=int, default=8, metavar='jobs')
+ type=int, default=get_default_jobs(), metavar='jobs')
def add_exec_opts(parser) -> None:
parser.add_argument('--timeout',
@@ -398,20 +423,21 @@ def main(argv, linux=None):
if not linux:
linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir,
kunitconfig_path=cli_args.kunitconfig,
+ kconfig_add=cli_args.kconfig_add,
arch=cli_args.arch,
cross_compile=cli_args.cross_compile,
qemu_config_path=cli_args.qemu_config)
- request = KunitRequest(cli_args.raw_output,
- cli_args.timeout,
- cli_args.jobs,
- cli_args.build_dir,
- cli_args.alltests,
- cli_args.filter_glob,
- cli_args.kernel_args,
- cli_args.run_isolated,
- cli_args.json,
- cli_args.make_options)
+ request = KunitRequest(build_dir=cli_args.build_dir,
+ make_options=cli_args.make_options,
+ jobs=cli_args.jobs,
+ alltests=cli_args.alltests,
+ raw_output=cli_args.raw_output,
+ json=cli_args.json,
+ timeout=cli_args.timeout,
+ filter_glob=cli_args.filter_glob,
+ kernel_args=cli_args.kernel_args,
+ run_isolated=cli_args.run_isolated)
result = run_tests(linux, request)
if result.status != KunitStatus.SUCCESS:
sys.exit(1)
@@ -423,12 +449,13 @@ def main(argv, linux=None):
if not linux:
linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir,
kunitconfig_path=cli_args.kunitconfig,
+ kconfig_add=cli_args.kconfig_add,
arch=cli_args.arch,
cross_compile=cli_args.cross_compile,
qemu_config_path=cli_args.qemu_config)
- request = KunitConfigRequest(cli_args.build_dir,
- cli_args.make_options)
+ request = KunitConfigRequest(build_dir=cli_args.build_dir,
+ make_options=cli_args.make_options)
result = config_tests(linux, request)
kunit_parser.print_with_timestamp((
'Elapsed time: %.3fs\n') % (
@@ -439,15 +466,16 @@ def main(argv, linux=None):
if not linux:
linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir,
kunitconfig_path=cli_args.kunitconfig,
+ kconfig_add=cli_args.kconfig_add,
arch=cli_args.arch,
cross_compile=cli_args.cross_compile,
qemu_config_path=cli_args.qemu_config)
- request = KunitBuildRequest(cli_args.jobs,
- cli_args.build_dir,
- cli_args.alltests,
- cli_args.make_options)
- result = build_tests(linux, request)
+ request = KunitBuildRequest(build_dir=cli_args.build_dir,
+ make_options=cli_args.make_options,
+ jobs=cli_args.jobs,
+ alltests=cli_args.alltests)
+ result = config_and_build_tests(linux, request)
kunit_parser.print_with_timestamp((
'Elapsed time: %.3fs\n') % (
result.elapsed_time))
@@ -457,20 +485,20 @@ def main(argv, linux=None):
if not linux:
linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir,
kunitconfig_path=cli_args.kunitconfig,
+ kconfig_add=cli_args.kconfig_add,
arch=cli_args.arch,
cross_compile=cli_args.cross_compile,
qemu_config_path=cli_args.qemu_config)
- exec_request = KunitExecRequest(cli_args.timeout,
- cli_args.build_dir,
- cli_args.alltests,
- cli_args.filter_glob,
- cli_args.kernel_args,
- cli_args.run_isolated)
- parse_request = KunitParseRequest(cli_args.raw_output,
- cli_args.build_dir,
- cli_args.json)
- result = exec_tests(linux, exec_request, parse_request)
+ exec_request = KunitExecRequest(raw_output=cli_args.raw_output,
+ build_dir=cli_args.build_dir,
+ json=cli_args.json,
+ timeout=cli_args.timeout,
+ alltests=cli_args.alltests,
+ filter_glob=cli_args.filter_glob,
+ kernel_args=cli_args.kernel_args,
+ run_isolated=cli_args.run_isolated)
+ result = exec_tests(linux, exec_request)
kunit_parser.print_with_timestamp((
'Elapsed time: %.3fs\n') % (result.elapsed_time))
if result.status != KunitStatus.SUCCESS:
@@ -482,9 +510,9 @@ def main(argv, linux=None):
else:
with open(cli_args.file, 'r', errors='backslashreplace') as f:
kunit_output = f.read().splitlines()
- request = KunitParseRequest(cli_args.raw_output,
- None,
- cli_args.json)
+ request = KunitParseRequest(raw_output=cli_args.raw_output,
+ build_dir='',
+ json=cli_args.json)
result = parse_tests(request, kunit_output)
if result.status != KunitStatus.SUCCESS:
sys.exit(1)
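Since KunitRequest now inherits from both KunitExecRequest and KunitBuildRequest, one fully populated request object can be passed to config_tests(), build_tests(), and exec_tests() alike, which is what removes the field repacking above. A standalone sketch of that dataclass pattern (class and field names here are simplified stand-ins):

from dataclasses import dataclass
from typing import List, Optional

@dataclass
class ConfigRequest:
    build_dir: str
    make_options: Optional[List[str]]

@dataclass
class BuildRequest(ConfigRequest):
    jobs: int
    alltests: bool

def config(req: ConfigRequest) -> None:
    print('configuring in', req.build_dir)

def build(req: BuildRequest) -> None:
    print('building with', req.jobs, 'jobs')

req = BuildRequest(build_dir='.kunit', make_options=None, jobs=8, alltests=False)
config(req)   # a BuildRequest is-a ConfigRequest, so no repacking of fields
build(req)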
diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
index c77c7d2ef622..677354546156 100644
--- a/tools/testing/kunit/kunit_config.py
+++ b/tools/testing/kunit/kunit_config.py
@@ -62,33 +62,34 @@ class Kconfig(object):
for entry in self.entries():
f.write(str(entry) + '\n')
- def parse_from_string(self, blob: str) -> None:
- """Parses a string containing KconfigEntrys and populates this Kconfig."""
- self._entries = []
- is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN)
- config_matcher = re.compile(CONFIG_PATTERN)
- for line in blob.split('\n'):
- line = line.strip()
- if not line:
- continue
-
- match = config_matcher.match(line)
- if match:
- entry = KconfigEntry(match.group(1), match.group(2))
- self.add_entry(entry)
- continue
-
- empty_match = is_not_set_matcher.match(line)
- if empty_match:
- entry = KconfigEntry(empty_match.group(1), 'n')
- self.add_entry(entry)
- continue
-
- if line[0] == '#':
- continue
- else:
- raise KconfigParseError('Failed to parse: ' + line)
-
- def read_from_file(self, path: str) -> None:
- with open(path, 'r') as f:
- self.parse_from_string(f.read())
+def parse_file(path: str) -> Kconfig:
+ with open(path, 'r') as f:
+ return parse_from_string(f.read())
+
+def parse_from_string(blob: str) -> Kconfig:
+ """Parses a string containing Kconfig entries."""
+ kconfig = Kconfig()
+ is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN)
+ config_matcher = re.compile(CONFIG_PATTERN)
+ for line in blob.split('\n'):
+ line = line.strip()
+ if not line:
+ continue
+
+ match = config_matcher.match(line)
+ if match:
+ entry = KconfigEntry(match.group(1), match.group(2))
+ kconfig.add_entry(entry)
+ continue
+
+ empty_match = is_not_set_matcher.match(line)
+ if empty_match:
+ entry = KconfigEntry(empty_match.group(1), 'n')
+ kconfig.add_entry(entry)
+ continue
+
+ if line[0] == '#':
+ continue
+ else:
+ raise KconfigParseError('Failed to parse: ' + line)
+ return kconfig
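With parsing moved to module-level functions, callers build a Kconfig in one expression instead of mutating a fresh object. A small usage sketch, assuming it runs from tools/testing/kunit so the module imports, with illustrative CONFIG values:

import kunit_config

kconfig = kunit_config.parse_from_string('CONFIG_KUNIT=y\n# CONFIG_MMU is not set')
extra = kunit_config.parse_from_string('CONFIG_KASAN=y')
kconfig.merge_in_entries(extra)
kconfig.write_to_file('.kunit/.kunitconfig')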
diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
index 746bec72b9ac..6862671709bc 100644
--- a/tools/testing/kunit/kunit_json.py
+++ b/tools/testing/kunit/kunit_json.py
@@ -11,7 +11,7 @@ import os
import kunit_parser
-from kunit_parser import Test, TestResult, TestStatus
+from kunit_parser import Test, TestStatus
from typing import Any, Dict, Optional
JsonObj = Dict[str, Any]
@@ -30,6 +30,8 @@ def _get_group_json(test: Test, def_config: str,
test_case = {"name": subtest.name, "status": "FAIL"}
if subtest.status == TestStatus.SUCCESS:
test_case["status"] = "PASS"
+ elif subtest.status == TestStatus.SKIPPED:
+ test_case["status"] = "SKIP"
elif subtest.status == TestStatus.TEST_CRASHED:
test_case["status"] = "ERROR"
test_cases.append(test_case)
@@ -48,9 +50,9 @@ def _get_group_json(test: Test, def_config: str,
}
return test_group
-def get_json_result(test_result: TestResult, def_config: str,
+def get_json_result(test: Test, def_config: str,
build_dir: Optional[str], json_path: str) -> str:
- test_group = _get_group_json(test_result.test, def_config, build_dir)
+ test_group = _get_group_json(test, def_config, build_dir)
test_group["name"] = "KUnit Test Group"
json_obj = json.dumps(test_group, indent=4)
if json_path != 'stdout':
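The JSON status mapping now covers SKIPPED as well. A condensed sketch of the mapping logic above (status names as in kunit_parser; the dict itself is illustrative, not the module's API):

_STATUS_TO_JSON = {'SUCCESS': 'PASS', 'SKIPPED': 'SKIP', 'TEST_CRASHED': 'ERROR'}

def json_status(status: str) -> str:
    return _STATUS_TO_JSON.get(status, 'FAIL')   # anything unrecognized reports FAIL

assert json_status('SKIPPED') == 'SKIP'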
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 66095568bf32..44bbe54f25f1 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -21,6 +21,7 @@ import qemu_config
KCONFIG_PATH = '.config'
KUNITCONFIG_PATH = '.kunitconfig'
+OLD_KUNITCONFIG_PATH = 'last_used_kunitconfig'
DEFAULT_KUNITCONFIG_PATH = 'tools/testing/kunit/configs/default.config'
BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config'
OUTFILE_PATH = 'test.log'
@@ -116,8 +117,7 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations):
self._extra_qemu_params = qemu_arch_params.extra_qemu_params
def make_arch_qemuconfig(self, base_kunitconfig: kunit_config.Kconfig) -> None:
- kconfig = kunit_config.Kconfig()
- kconfig.parse_from_string(self._kconfig)
+ kconfig = kunit_config.parse_from_string(self._kconfig)
base_kunitconfig.merge_in_entries(kconfig)
def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
@@ -180,6 +180,9 @@ def get_kconfig_path(build_dir) -> str:
def get_kunitconfig_path(build_dir) -> str:
return get_file_path(build_dir, KUNITCONFIG_PATH)
+def get_old_kunitconfig_path(build_dir) -> str:
+ return get_file_path(build_dir, OLD_KUNITCONFIG_PATH)
+
def get_outfile_path(build_dir) -> str:
return get_file_path(build_dir, OUTFILE_PATH)
@@ -206,6 +209,7 @@ def get_source_tree_ops_from_qemu_config(config_path: str,
# exists as a file.
module_path = '.' + os.path.join(os.path.basename(QEMU_CONFIGS_DIR), os.path.basename(config_path))
spec = importlib.util.spec_from_file_location(module_path, config_path)
+ assert spec is not None
config = importlib.util.module_from_spec(spec)
# See https://github.com/python/typeshed/pull/2626 for context.
assert isinstance(spec.loader, importlib.abc.Loader)
@@ -225,6 +229,7 @@ class LinuxSourceTree(object):
build_dir: str,
load_config=True,
kunitconfig_path='',
+ kconfig_add: Optional[List[str]]=None,
arch=None,
cross_compile=None,
qemu_config_path=None) -> None:
@@ -249,8 +254,11 @@ class LinuxSourceTree(object):
if not os.path.exists(kunitconfig_path):
shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, kunitconfig_path)
- self._kconfig = kunit_config.Kconfig()
- self._kconfig.read_from_file(kunitconfig_path)
+ self._kconfig = kunit_config.parse_file(kunitconfig_path)
+ if kconfig_add:
+ kconfig = kunit_config.parse_from_string('\n'.join(kconfig_add))
+ self._kconfig.merge_in_entries(kconfig)
+
def clean(self) -> bool:
try:
@@ -262,17 +270,18 @@ class LinuxSourceTree(object):
def validate_config(self, build_dir) -> bool:
kconfig_path = get_kconfig_path(build_dir)
- validated_kconfig = kunit_config.Kconfig()
- validated_kconfig.read_from_file(kconfig_path)
- if not self._kconfig.is_subset_of(validated_kconfig):
- invalid = self._kconfig.entries() - validated_kconfig.entries()
- message = 'Provided Kconfig is not contained in validated .config. Following fields found in kunitconfig, ' \
- 'but not in .config: %s' % (
- ', '.join([str(e) for e in invalid])
- )
- logging.error(message)
- return False
- return True
+ validated_kconfig = kunit_config.parse_file(kconfig_path)
+ if self._kconfig.is_subset_of(validated_kconfig):
+ return True
+ invalid = self._kconfig.entries() - validated_kconfig.entries()
+ message = 'Not all Kconfig options selected in kunitconfig were in the generated .config.\n' \
+ 'This is probably due to unsatisfied dependencies.\n' \
+ 'Missing: ' + ', '.join([str(e) for e in invalid])
+ if self._arch == 'um':
+ message += '\nNote: many Kconfig options aren\'t available on UML. You can try running ' \
+ 'on a different architecture with something like "--arch=x86_64".'
+ logging.error(message)
+ return False
def build_config(self, build_dir, make_options) -> bool:
kconfig_path = get_kconfig_path(build_dir)
@@ -285,25 +294,38 @@ class LinuxSourceTree(object):
except ConfigError as e:
logging.error(e)
return False
- return self.validate_config(build_dir)
+ if not self.validate_config(build_dir):
+ return False
+
+ old_path = get_old_kunitconfig_path(build_dir)
+ if os.path.exists(old_path):
+ os.remove(old_path) # write_to_file appends to the file
+ self._kconfig.write_to_file(old_path)
+ return True
+
+ def _kunitconfig_changed(self, build_dir: str) -> bool:
+ old_path = get_old_kunitconfig_path(build_dir)
+ if not os.path.exists(old_path):
+ return True
+
+ old_kconfig = kunit_config.parse_file(old_path)
+ return old_kconfig.entries() != self._kconfig.entries()
def build_reconfig(self, build_dir, make_options) -> bool:
"""Creates a new .config if it is not a subset of the .kunitconfig."""
kconfig_path = get_kconfig_path(build_dir)
- if os.path.exists(kconfig_path):
- existing_kconfig = kunit_config.Kconfig()
- existing_kconfig.read_from_file(kconfig_path)
- self._ops.make_arch_qemuconfig(self._kconfig)
- if not self._kconfig.is_subset_of(existing_kconfig):
- print('Regenerating .config ...')
- os.remove(kconfig_path)
- return self.build_config(build_dir, make_options)
- else:
- return True
- else:
+ if not os.path.exists(kconfig_path):
print('Generating .config ...')
return self.build_config(build_dir, make_options)
+ existing_kconfig = kunit_config.parse_file(kconfig_path)
+ self._ops.make_arch_qemuconfig(self._kconfig)
+ if self._kconfig.is_subset_of(existing_kconfig) and not self._kunitconfig_changed(build_dir):
+ return True
+ print('Regenerating .config ...')
+ os.remove(kconfig_path)
+ return self.build_config(build_dir, make_options)
+
def build_kernel(self, alltests, jobs, build_dir, make_options) -> bool:
try:
if alltests:
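The reconfigure decision above now has two triggers: the current kunitconfig is no longer a subset of the generated .config (an option was added or changed), or it differs from the copy saved in last_used_kunitconfig (an option was removed, which a pure subset check cannot see). A set-based sketch of that rule, with illustrative option strings:

def needs_reconfig(kunitconfig, dot_config, last_used):
    if not kunitconfig.issubset(dot_config):
        return True                   # an option was added or changed
    return kunitconfig != last_used   # an option was removed since the last build

# Removing CONFIG_KUNIT_TEST triggers a rebuild even though the remaining
# options are still a subset of the stale .config:
assert needs_reconfig({'CONFIG_KUNIT=y'},
                      {'CONFIG_KUNIT=y', 'CONFIG_KUNIT_TEST=y'},
                      {'CONFIG_KUNIT=y', 'CONFIG_KUNIT_TEST=y'})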
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 3355196d0515..05ff334761dd 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -12,14 +12,11 @@
from __future__ import annotations
import re
-from collections import namedtuple
-from datetime import datetime
+import datetime
from enum import Enum, auto
from functools import reduce
from typing import Iterable, Iterator, List, Optional, Tuple
-TestResult = namedtuple('TestResult', ['status','test','log'])
-
class Test(object):
"""
A class to represent a test parsed from KTAP results. All KTAP
@@ -168,42 +165,51 @@ class TestCounts:
class LineStream:
"""
A class to represent the lines of kernel output.
- Provides a peek()/pop() interface over an iterator of
+ Provides a lazy peek()/pop() interface over an iterator of
(line#, text).
"""
_lines: Iterator[Tuple[int, str]]
_next: Tuple[int, str]
+ _need_next: bool
_done: bool
def __init__(self, lines: Iterator[Tuple[int, str]]):
"""Creates a new LineStream that wraps the given iterator."""
self._lines = lines
self._done = False
+ self._need_next = True
self._next = (0, '')
- self._get_next()
def _get_next(self) -> None:
- """Advances the LineSteam to the next line."""
+ """Advances the LineStream to the next line, if necessary."""
+ if not self._need_next:
+ return
try:
self._next = next(self._lines)
except StopIteration:
self._done = True
+ finally:
+ self._need_next = False
def peek(self) -> str:
"""Returns the current line, without advancing the LineStream.
"""
+ self._get_next()
return self._next[1]
def pop(self) -> str:
"""Returns the current line and advances the LineStream to
the next line.
"""
- n = self._next
- self._get_next()
- return n[1]
+ s = self.peek()
+ if self._done:
+ raise ValueError(f'LineStream: going past EOF, last line was {s}')
+ self._need_next = True
+ return s
def __bool__(self) -> bool:
"""Returns True if stream has more lines."""
+ self._get_next()
return not self._done
# Only used by kunit_tool_test.py.
@@ -216,6 +222,7 @@ class LineStream:
def line_number(self) -> int:
"""Returns the line number of the current line."""
+ self._get_next()
return self._next[0]
# Parsing helper methods:
@@ -340,8 +347,8 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool:
"""
Parses test plan line and stores the expected number of subtests in
test object.
- Returns False and reports missing test plan error if fails to parse
- test plan.
+ Returns False and sets expected_count to None if there is no valid test
+ plan.
Accepted format:
- '1..[number of subtests]'
@@ -356,14 +363,10 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool:
match = TEST_PLAN.match(lines.peek())
if not match:
test.expected_count = None
- test.add_error('missing plan line!')
return False
test.log.append(lines.pop())
expected_count = int(match.group(1))
test.expected_count = expected_count
- if expected_count == 0:
- test.status = TestStatus.NO_TESTS
- test.add_error('0 tests run!')
return True
TEST_RESULT = re.compile(r'^(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$')
@@ -514,7 +517,7 @@ ANSI_LEN = len(red(''))
def print_with_timestamp(message: str) -> None:
"""Prints message with timestamp at beginning."""
- print('[%s] %s' % (datetime.now().strftime('%H:%M:%S'), message))
+ print('[%s] %s' % (datetime.datetime.now().strftime('%H:%M:%S'), message))
def format_test_divider(message: str, len_message: int) -> str:
"""
@@ -590,6 +593,8 @@ def format_test_result(test: Test) -> str:
return (green('[PASSED] ') + test.name)
elif test.status == TestStatus.SKIPPED:
return (yellow('[SKIPPED] ') + test.name)
+ elif test.status == TestStatus.NO_TESTS:
+ return (yellow('[NO TESTS RUN] ') + test.name)
elif test.status == TestStatus.TEST_CRASHED:
print_log(test.log)
return (red('[CRASHED] ') + test.name)
@@ -732,6 +737,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test:
# test plan
test.name = "main"
parse_test_plan(lines, test)
+ parent_test = True
else:
# If KTAP/TAP header is not found, this must be a subtest
# header or a test result line, so attempt to parse it as such
@@ -745,7 +751,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test:
expected_count = test.expected_count
subtests = []
test_num = 1
- while expected_count is None or test_num <= expected_count:
+ while parent_test and (expected_count is None or test_num <= expected_count):
# Loop to parse any subtests.
# Break after parsing expected number of tests or
# if expected number of tests is unknown break when test
@@ -780,9 +786,15 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test:
parse_test_result(lines, test, expected_num)
else:
test.add_error('missing subtest result line!')
+
+ # Check for there being no tests
+ if parent_test and len(subtests) == 0:
+ test.status = TestStatus.NO_TESTS
+ test.add_error('0 tests run!')
+
# Add statuses to TestCounts attribute in Test object
bubble_up_test_results(test)
- if parent_test:
+ if parent_test and not main:
# If test has subtests and is not the main test object, print
# footer.
print_test_footer(test)
@@ -790,7 +802,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test:
print_test_result(test)
return test
-def parse_run_tests(kernel_output: Iterable[str]) -> TestResult:
+def parse_run_tests(kernel_output: Iterable[str]) -> Test:
"""
Using kernel output, extract KTAP lines, parse the lines for test
results, and print condensed test results and a summary line.
@@ -799,8 +811,7 @@ def parse_run_tests(kernel_output: Iterable[str]) -> TestResult:
kernel_output - Iterable object contains lines of kernel output
Return:
- TestResult - Tuple containg status of main test object, main test
- object with all subtests, and log of all KTAP lines.
+ Test - the main test object with all subtests.
"""
print_with_timestamp(DIVIDER)
lines = extract_tap_lines(kernel_output)
@@ -814,4 +825,4 @@ def parse_run_tests(kernel_output: Iterable[str]) -> TestResult:
test.status = test.counts.get_status()
print_with_timestamp(DIVIDER)
print_summary_line(test)
- return TestResult(test.status, test, lines)
+ return test
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 9c4126731457..352369dffbd9 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -13,9 +13,10 @@ import tempfile, shutil # Handling test_tmpdir
import itertools
import json
+import os
import signal
import subprocess
-import os
+from typing import Iterable
import kunit_config
import kunit_parser
@@ -50,10 +51,9 @@ class KconfigTest(unittest.TestCase):
self.assertFalse(kconfig1.is_subset_of(kconfig0))
def test_read_from_file(self):
- kconfig = kunit_config.Kconfig()
kconfig_path = test_data_path('test_read_from_file.kconfig')
- kconfig.read_from_file(kconfig_path)
+ kconfig = kunit_config.parse_file(kconfig_path)
expected_kconfig = kunit_config.Kconfig()
expected_kconfig.add_entry(
@@ -86,8 +86,7 @@ class KconfigTest(unittest.TestCase):
expected_kconfig.write_to_file(kconfig_path)
- actual_kconfig = kunit_config.Kconfig()
- actual_kconfig.read_from_file(kconfig_path)
+ actual_kconfig = kunit_config.parse_file(kconfig_path)
self.assertEqual(actual_kconfig.entries(),
expected_kconfig.entries())
@@ -179,7 +178,7 @@ class KUnitParserTest(unittest.TestCase):
with open(empty_log) as file:
result = kunit_parser.parse_run_tests(
kunit_parser.extract_tap_lines(file.readlines()))
- self.assertEqual(0, len(result.test.subtests))
+ self.assertEqual(0, len(result.subtests))
self.assertEqual(
kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS,
result.status)
@@ -191,7 +190,10 @@ class KUnitParserTest(unittest.TestCase):
result = kunit_parser.parse_run_tests(
kunit_parser.extract_tap_lines(
file.readlines()))
- self.assertEqual(2, result.test.counts.errors)
+ # A missing test plan is not an error.
+ self.assertEqual(0, result.counts.errors)
+ # All tests should be accounted for.
+ self.assertEqual(10, result.counts.total())
self.assertEqual(
kunit_parser.TestStatus.SUCCESS,
result.status)
@@ -201,11 +203,23 @@ class KUnitParserTest(unittest.TestCase):
with open(header_log) as file:
result = kunit_parser.parse_run_tests(
kunit_parser.extract_tap_lines(file.readlines()))
- self.assertEqual(0, len(result.test.subtests))
+ self.assertEqual(0, len(result.subtests))
self.assertEqual(
kunit_parser.TestStatus.NO_TESTS,
result.status)
+ def test_no_tests_no_plan(self):
+ no_plan_log = test_data_path('test_is_test_passed-no_tests_no_plan.log')
+ with open(no_plan_log) as file:
+ result = kunit_parser.parse_run_tests(
+ kunit_parser.extract_tap_lines(file.readlines()))
+ self.assertEqual(0, len(result.subtests[0].subtests[0].subtests))
+ self.assertEqual(
+ kunit_parser.TestStatus.NO_TESTS,
+ result.subtests[0].subtests[0].status)
+ self.assertEqual(1, result.counts.errors)
+
+
def test_no_kunit_output(self):
crash_log = test_data_path('test_insufficient_memory.log')
print_mock = mock.patch('builtins.print').start()
@@ -214,7 +228,7 @@ class KUnitParserTest(unittest.TestCase):
kunit_parser.extract_tap_lines(file.readlines()))
print_mock.assert_any_call(StrContains('invalid KTAP input!'))
print_mock.stop()
- self.assertEqual(0, len(result.test.subtests))
+ self.assertEqual(0, len(result.subtests))
def test_crashed_test(self):
crashed_log = test_data_path('test_is_test_passed-crash.log')
@@ -255,10 +269,10 @@ class KUnitParserTest(unittest.TestCase):
result.status)
self.assertEqual(
"sysctl_test",
- result.test.subtests[0].name)
+ result.subtests[0].name)
self.assertEqual(
"example",
- result.test.subtests[1].name)
+ result.subtests[1].name)
file.close()
@@ -269,7 +283,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(
kunit_parser.TestStatus.SUCCESS,
result.status)
- self.assertEqual('kunit-resource-test', result.test.subtests[0].name)
+ self.assertEqual('kunit-resource-test', result.subtests[0].name)
def test_ignores_multiple_prefixes(self):
prefix_log = test_data_path('test_multiple_prefixes.log')
@@ -278,7 +292,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(
kunit_parser.TestStatus.SUCCESS,
result.status)
- self.assertEqual('kunit-resource-test', result.test.subtests[0].name)
+ self.assertEqual('kunit-resource-test', result.subtests[0].name)
def test_prefix_mixed_kernel_output(self):
mixed_prefix_log = test_data_path('test_interrupted_tap_output.log')
@@ -287,7 +301,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(
kunit_parser.TestStatus.SUCCESS,
result.status)
- self.assertEqual('kunit-resource-test', result.test.subtests[0].name)
+ self.assertEqual('kunit-resource-test', result.subtests[0].name)
def test_prefix_poundsign(self):
pound_log = test_data_path('test_pound_sign.log')
@@ -296,7 +310,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(
kunit_parser.TestStatus.SUCCESS,
result.status)
- self.assertEqual('kunit-resource-test', result.test.subtests[0].name)
+ self.assertEqual('kunit-resource-test', result.subtests[0].name)
def test_kernel_panic_end(self):
panic_log = test_data_path('test_kernel_panic_interrupt.log')
@@ -305,7 +319,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(
kunit_parser.TestStatus.TEST_CRASHED,
result.status)
- self.assertEqual('kunit-resource-test', result.test.subtests[0].name)
+ self.assertEqual('kunit-resource-test', result.subtests[0].name)
def test_pound_no_prefix(self):
pound_log = test_data_path('test_pound_no_prefix.log')
@@ -314,7 +328,46 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(
kunit_parser.TestStatus.SUCCESS,
result.status)
- self.assertEqual('kunit-resource-test', result.test.subtests[0].name)
+ self.assertEqual('kunit-resource-test', result.subtests[0].name)
+
+def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream:
+ return kunit_parser.LineStream(enumerate(strs, start=1))
+
+class LineStreamTest(unittest.TestCase):
+
+ def test_basic(self):
+ stream = line_stream_from_strs(['hello', 'world'])
+
+ self.assertTrue(stream, msg='Should be more input')
+ self.assertEqual(stream.line_number(), 1)
+ self.assertEqual(stream.peek(), 'hello')
+ self.assertEqual(stream.pop(), 'hello')
+
+ self.assertTrue(stream, msg='Should be more input')
+ self.assertEqual(stream.line_number(), 2)
+ self.assertEqual(stream.peek(), 'world')
+ self.assertEqual(stream.pop(), 'world')
+
+ self.assertFalse(stream, msg='Should be no more input')
+ with self.assertRaisesRegex(ValueError, 'LineStream: going past EOF'):
+ stream.pop()
+
+ def test_is_lazy(self):
+ called_times = 0
+ def generator():
+ nonlocal called_times
+ for i in range(1,5):
+ called_times += 1
+ yield called_times, str(called_times)
+
+ stream = kunit_parser.LineStream(generator())
+ self.assertEqual(called_times, 0)
+
+ self.assertEqual(stream.pop(), '1')
+ self.assertEqual(called_times, 1)
+
+ self.assertEqual(stream.pop(), '2')
+ self.assertEqual(called_times, 2)
class LinuxSourceTreeTest(unittest.TestCase):
@@ -336,6 +389,10 @@ class LinuxSourceTreeTest(unittest.TestCase):
pass
kunit_kernel.LinuxSourceTree('', kunitconfig_path=dir)
+ def test_kconfig_add(self):
+ tree = kunit_kernel.LinuxSourceTree('', kconfig_add=['CONFIG_NOT_REAL=y'])
+ self.assertIn(kunit_config.KconfigEntry('NOT_REAL', 'y'), tree._kconfig.entries())
+
def test_invalid_arch(self):
with self.assertRaisesRegex(kunit_kernel.ConfigError, 'not a valid arch, options are.*x86_64'):
kunit_kernel.LinuxSourceTree('', arch='invalid')
@@ -356,6 +413,51 @@ class LinuxSourceTreeTest(unittest.TestCase):
with open(kunit_kernel.get_outfile_path(build_dir), 'rt') as outfile:
self.assertEqual(outfile.read(), 'hi\nbye\n', msg='Missing some output')
+ def test_build_reconfig_no_config(self):
+ with tempfile.TemporaryDirectory('') as build_dir:
+ with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y')
+
+ tree = kunit_kernel.LinuxSourceTree(build_dir)
+ mock_build_config = mock.patch.object(tree, 'build_config').start()
+
+ # Should generate the .config
+ self.assertTrue(tree.build_reconfig(build_dir, make_options=[]))
+ mock_build_config.assert_called_once_with(build_dir, [])
+
+ def test_build_reconfig_existing_config(self):
+ with tempfile.TemporaryDirectory('') as build_dir:
+ # Existing .config is a superset, should not touch it
+ with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y')
+ with open(kunit_kernel.get_old_kunitconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y')
+ with open(kunit_kernel.get_kconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y')
+
+ tree = kunit_kernel.LinuxSourceTree(build_dir)
+ mock_build_config = mock.patch.object(tree, 'build_config').start()
+
+ self.assertTrue(tree.build_reconfig(build_dir, make_options=[]))
+ self.assertEqual(mock_build_config.call_count, 0)
+
+ def test_build_reconfig_remove_option(self):
+ with tempfile.TemporaryDirectory('') as build_dir:
+ # We removed CONFIG_KUNIT_TEST=y from our .kunitconfig...
+ with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y')
+ with open(kunit_kernel.get_old_kunitconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y')
+ with open(kunit_kernel.get_kconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y')
+
+ tree = kunit_kernel.LinuxSourceTree(build_dir)
+ mock_build_config = mock.patch.object(tree, 'build_config').start()
+
+ # ... so we should trigger a call to build_config()
+ self.assertTrue(tree.build_reconfig(build_dir, make_options=[]))
+ mock_build_config.assert_called_once_with(build_dir, [])
+
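These three cases hinge on a superset check: rebuild only when the existing .config no longer contains everything requested, or when .kunitconfig lost entries since the last build. A sketch of that predicate (entry parsing simplified; the real check lives in kunit_kernel):

    def needs_rebuild(kunitconfig: set, old_kunitconfig: set, dotconfig: set) -> bool:
        # Rebuild if .config is missing requested entries...
        if not kunitconfig <= dotconfig:
            return True
        # ...or if options were removed since the last build.
        return bool(old_kunitconfig - kunitconfig)

    assert not needs_rebuild({'KUNIT=y'}, {'KUNIT=y'},
                             {'KUNIT=y', 'KUNIT_TEST=y'})
    assert needs_rebuild({'KUNIT=y'}, {'KUNIT=y', 'KUNIT_TEST=y'},
                         {'KUNIT=y', 'KUNIT_TEST=y'})
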
# TODO: add more test cases.
@@ -365,7 +467,7 @@ class KUnitJsonTest(unittest.TestCase):
with open(test_data_path(log_file)) as file:
test_result = kunit_parser.parse_run_tests(file)
json_obj = kunit_json.get_json_result(
- test_result=test_result,
+ test=test_result,
def_config='kunit_defconfig',
build_dir=None,
json_path='stdout')
@@ -383,6 +485,12 @@ class KUnitJsonTest(unittest.TestCase):
{'name': 'example_simple_test', 'status': 'ERROR'},
result["sub_groups"][1]["test_cases"][0])
+ def test_skipped_test_json(self):
+ result = self._json_for('test_skip_tests.log')
+ self.assertEqual(
+ {'name': 'example_skip_test', 'status': 'SKIP'},
+ result["sub_groups"][1]["test_cases"][1])
+
def test_no_tests_json(self):
result = self._json_for('test_is_test_passed-no_tests_run_with_header.log')
self.assertEqual(0, len(result['sub_groups']))
@@ -418,8 +526,8 @@ class KUnitMainTest(unittest.TestCase):
def test_build_passes_args_pass(self):
kunit.main(['build'], self.linux_source_mock)
- self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0)
- self.linux_source_mock.build_kernel.assert_called_once_with(False, 8, '.kunit', None)
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.linux_source_mock.build_kernel.assert_called_once_with(False, kunit.get_default_jobs(), '.kunit', None)
self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0)
def test_exec_passes_args_pass(self):
@@ -525,8 +633,9 @@ class KUnitMainTest(unittest.TestCase):
def test_build_builddir(self):
build_dir = '.kunit'
+ jobs = kunit.get_default_jobs()
kunit.main(['build', '--build_dir', build_dir], self.linux_source_mock)
- self.linux_source_mock.build_kernel.assert_called_once_with(False, 8, build_dir, None)
+ self.linux_source_mock.build_kernel.assert_called_once_with(False, jobs, build_dir, None)
def test_exec_builddir(self):
build_dir = '.kunit'
@@ -542,6 +651,7 @@ class KUnitMainTest(unittest.TestCase):
# Just verify that we parsed and initialized it correctly here.
mock_linux_init.assert_called_once_with('.kunit',
kunitconfig_path='mykunitconfig',
+ kconfig_add=None,
arch='um',
cross_compile=None,
qemu_config_path=None)
@@ -553,6 +663,19 @@ class KUnitMainTest(unittest.TestCase):
# Just verify that we parsed and initialized it correctly here.
mock_linux_init.assert_called_once_with('.kunit',
kunitconfig_path='mykunitconfig',
+ kconfig_add=None,
+ arch='um',
+ cross_compile=None,
+ qemu_config_path=None)
+
+ @mock.patch.object(kunit_kernel, 'LinuxSourceTree')
+ def test_run_kconfig_add(self, mock_linux_init):
+ mock_linux_init.return_value = self.linux_source_mock
+ kunit.main(['run', '--kconfig_add=CONFIG_KASAN=y', '--kconfig_add=CONFIG_KCSAN=y'])
+ # Just verify that we parsed and initialized it correctly here.
+ mock_linux_init.assert_called_once_with('.kunit',
+ kunitconfig_path=None,
+ kconfig_add=['CONFIG_KASAN=y', 'CONFIG_KCSAN=y'],
arch='um',
cross_compile=None,
qemu_config_path=None)
@@ -569,7 +692,7 @@ class KUnitMainTest(unittest.TestCase):
self.linux_source_mock.run_kernel.return_value = ['TAP version 14', 'init: random output'] + want
got = kunit._list_tests(self.linux_source_mock,
- kunit.KunitExecRequest(300, '.kunit', False, 'suite*', None, 'suite'))
+ kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*', None, 'suite'))
self.assertEqual(got, want)
# Should respect the user's filter glob when listing tests.
@@ -584,7 +707,7 @@ class KUnitMainTest(unittest.TestCase):
# Should respect the user's filter glob when listing tests.
mock_tests.assert_called_once_with(mock.ANY,
- kunit.KunitExecRequest(300, '.kunit', False, 'suite*.test*', None, 'suite'))
+ kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*.test*', None, 'suite'))
self.linux_source_mock.run_kernel.assert_has_calls([
mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', timeout=300),
mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', timeout=300),
@@ -597,7 +720,7 @@ class KUnitMainTest(unittest.TestCase):
# Should respect the user's filter glob when listing tests.
mock_tests.assert_called_once_with(mock.ANY,
- kunit.KunitExecRequest(300, '.kunit', False, 'suite*', None, 'test'))
+ kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*', None, 'test'))
self.linux_source_mock.run_kernel.assert_has_calls([
mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', timeout=300),
mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', timeout=300),
diff --git a/tools/testing/kunit/run_checks.py b/tools/testing/kunit/run_checks.py
new file mode 100755
index 000000000000..4f32133ed77c
--- /dev/null
+++ b/tools/testing/kunit/run_checks.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# This file runs some basic checks to verify kunit works.
+# It is only of interest if you're making changes to KUnit itself.
+#
+# Copyright (C) 2021, Google LLC.
+# Author: Daniel Latypov <dlatypov@google.com>
+
+from concurrent import futures
+import datetime
+import os
+import shutil
+import subprocess
+import sys
+import textwrap
+from typing import Dict, List, Sequence, Tuple
+
+ABS_TOOL_PATH = os.path.abspath(os.path.dirname(__file__))
+TIMEOUT = datetime.timedelta(minutes=5).total_seconds()
+
+commands: Dict[str, Sequence[str]] = {
+ 'kunit_tool_test.py': ['./kunit_tool_test.py'],
+ 'kunit smoke test': ['./kunit.py', 'run', '--kunitconfig=lib/kunit', '--build_dir=kunit_run_checks'],
+ 'pytype': ['/bin/sh', '-c', 'pytype *.py'],
+ 'mypy': ['/bin/sh', '-c', 'mypy *.py'],
+}
+
+# The user might not have mypy or pytype installed, skip them if so.
+# Note: you can install both via `$ pip install mypy pytype`
+necessary_deps: Dict[str, str] = {
+ 'pytype': 'pytype',
+ 'mypy': 'mypy',
+}
+
+def main(argv: Sequence[str]) -> None:
+ if argv:
+ raise RuntimeError('This script takes no arguments')
+
+ future_to_name: Dict[futures.Future, str] = {}
+ executor = futures.ThreadPoolExecutor(max_workers=len(commands))
+ for name, cmd_argv in commands.items():
+ if name in necessary_deps and shutil.which(necessary_deps[name]) is None:
+ print(f'{name}: SKIPPED, {necessary_deps[name]} not in $PATH')
+ continue
+ f = executor.submit(run_cmd, cmd_argv)
+ future_to_name[f] = name
+
+ has_failures = False
+ print(f'Waiting on {len(future_to_name)} checks ({", ".join(future_to_name.values())})...')
+ for f in futures.as_completed(future_to_name.keys()):
+ name = future_to_name[f]
+ ex = f.exception()
+ if not ex:
+ print(f'{name}: PASSED')
+ continue
+
+ has_failures = True
+ if isinstance(ex, subprocess.TimeoutExpired):
+ print(f'{name}: TIMED OUT')
+ elif isinstance(ex, subprocess.CalledProcessError):
+ print(f'{name}: FAILED')
+ else:
+ print(f'{name}: unexpected exception: {ex}')
+ continue
+
+ output = ex.output
+ if output:
+ print(textwrap.indent(output.decode(), '> '))
+ executor.shutdown()
+
+ if has_failures:
+ sys.exit(1)
+
+
+def run_cmd(argv: Sequence[str]):
+ subprocess.check_output(argv, stderr=subprocess.STDOUT, cwd=ABS_TOOL_PATH, timeout=TIMEOUT)
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
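
As an aside, the future-to-name dispatch used above — submit each named command to a thread pool, then map completed futures back to their names — can be sketched in isolation like so (the commands here are illustrative, not part of the patch):

    from concurrent import futures
    import subprocess

    cmds = {'ok': ['true'], 'bad': ['false']}  # illustrative commands
    with futures.ThreadPoolExecutor(max_workers=len(cmds)) as pool:
        future_to_name = {pool.submit(subprocess.check_output, argv): name
                          for name, argv in cmds.items()}
        for f in futures.as_completed(future_to_name):
            # exception() returns None on success, else the raised error
            status = 'PASSED' if f.exception() is None else 'FAILED'
            print(future_to_name[f], status)

Note that as_completed() yields futures in completion order, which is why the script prints check results as they finish rather than in declaration order.
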
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log
new file mode 100644
index 000000000000..dd873c981108
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log
@@ -0,0 +1,7 @@
+TAP version 14
+1..1
+ # Subtest: suite
+ 1..1
+ # Subtest: case
+ ok 1 - case # SKIP
+ok 1 - suite
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 47f9cc9dcd94..c57d9e9d4480 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -35,8 +35,6 @@ obj-$(CONFIG_DAX) += dax.o
endif
obj-$(CONFIG_DEV_DAX) += device_dax.o
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
-obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
-obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o
nfit-y := $(ACPI_SRC)/core.o
nfit-y += $(ACPI_SRC)/intel.o
@@ -67,12 +65,8 @@ device_dax-y += dax-dev.o
device_dax-y += device_dax_test.o
device_dax-y += config_check.o
-dax_pmem-y := $(DAX_SRC)/pmem/pmem.o
+dax_pmem-y := $(DAX_SRC)/pmem.o
dax_pmem-y += dax_pmem_test.o
-dax_pmem_core-y := $(DAX_SRC)/pmem/core.o
-dax_pmem_core-y += dax_pmem_core_test.o
-dax_pmem_compat-y := $(DAX_SRC)/pmem/compat.o
-dax_pmem_compat-y += dax_pmem_compat_test.o
dax_pmem-y += config_check.o
libnvdimm-y := $(NVDIMM_SRC)/core.o
diff --git a/tools/testing/nvdimm/dax_pmem_compat_test.c b/tools/testing/nvdimm/dax_pmem_compat_test.c
deleted file mode 100644
index 7cd1877f3765..000000000000
--- a/tools/testing/nvdimm/dax_pmem_compat_test.c
+++ /dev/null
@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright(c) 2019 Intel Corporation. All rights reserved.
-
-#include <linux/module.h>
-#include <linux/printk.h>
-#include "watermark.h"
-
-nfit_test_watermark(dax_pmem_compat);
diff --git a/tools/testing/nvdimm/dax_pmem_core_test.c b/tools/testing/nvdimm/dax_pmem_core_test.c
deleted file mode 100644
index a4249cdbeec1..000000000000
--- a/tools/testing/nvdimm/dax_pmem_core_test.c
+++ /dev/null
@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright(c) 2019 Intel Corporation. All rights reserved.
-
-#include <linux/module.h>
-#include <linux/printk.h>
-#include "watermark.h"
-
-nfit_test_watermark(dax_pmem_core);
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index ed563bdd88f3..b752ce47ead3 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -100,25 +100,17 @@ static void nfit_test_kill(void *_pgmap)
{
struct dev_pagemap *pgmap = _pgmap;
- WARN_ON(!pgmap || !pgmap->ref);
-
- if (pgmap->ops && pgmap->ops->kill)
- pgmap->ops->kill(pgmap);
- else
- percpu_ref_kill(pgmap->ref);
-
- if (pgmap->ops && pgmap->ops->cleanup) {
- pgmap->ops->cleanup(pgmap);
- } else {
- wait_for_completion(&pgmap->done);
- percpu_ref_exit(pgmap->ref);
- }
+ WARN_ON(!pgmap);
+
+ percpu_ref_kill(&pgmap->ref);
+
+ wait_for_completion(&pgmap->done);
+ percpu_ref_exit(&pgmap->ref);
}
static void dev_pagemap_percpu_release(struct percpu_ref *ref)
{
- struct dev_pagemap *pgmap =
- container_of(ref, struct dev_pagemap, internal_ref);
+ struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
complete(&pgmap->done);
}
@@ -132,22 +124,11 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
if (!nfit_res)
return devm_memremap_pages(dev, pgmap);
- if (!pgmap->ref) {
- if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
- return ERR_PTR(-EINVAL);
-
- init_completion(&pgmap->done);
- error = percpu_ref_init(&pgmap->internal_ref,
- dev_pagemap_percpu_release, 0, GFP_KERNEL);
- if (error)
- return ERR_PTR(error);
- pgmap->ref = &pgmap->internal_ref;
- } else {
- if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
- WARN(1, "Missing reference count teardown definition\n");
- return ERR_PTR(-EINVAL);
- }
- }
+ init_completion(&pgmap->done);
+ error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
+ GFP_KERNEL);
+ if (error)
+ return ERR_PTR(error);
error = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
if (error)
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 6862915f1fb0..3ca7c32e9362 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -1054,10 +1054,6 @@ static __init int ndtest_init(void)
libnvdimm_test();
device_dax_test();
dax_pmem_test();
- dax_pmem_core_test();
-#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
- dax_pmem_compat_test();
-#endif
nfit_test_setup(ndtest_resource_lookup, NULL);
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index b1bff5fb0f65..0bc91ffee257 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -3300,10 +3300,6 @@ static __init int nfit_test_init(void)
acpi_nfit_test();
device_dax_test();
dax_pmem_test();
- dax_pmem_core_test();
-#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
- dax_pmem_compat_test();
-#endif
nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h
index 565fccdfe6e9..016cff473cfc 100644
--- a/tools/testing/radix-tree/linux/lockdep.h
+++ b/tools/testing/radix-tree/linux/lockdep.h
@@ -1,5 +1,8 @@
#ifndef _LINUX_LOCKDEP_H
#define _LINUX_LOCKDEP_H
+
+#include <linux/spinlock.h>
+
struct lock_class_key {
unsigned int a;
};
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
index 16ec895bbe5f..5bd9e6e80625 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -74,7 +74,7 @@ static inline unsigned long page_to_phys(struct page *page)
__UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
x, y)
-#define preemptible() (1)
+#define pagefault_disabled() (0)
static inline void *kmap(struct page *page)
{
@@ -127,6 +127,7 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags)
#define kmemleak_free(a)
#define PageSlab(p) (0)
+#define flush_dcache_page(p)
#define MAX_ERRNO 4095
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c852eb40c4f7..d08fe4cfe811 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-TARGETS = arm64
+TARGETS += alsa
+TARGETS += arm64
TARGETS += bpf
TARGETS += breakpoints
TARGETS += capabilities
diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore
new file mode 100644
index 000000000000..3bb7c41266a8
--- /dev/null
+++ b/tools/testing/selftests/alsa/.gitignore
@@ -0,0 +1 @@
+mixer-test
diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile
new file mode 100644
index 000000000000..f64d9090426d
--- /dev/null
+++ b/tools/testing/selftests/alsa/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+
+CFLAGS += $(shell pkg-config --cflags alsa)
+LDLIBS += $(shell pkg-config --libs alsa)
+
+TEST_GEN_PROGS := mixer-test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
new file mode 100644
index 000000000000..17f158d7a767
--- /dev/null
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -0,0 +1,705 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// kselftest for the ALSA mixer API
+//
+// Original author: Mark Brown <broonie@kernel.org>
+// Copyright (c) 2021 Arm Limited
+
+// This test will iterate over all cards detected in the system, exercising
+// every mixer control it can find. This may conflict with other system
+// software if there is audio activity, so it is best run on a system
+// with a minimal active userspace.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <math.h>
+#include <errno.h>
+#include <assert.h>
+#include <alsa/asoundlib.h>
+#include <poll.h>
+#include <stdint.h>
+
+#include "../kselftest.h"
+
+#define TESTS_PER_CONTROL 3
+
+struct card_data {
+ snd_ctl_t *handle;
+ int card;
+ int num_ctls;
+ snd_ctl_elem_list_t *ctls;
+ struct card_data *next;
+};
+
+struct ctl_data {
+ const char *name;
+ snd_ctl_elem_id_t *id;
+ snd_ctl_elem_info_t *info;
+ snd_ctl_elem_value_t *def_val;
+ int elem;
+ struct card_data *card;
+ struct ctl_data *next;
+};
+
+static const char *alsa_config =
+"ctl.hw {\n"
+" @args [ CARD ]\n"
+" @args.CARD.type string\n"
+" type hw\n"
+" card $CARD\n"
+"}\n"
+;
+
+int num_cards = 0;
+int num_controls = 0;
+struct card_data *card_list = NULL;
+struct ctl_data *ctl_list = NULL;
+
+#ifdef SND_LIB_VER
+#if SND_LIB_VERSION >= SND_LIB_VER(1, 2, 6)
+#define LIB_HAS_LOAD_STRING
+#endif
+#endif
+
+#ifndef LIB_HAS_LOAD_STRING
+int snd_config_load_string(snd_config_t **config, const char *s, size_t size)
+{
+ snd_input_t *input;
+ snd_config_t *dst;
+ int err;
+
+ assert(config && s);
+ if (size == 0)
+ size = strlen(s);
+ err = snd_input_buffer_open(&input, s, size);
+ if (err < 0)
+ return err;
+ err = snd_config_top(&dst);
+ if (err < 0) {
+ snd_input_close(input);
+ return err;
+ }
+ err = snd_config_load(dst, input);
+ snd_input_close(input);
+ if (err < 0) {
+ snd_config_delete(dst);
+ return err;
+ }
+ *config = dst;
+ return 0;
+}
+#endif
+
+void find_controls(void)
+{
+ char name[32];
+ int card, ctl, err;
+ struct card_data *card_data;
+ struct ctl_data *ctl_data;
+ snd_config_t *config;
+
+ card = -1;
+ if (snd_card_next(&card) < 0 || card < 0)
+ return;
+
+ err = snd_config_load_string(&config, alsa_config, strlen(alsa_config));
+ if (err < 0) {
+ ksft_print_msg("Unable to parse custom alsa-lib configuration: %s\n",
+ snd_strerror(err));
+ ksft_exit_fail();
+ }
+
+ while (card >= 0) {
+ sprintf(name, "hw:%d", card);
+
+ card_data = malloc(sizeof(*card_data));
+ if (!card_data)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ err = snd_ctl_open_lconf(&card_data->handle, name, 0, config);
+ if (err < 0) {
+ ksft_print_msg("Failed to get hctl for card %d: %s\n",
+ card, snd_strerror(err));
+ goto next_card;
+ }
+
+ /* Count controls */
+ snd_ctl_elem_list_malloc(&card_data->ctls);
+ snd_ctl_elem_list(card_data->handle, card_data->ctls);
+ card_data->num_ctls = snd_ctl_elem_list_get_count(card_data->ctls);
+
+ /* Enumerate control information */
+ snd_ctl_elem_list_alloc_space(card_data->ctls, card_data->num_ctls);
+ snd_ctl_elem_list(card_data->handle, card_data->ctls);
+
+ card_data->card = num_cards++;
+ card_data->next = card_list;
+ card_list = card_data;
+
+ num_controls += card_data->num_ctls;
+
+ for (ctl = 0; ctl < card_data->num_ctls; ctl++) {
+ ctl_data = malloc(sizeof(*ctl_data));
+ if (!ctl_data)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ ctl_data->card = card_data;
+ ctl_data->elem = ctl;
+ ctl_data->name = snd_ctl_elem_list_get_name(card_data->ctls,
+ ctl);
+
+ err = snd_ctl_elem_id_malloc(&ctl_data->id);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ err = snd_ctl_elem_info_malloc(&ctl_data->info);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ err = snd_ctl_elem_value_malloc(&ctl_data->def_val);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ snd_ctl_elem_list_get_id(card_data->ctls, ctl,
+ ctl_data->id);
+ snd_ctl_elem_info_set_id(ctl_data->info, ctl_data->id);
+ err = snd_ctl_elem_info(card_data->handle,
+ ctl_data->info);
+ if (err < 0) {
+ ksft_print_msg("%s getting info for %d\n",
+ snd_strerror(err),
+ ctl_data->name);
+ }
+
+ snd_ctl_elem_value_set_id(ctl_data->def_val,
+ ctl_data->id);
+
+ ctl_data->next = ctl_list;
+ ctl_list = ctl_data;
+ }
+
+ next_card:
+ if (snd_card_next(&card) < 0) {
+ ksft_print_msg("snd_card_next");
+ break;
+ }
+ }
+
+ snd_config_delete(config);
+}
+
+bool ctl_value_index_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val,
+ int index)
+{
+ long int_val;
+ long long int64_val;
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_NONE:
+ ksft_print_msg("%s.%d Invalid control type NONE\n",
+ ctl->name, index);
+ return false;
+
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ int_val = snd_ctl_elem_value_get_boolean(val, index);
+ switch (int_val) {
+ case 0:
+ case 1:
+ break;
+ default:
+ ksft_print_msg("%s.%d Invalid boolean value %ld\n",
+ ctl->name, index, int_val);
+ return false;
+ }
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ int_val = snd_ctl_elem_value_get_integer(val, index);
+
+ if (int_val < snd_ctl_elem_info_get_min(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld less than minimum %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_min(ctl->info));
+ return false;
+ }
+
+ if (int_val > snd_ctl_elem_info_get_max(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld more than maximum %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_max(ctl->info));
+ return false;
+ }
+
+ /* Only check step size if there is one and we're in bounds */
+ if (snd_ctl_elem_info_get_step(ctl->info) &&
+ (int_val - snd_ctl_elem_info_get_min(ctl->info)) %
+ snd_ctl_elem_info_get_step(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld invalid for step %ld minimum %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_step(ctl->info),
+ snd_ctl_elem_info_get_min(ctl->info));
+ return false;
+ }
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ int64_val = snd_ctl_elem_value_get_integer64(val, index);
+
+ if (int64_val < snd_ctl_elem_info_get_min64(ctl->info)) {
+ ksft_print_msg("%s.%d value %lld less than minimum %lld\n",
+ ctl->name, index, int64_val,
+ snd_ctl_elem_info_get_min64(ctl->info));
+ return false;
+ }
+
+ if (int64_val > snd_ctl_elem_info_get_max64(ctl->info)) {
+ ksft_print_msg("%s.%d value %lld more than maximum %lld\n",
+ ctl->name, index, int64_val,
+ snd_ctl_elem_info_get_max64(ctl->info));
+ return false;
+ }
+
+ /* Only check step size if there is one and we're in bounds */
+ if (snd_ctl_elem_info_get_step64(ctl->info) &&
+ (int64_val - snd_ctl_elem_info_get_min64(ctl->info)) %
+ snd_ctl_elem_info_get_step64(ctl->info)) {
+ ksft_print_msg("%s.%d value %lld invalid for step %lld minimum %lld\n",
+ ctl->name, index, int64_val,
+ snd_ctl_elem_info_get_step64(ctl->info),
+ snd_ctl_elem_info_get_min64(ctl->info));
+ return false;
+ }
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ int_val = snd_ctl_elem_value_get_enumerated(val, index);
+
+ if (int_val < 0) {
+ ksft_print_msg("%s.%d negative value %ld for enumeration\n",
+ ctl->name, index, int_val);
+ return false;
+ }
+
+ if (int_val >= snd_ctl_elem_info_get_items(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld more than item count %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_items(ctl->info));
+ return false;
+ }
+ break;
+
+ default:
+ /* No tests for other types */
+ break;
+ }
+
+ return true;
+}
+
+/*
+ * Check that the provided value meets the constraints for the
+ * provided control.
+ */
+bool ctl_value_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val)
+{
+ int i;
+ bool valid = true;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++)
+ if (!ctl_value_index_valid(ctl, val, i))
+ valid = false;
+
+ return valid;
+}
+
+/*
+ * Check that we can read the default value and it is valid. Write
+ * tests use the read value to restore the default.
+ */
+void test_ctl_get_value(struct ctl_data *ctl)
+{
+ int err;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("get_value.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ /* Can't test reading on an unreadable control */
+ if (!snd_ctl_elem_info_is_readable(ctl->info)) {
+ ksft_print_msg("%s is not readable\n", ctl->name);
+ ksft_test_result_skip("get_value.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ err = snd_ctl_elem_read(ctl->card->handle, ctl->def_val);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_elem_read() failed: %s\n",
+ snd_strerror(err));
+ goto out;
+ }
+
+ if (!ctl_value_valid(ctl, ctl->def_val))
+ err = -EINVAL;
+
+out:
+ ksft_test_result(err >= 0, "get_value.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+bool show_mismatch(struct ctl_data *ctl, int index,
+ snd_ctl_elem_value_t *read_val,
+ snd_ctl_elem_value_t *expected_val)
+{
+ long long expected_int, read_int;
+
+ /*
+ * We factor out the code to compare values representable as
+ * integers; make sure that check doesn't log for other types.
+ */
+ expected_int = 0;
+ read_int = 0;
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ expected_int = snd_ctl_elem_value_get_boolean(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_boolean(read_val, index);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ expected_int = snd_ctl_elem_value_get_integer(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_integer(read_val, index);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ expected_int = snd_ctl_elem_value_get_integer64(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_integer64(read_val,
+ index);
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ expected_int = snd_ctl_elem_value_get_enumerated(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_enumerated(read_val,
+ index);
+ break;
+
+ default:
+ break;
+ }
+
+ if (expected_int != read_int) {
+ /*
+ * NOTE: The volatile attribute means that the hardware
+ * can voluntarily change the state of control element
+ * independent of any operation by software.
+ */
+ bool is_volatile = snd_ctl_elem_info_is_volatile(ctl->info);
+ ksft_print_msg("%s.%d expected %lld but read %lld, is_volatile %d\n",
+ ctl->name, index, expected_int, read_int, is_volatile);
+ return !is_volatile;
+ } else {
+ return false;
+ }
+}
+
+/*
+ * Write a value then if possible verify that we get the expected
+ * result. An optional expected value can be provided if we expect
+ * the write to fail, for verifying that invalid writes don't corrupt
+ * anything.
+ */
+int write_and_verify(struct ctl_data *ctl,
+ snd_ctl_elem_value_t *write_val,
+ snd_ctl_elem_value_t *expected_val)
+{
+ int err, i;
+ bool error_expected, mismatch_shown;
+ snd_ctl_elem_value_t *read_val, *w_val;
+ snd_ctl_elem_value_alloca(&read_val);
+ snd_ctl_elem_value_alloca(&w_val);
+
+ /*
+ * We need to copy the write value since writing can modify
+ * the value which causes surprises, and allocate an expected
+ * value if we expect to read back what we wrote.
+ */
+ snd_ctl_elem_value_copy(w_val, write_val);
+ if (expected_val) {
+ error_expected = true;
+ } else {
+ error_expected = false;
+ snd_ctl_elem_value_alloca(&expected_val);
+ snd_ctl_elem_value_copy(expected_val, write_val);
+ }
+
+ /*
+ * Do the write, if we have an expected value ignore the error
+ * and carry on to validate the expected value.
+ */
+ err = snd_ctl_elem_write(ctl->card->handle, w_val);
+ if (err < 0 && !error_expected) {
+ ksft_print_msg("snd_ctl_elem_write() failed: %s\n",
+ snd_strerror(err));
+ return err;
+ }
+
+ /* Can we do the verification part? */
+ if (!snd_ctl_elem_info_is_readable(ctl->info))
+ return err;
+
+ snd_ctl_elem_value_set_id(read_val, ctl->id);
+
+ err = snd_ctl_elem_read(ctl->card->handle, read_val);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_elem_read() failed: %s\n",
+ snd_strerror(err));
+ return err;
+ }
+
+ /*
+ * Use the library to compare values; if there's a mismatch
+ * carry on and try to provide a more useful diagnostic than
+ * just "mismatch".
+ */
+ if (!snd_ctl_elem_value_compare(expected_val, read_val))
+ return 0;
+
+ mismatch_shown = false;
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++)
+ if (show_mismatch(ctl, i, read_val, expected_val))
+ mismatch_shown = true;
+
+ if (!mismatch_shown)
+ ksft_print_msg("%s read and written values differ\n",
+ ctl->name);
+
+ return -1;
+}
+
+/*
+ * Make sure we can write the default value back to the control, this
+ * should validate that at least some write works.
+ */
+void test_ctl_write_default(struct ctl_data *ctl)
+{
+ int err;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("write_default.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ if (!snd_ctl_elem_info_is_writable(ctl->info)) {
+ ksft_print_msg("%s is not writeable\n", ctl->name);
+ ksft_test_result_skip("write_default.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ /* No idea what the default was for unreadable controls */
+ if (!snd_ctl_elem_info_is_readable(ctl->info)) {
+ ksft_print_msg("%s couldn't read default\n", ctl->name);
+ ksft_test_result_skip("write_default.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ err = write_and_verify(ctl, ctl->def_val, NULL);
+
+ ksft_test_result(err >= 0, "write_default.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+bool test_ctl_write_valid_boolean(struct ctl_data *ctl)
+{
+ int err, i, j;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = 0; j < 2; j++) {
+ snd_ctl_elem_value_set_boolean(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+bool test_ctl_write_valid_integer(struct ctl_data *ctl)
+{
+ int err;
+ int i;
+ long j, step;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ step = snd_ctl_elem_info_get_step(ctl->info);
+ if (!step)
+ step = 1;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = snd_ctl_elem_info_get_min(ctl->info);
+ j <= snd_ctl_elem_info_get_max(ctl->info); j += step) {
+
+ snd_ctl_elem_value_set_integer(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+bool test_ctl_write_valid_integer64(struct ctl_data *ctl)
+{
+ int err, i;
+ long long j, step;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ step = snd_ctl_elem_info_get_step64(ctl->info);
+ if (!step)
+ step = 1;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = snd_ctl_elem_info_get_min64(ctl->info);
+ j <= snd_ctl_elem_info_get_max64(ctl->info); j += step) {
+
+ snd_ctl_elem_value_set_integer64(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+bool test_ctl_write_valid_enumerated(struct ctl_data *ctl)
+{
+ int err, i, j;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = 0; j < snd_ctl_elem_info_get_items(ctl->info); j++) {
+ snd_ctl_elem_value_set_enumerated(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+void test_ctl_write_valid(struct ctl_data *ctl)
+{
+ bool pass;
+ int err;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("write_valid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ if (!snd_ctl_elem_info_is_writable(ctl->info)) {
+ ksft_print_msg("%s is not writeable\n", ctl->name);
+ ksft_test_result_skip("write_valid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ pass = test_ctl_write_valid_boolean(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ pass = test_ctl_write_valid_integer(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ pass = test_ctl_write_valid_integer64(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ pass = test_ctl_write_valid_enumerated(ctl);
+ break;
+
+ default:
+ /* No tests for this yet */
+ ksft_test_result_skip("write_valid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ /* Restore the default value to minimise disruption */
+ err = write_and_verify(ctl, ctl->def_val, NULL);
+ if (err < 0)
+ pass = false;
+
+ ksft_test_result(pass, "write_valid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+int main(void)
+{
+ struct ctl_data *ctl;
+
+ ksft_print_header();
+
+ find_controls();
+
+ ksft_set_plan(num_controls * TESTS_PER_CONTROL);
+
+ for (ctl = ctl_list; ctl != NULL; ctl = ctl->next) {
+ /*
+ * Must test get_value() before we write anything, the
+ * test stores the default value for later cleanup.
+ */
+ test_ctl_get_value(ctl);
+ test_ctl_write_default(ctl);
+ test_ctl_write_valid(ctl);
+ }
+
+ ksft_exit_pass();
+
+ return 0;
+}
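
The subtlest check in mixer-test.c is the integer range/step validation in ctl_value_index_valid(); a minimal Python sketch of the same logic (values illustrative) shows why the offset from the minimum, not the raw value, must be a multiple of the step:

    def int_value_valid(val, vmin, vmax, step):
        # Mirrors the SND_CTL_ELEM_TYPE_INTEGER checks above.
        if val < vmin or val > vmax:
            return False
        # A step of 0 means unconstrained; otherwise the distance from
        # the minimum must be a whole number of steps.
        if step and (val - vmin) % step != 0:
            return False
        return True

    assert int_value_valid(5, 1, 11, 2)        # 1, 3, 5, ... are valid
    assert not int_value_valid(4, 1, 11, 2)    # off-step
    assert not int_value_valid(13, 1, 11, 2)   # out of range
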
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index ced910fb4019..1e8d9a8f59df 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal pauth fp mte bti
+ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi
else
ARM64_SUBTARGETS :=
endif
diff --git a/tools/testing/selftests/arm64/abi/.gitignore b/tools/testing/selftests/arm64/abi/.gitignore
new file mode 100644
index 000000000000..b79cf5814c23
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/.gitignore
@@ -0,0 +1 @@
+syscall-abi
diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile
new file mode 100644
index 000000000000..96eba974ac8d
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 ARM Limited
+
+TEST_GEN_PROGS := syscall-abi
+
+include ../../lib.mk
+
+$(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
new file mode 100644
index 000000000000..983467cfcee0
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+//
+// Assembly portion of the syscall ABI test
+
+//
+// Load values from memory into registers, invoke a syscall and save the
+// register values back to memory for later checking. The syscall to be
+// invoked is configured in x8 of the input GPR data.
+//
+// x0: SVE VL, 0 for FP only
+//
+// GPRs: gpr_in, gpr_out
+// FPRs: fpr_in, fpr_out
+// Zn: z_in, z_out
+// Pn: p_in, p_out
+// FFR: ffr_in, ffr_out
+
+.arch_extension sve
+
+.globl do_syscall
+do_syscall:
+ // Store callee saved registers x19-x30 (96 bytes) plus x0 and x1 (16 bytes)
+ stp x29, x30, [sp, #-112]!
+ mov x29, sp
+ stp x0, x1, [sp, #16]
+ stp x19, x20, [sp, #32]
+ stp x21, x22, [sp, #48]
+ stp x23, x24, [sp, #64]
+ stp x25, x26, [sp, #80]
+ stp x27, x28, [sp, #96]
+
+ // Load GPRs x8-x28, and save our FP/LR for later comparison
+ ldr x2, =gpr_in
+ add x2, x2, #64
+ ldp x8, x9, [x2], #16
+ ldp x10, x11, [x2], #16
+ ldp x12, x13, [x2], #16
+ ldp x14, x15, [x2], #16
+ ldp x16, x17, [x2], #16
+ ldp x18, x19, [x2], #16
+ ldp x20, x21, [x2], #16
+ ldp x22, x23, [x2], #16
+ ldp x24, x25, [x2], #16
+ ldp x26, x27, [x2], #16
+ ldr x28, [x2], #8
+ str x29, [x2], #8 // FP
+ str x30, [x2], #8 // LR
+
+ // Load FPRs if we're not doing SVE
+ cbnz x0, 1f
+ ldr x2, =fpr_in
+ ldp q0, q1, [x2]
+ ldp q2, q3, [x2, #16 * 2]
+ ldp q4, q5, [x2, #16 * 4]
+ ldp q6, q7, [x2, #16 * 6]
+ ldp q8, q9, [x2, #16 * 8]
+ ldp q10, q11, [x2, #16 * 10]
+ ldp q12, q13, [x2, #16 * 12]
+ ldp q14, q15, [x2, #16 * 14]
+ ldp q16, q17, [x2, #16 * 16]
+ ldp q18, q19, [x2, #16 * 18]
+ ldp q20, q21, [x2, #16 * 20]
+ ldp q22, q23, [x2, #16 * 22]
+ ldp q24, q25, [x2, #16 * 24]
+ ldp q26, q27, [x2, #16 * 26]
+ ldp q28, q29, [x2, #16 * 28]
+ ldp q30, q31, [x2, #16 * 30]
+1:
+
+ // Load the SVE registers if we're doing SVE
+ cbz x0, 1f
+
+ ldr x2, =z_in
+ ldr z0, [x2, #0, MUL VL]
+ ldr z1, [x2, #1, MUL VL]
+ ldr z2, [x2, #2, MUL VL]
+ ldr z3, [x2, #3, MUL VL]
+ ldr z4, [x2, #4, MUL VL]
+ ldr z5, [x2, #5, MUL VL]
+ ldr z6, [x2, #6, MUL VL]
+ ldr z7, [x2, #7, MUL VL]
+ ldr z8, [x2, #8, MUL VL]
+ ldr z9, [x2, #9, MUL VL]
+ ldr z10, [x2, #10, MUL VL]
+ ldr z11, [x2, #11, MUL VL]
+ ldr z12, [x2, #12, MUL VL]
+ ldr z13, [x2, #13, MUL VL]
+ ldr z14, [x2, #14, MUL VL]
+ ldr z15, [x2, #15, MUL VL]
+ ldr z16, [x2, #16, MUL VL]
+ ldr z17, [x2, #17, MUL VL]
+ ldr z18, [x2, #18, MUL VL]
+ ldr z19, [x2, #19, MUL VL]
+ ldr z20, [x2, #20, MUL VL]
+ ldr z21, [x2, #21, MUL VL]
+ ldr z22, [x2, #22, MUL VL]
+ ldr z23, [x2, #23, MUL VL]
+ ldr z24, [x2, #24, MUL VL]
+ ldr z25, [x2, #25, MUL VL]
+ ldr z26, [x2, #26, MUL VL]
+ ldr z27, [x2, #27, MUL VL]
+ ldr z28, [x2, #28, MUL VL]
+ ldr z29, [x2, #29, MUL VL]
+ ldr z30, [x2, #30, MUL VL]
+ ldr z31, [x2, #31, MUL VL]
+
+ ldr x2, =ffr_in
+ ldr p0, [x2, #0]
+ wrffr p0.b
+
+ ldr x2, =p_in
+ ldr p0, [x2, #0, MUL VL]
+ ldr p1, [x2, #1, MUL VL]
+ ldr p2, [x2, #2, MUL VL]
+ ldr p3, [x2, #3, MUL VL]
+ ldr p4, [x2, #4, MUL VL]
+ ldr p5, [x2, #5, MUL VL]
+ ldr p6, [x2, #6, MUL VL]
+ ldr p7, [x2, #7, MUL VL]
+ ldr p8, [x2, #8, MUL VL]
+ ldr p9, [x2, #9, MUL VL]
+ ldr p10, [x2, #10, MUL VL]
+ ldr p11, [x2, #11, MUL VL]
+ ldr p12, [x2, #12, MUL VL]
+ ldr p13, [x2, #13, MUL VL]
+ ldr p14, [x2, #14, MUL VL]
+ ldr p15, [x2, #15, MUL VL]
+1:
+
+ // Do the syscall
+ svc #0
+
+ // Save GPRs x8-x30
+ ldr x2, =gpr_out
+ add x2, x2, #64
+ stp x8, x9, [x2], #16
+ stp x10, x11, [x2], #16
+ stp x12, x13, [x2], #16
+ stp x14, x15, [x2], #16
+ stp x16, x17, [x2], #16
+ stp x18, x19, [x2], #16
+ stp x20, x21, [x2], #16
+ stp x22, x23, [x2], #16
+ stp x24, x25, [x2], #16
+ stp x26, x27, [x2], #16
+ stp x28, x29, [x2], #16
+ str x30, [x2]
+
+ // Restore x0 and x1 for feature checks
+ ldp x0, x1, [sp, #16]
+
+ // Save FPSIMD state
+ ldr x2, =fpr_out
+ stp q0, q1, [x2]
+ stp q2, q3, [x2, #16 * 2]
+ stp q4, q5, [x2, #16 * 4]
+ stp q6, q7, [x2, #16 * 6]
+ stp q8, q9, [x2, #16 * 8]
+ stp q10, q11, [x2, #16 * 10]
+ stp q12, q13, [x2, #16 * 12]
+ stp q14, q15, [x2, #16 * 14]
+ stp q16, q17, [x2, #16 * 16]
+ stp q18, q19, [x2, #16 * 18]
+ stp q20, q21, [x2, #16 * 20]
+ stp q22, q23, [x2, #16 * 22]
+ stp q24, q25, [x2, #16 * 24]
+ stp q26, q27, [x2, #16 * 26]
+ stp q28, q29, [x2, #16 * 28]
+ stp q30, q31, [x2, #16 * 30]
+
+ // Save the SVE state if we have some
+ cbz x0, 1f
+
+ ldr x2, =z_out
+ str z0, [x2, #0, MUL VL]
+ str z1, [x2, #1, MUL VL]
+ str z2, [x2, #2, MUL VL]
+ str z3, [x2, #3, MUL VL]
+ str z4, [x2, #4, MUL VL]
+ str z5, [x2, #5, MUL VL]
+ str z6, [x2, #6, MUL VL]
+ str z7, [x2, #7, MUL VL]
+ str z8, [x2, #8, MUL VL]
+ str z9, [x2, #9, MUL VL]
+ str z10, [x2, #10, MUL VL]
+ str z11, [x2, #11, MUL VL]
+ str z12, [x2, #12, MUL VL]
+ str z13, [x2, #13, MUL VL]
+ str z14, [x2, #14, MUL VL]
+ str z15, [x2, #15, MUL VL]
+ str z16, [x2, #16, MUL VL]
+ str z17, [x2, #17, MUL VL]
+ str z18, [x2, #18, MUL VL]
+ str z19, [x2, #19, MUL VL]
+ str z20, [x2, #20, MUL VL]
+ str z21, [x2, #21, MUL VL]
+ str z22, [x2, #22, MUL VL]
+ str z23, [x2, #23, MUL VL]
+ str z24, [x2, #24, MUL VL]
+ str z25, [x2, #25, MUL VL]
+ str z26, [x2, #26, MUL VL]
+ str z27, [x2, #27, MUL VL]
+ str z28, [x2, #28, MUL VL]
+ str z29, [x2, #29, MUL VL]
+ str z30, [x2, #30, MUL VL]
+ str z31, [x2, #31, MUL VL]
+
+ ldr x2, =p_out
+ str p0, [x2, #0, MUL VL]
+ str p1, [x2, #1, MUL VL]
+ str p2, [x2, #2, MUL VL]
+ str p3, [x2, #3, MUL VL]
+ str p4, [x2, #4, MUL VL]
+ str p5, [x2, #5, MUL VL]
+ str p6, [x2, #6, MUL VL]
+ str p7, [x2, #7, MUL VL]
+ str p8, [x2, #8, MUL VL]
+ str p9, [x2, #9, MUL VL]
+ str p10, [x2, #10, MUL VL]
+ str p11, [x2, #11, MUL VL]
+ str p12, [x2, #12, MUL VL]
+ str p13, [x2, #13, MUL VL]
+ str p14, [x2, #14, MUL VL]
+ str p15, [x2, #15, MUL VL]
+
+ ldr x2, =ffr_out
+ rdffr p0.b
+ str p0, [x2, #0]
+1:
+
+ // Restore callee saved registers x19-x30
+ ldp x19, x20, [sp, #32]
+ ldp x21, x22, [sp, #48]
+ ldp x23, x24, [sp, #64]
+ ldp x25, x26, [sp, #80]
+ ldp x27, x28, [sp, #96]
+ ldp x29, x30, [sp], #112
+
+ ret
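
For reference, gpr_in/gpr_out above are arrays of 31 64-bit slots (x0-x30), so slot n sits at byte 8*n — which is why the code adds #64 to start at x8, and why the FP/LR stores land at the end of the buffer. A quick sketch of that arithmetic (illustrative only):

    # Each of the 31 GPR slots is 8 bytes; slot n corresponds to xn.
    def gpr_offset(n: int) -> int:
        return 8 * n

    assert gpr_offset(8) == 64     # first register loaded/saved
    assert gpr_offset(29) == 232   # FP slot
    assert gpr_offset(30) == 240   # LR slot
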
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
new file mode 100644
index 000000000000..d8eeeafb50dc
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+#include <asm/unistd.h>
+
+#include "../../kselftest.h"
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+#define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1)
+
+extern void do_syscall(int sve_vl);
+
+static void fill_random(void *buf, size_t size)
+{
+ int i;
+ uint32_t *lbuf = buf;
+
+ /* random() returns a 32 bit number regardless of the size of long */
+ for (i = 0; i < size / sizeof(uint32_t); i++)
+ lbuf[i] = random();
+}
+
+/*
+ * We also repeat the test for several syscalls to try to expose different
+ * behaviour.
+ */
+static struct syscall_cfg {
+ int syscall_nr;
+ const char *name;
+} syscalls[] = {
+ { __NR_getpid, "getpid()" },
+ { __NR_sched_yield, "sched_yield()" },
+};
+
+#define NUM_GPR 31
+uint64_t gpr_in[NUM_GPR];
+uint64_t gpr_out[NUM_GPR];
+
+static void setup_gpr(struct syscall_cfg *cfg, int sve_vl)
+{
+ fill_random(gpr_in, sizeof(gpr_in));
+ gpr_in[8] = cfg->syscall_nr;
+ memset(gpr_out, 0, sizeof(gpr_out));
+}
+
+static int check_gpr(struct syscall_cfg *cfg, int sve_vl)
+{
+ int errors = 0;
+ int i;
+
+ /*
+ * GPR x0-x7 may be clobbered and x8 holds the syscall number;
+ * all others should be preserved.
+ */
+ for (i = 9; i < ARRAY_SIZE(gpr_in); i++) {
+ if (gpr_in[i] != gpr_out[i]) {
+ ksft_print_msg("%s SVE VL %d mismatch in GPR %d: %llx != %llx\n",
+ cfg->name, sve_vl, i,
+ gpr_in[i], gpr_out[i]);
+ errors++;
+ }
+ }
+
+ return errors;
+}
+
+#define NUM_FPR 32
+uint64_t fpr_in[NUM_FPR * 2];
+uint64_t fpr_out[NUM_FPR * 2];
+
+static void setup_fpr(struct syscall_cfg *cfg, int sve_vl)
+{
+ fill_random(fpr_in, sizeof(fpr_in));
+ memset(fpr_out, 0, sizeof(fpr_out));
+}
+
+static int check_fpr(struct syscall_cfg *cfg, int sve_vl)
+{
+ int errors = 0;
+ int i;
+
+ if (!sve_vl) {
+ for (i = 0; i < ARRAY_SIZE(fpr_in); i++) {
+ if (fpr_in[i] != fpr_out[i]) {
+ ksft_print_msg("%s Q%d/%d mismatch %llx != %llx\n",
+ cfg->name,
+ i / 2, i % 2,
+ fpr_in[i], fpr_out[i]);
+ errors++;
+ }
+ }
+ }
+
+ return errors;
+}
+
+static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)];
+uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
+uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
+
+static void setup_z(struct syscall_cfg *cfg, int sve_vl)
+{
+ fill_random(z_in, sizeof(z_in));
+ fill_random(z_out, sizeof(z_out));
+}
+
+static int check_z(struct syscall_cfg *cfg, int sve_vl)
+{
+ size_t reg_size = sve_vl;
+ int errors = 0;
+ int i;
+
+ if (!sve_vl)
+ return 0;
+
+ /*
+ * After a syscall the low 128 bits of the Z registers should
+ * be preserved and the rest either zeroed or preserved.
+ */
+ for (i = 0; i < SVE_NUM_ZREGS; i++) {
+ void *in = &z_in[reg_size * i];
+ void *out = &z_out[reg_size * i];
+
+ if (memcmp(in, out, SVE_VQ_BYTES) != 0) {
+ ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n",
+ cfg->name, sve_vl, i);
+ errors++;
+ }
+ }
+
+ return errors;
+}
+
+uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)];
+uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)];
+
+static void setup_p(struct syscall_cfg *cfg, int sve_vl)
+{
+ fill_random(p_in, sizeof(p_in));
+ fill_random(p_out, sizeof(p_out));
+}
+
+static int check_p(struct syscall_cfg *cfg, int sve_vl)
+{
+ size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */
+
+ int errors = 0;
+ int i;
+
+ if (!sve_vl)
+ return 0;
+
+ /* After a syscall the P registers should be preserved or zeroed */
+ for (i = 0; i < SVE_NUM_PREGS * reg_size; i++)
+ if (p_out[i] && (p_in[i] != p_out[i]))
+ errors++;
+ if (errors)
+ ksft_print_msg("%s SVE VL %d predicate registers non-zero\n",
+ cfg->name, sve_vl);
+
+ return errors;
+}
+
+uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)];
+uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)];
+
+static void setup_ffr(struct syscall_cfg *cfg, int sve_vl)
+{
+ /*
+ * It is only valid to set a contiguous set of bits starting
+ * at 0. For now since we're expecting this to be cleared by
+ * a syscall just set all bits.
+ */
+ memset(ffr_in, 0xff, sizeof(ffr_in));
+ fill_random(ffr_out, sizeof(ffr_out));
+}
+
+static int check_ffr(struct syscall_cfg *cfg, int sve_vl)
+{
+ size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */
+ int errors = 0;
+ int i;
+
+ if (!sve_vl)
+ return 0;
+
+ /* After a syscall the FFR should be preserved or zeroed */
+ for (i = 0; i < reg_size; i++)
+ if (ffr_out[i] && (ffr_in[i] != ffr_out[i]))
+ errors++;
+ if (errors)
+ ksft_print_msg("%s SVE VL %d FFR non-zero\n",
+ cfg->name, sve_vl);
+
+ return errors;
+}
+
+typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl);
+typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl);
+
+/*
+ * Each set of registers has a setup function which is called before
+ * the syscall to fill values in a global variable for loading by the
+ * test code and a check function which validates that the results are
+ * as expected. Vector lengths are passed everywhere, a vector length
+ * of 0 should be treated as do not test.
+ */
+static struct {
+ setup_fn setup;
+ check_fn check;
+} regset[] = {
+ { setup_gpr, check_gpr },
+ { setup_fpr, check_fpr },
+ { setup_z, check_z },
+ { setup_p, check_p },
+ { setup_ffr, check_ffr },
+};
+
+static bool do_test(struct syscall_cfg *cfg, int sve_vl)
+{
+ int errors = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(regset); i++)
+ regset[i].setup(cfg, sve_vl);
+
+ do_syscall(sve_vl);
+
+ for (i = 0; i < ARRAY_SIZE(regset); i++)
+ errors += regset[i].check(cfg, sve_vl);
+
+ return errors == 0;
+}
+
+static void test_one_syscall(struct syscall_cfg *cfg)
+{
+ int sve_vq, sve_vl;
+
+ /* FPSIMD only case */
+ ksft_test_result(do_test(cfg, 0),
+ "%s FPSIMD\n", cfg->name);
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+ return;
+
+ for (sve_vq = SVE_VQ_MAX; sve_vq > 0; --sve_vq) {
+ sve_vl = prctl(PR_SVE_SET_VL, sve_vq * 16);
+ if (sve_vl == -1)
+ ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ sve_vl &= PR_SVE_VL_LEN_MASK;
+
+ if (sve_vq != sve_vq_from_vl(sve_vl))
+ sve_vq = sve_vq_from_vl(sve_vl);
+
+ ksft_test_result(do_test(cfg, sve_vl),
+ "%s SVE VL %d\n", cfg->name, sve_vl);
+ }
+}
+
+int sve_count_vls(void)
+{
+ unsigned int vq;
+ int vl_count = 0;
+ int vl;
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+ return 0;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ if (vq != sve_vq_from_vl(vl))
+ vq = sve_vq_from_vl(vl);
+
+ vl_count++;
+ }
+
+ return vl_count;
+}
+
+int main(void)
+{
+ int i;
+
+ srandom(getpid());
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(syscalls) * (sve_count_vls() + 1));
+
+ for (i = 0; i < ARRAY_SIZE(syscalls); i++)
+ test_one_syscall(&syscalls[i]);
+
+ ksft_print_cnts();
+
+ return 0;
+}
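
The plan size in main() is ARRAY_SIZE(syscalls) * (number of VLs + 1): one FPSIMD-only run plus one run per supported vector length, per syscall. The VL enumeration itself walks VQ downward and lets the kernel snap unsupported requests to the next supported length; a hedged sketch of that loop, where set_vl stands in for prctl(PR_SVE_SET_VL, ...):

    # Sketch of sve_count_vls(); set_vl(vl) returns the VL actually set.
    def count_vls(set_vl, vq_max: int) -> int:
        vls, vq = 0, vq_max
        while vq > 0:
            vl = set_vl(vq * 16)   # request the VL for this VQ
            vq = vl // 16          # sve_vq_from_vl(): snap to what we got
            vls += 1
            vq -= 1                # then try the next smaller VQ
        return vls

    # e.g. hardware supporting only VL 16 and 32 bytes (VQ 1 and 2):
    assert count_vls(lambda vl: min(vl, 32), 16) == 2
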
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index b67395903b9b..c50d86331ed2 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -1,3 +1,4 @@
+fp-pidbench
fpsimd-test
rdvl-sve
sve-probe-vls
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index ba1488c7c315..95f0b877a060 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -2,13 +2,15 @@
CFLAGS += -I../../../../../usr/include/
TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg
-TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \
+TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \
rdvl-sve \
sve-test sve-stress \
vlset
all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
+fp-pidbench: fp-pidbench.S asm-utils.o
+ $(CC) -nostdlib $^ -o $@
fpsimd-test: fpsimd-test.o asm-utils.o
$(CC) -nostdlib $^ -o $@
rdvl-sve: rdvl-sve.o rdvl.o
diff --git a/tools/testing/selftests/arm64/fp/fp-pidbench.S b/tools/testing/selftests/arm64/fp/fp-pidbench.S
new file mode 100644
index 000000000000..16a436389bfc
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fp-pidbench.S
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+//
+// Trivial syscall overhead benchmark.
+//
+// This is implemented in asm to ensure that we don't have any issues with
+// system libraries using instructions that disrupt the test.
+
+#include <asm/unistd.h>
+#include "assembler.h"
+
+.arch_extension sve
+
+.macro test_loop per_loop
+ mov x10, x20
+ mov x8, #__NR_getpid
+ mrs x11, CNTVCT_EL0
+1:
+ \per_loop
+ svc #0
+ sub x10, x10, #1
+ cbnz x10, 1b
+
+ mrs x12, CNTVCT_EL0
+ sub x0, x12, x11
+ bl putdec
+ puts "\n"
+.endm
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+ puts "Iterations per test: "
+ mov x20, #10000
+ lsl x20, x20, #8
+ mov x0, x20
+ bl putdec
+ puts "\n"
+
+ // Test having never used SVE
+ puts "No SVE: "
+ test_loop
+
+ // Check for SVE support - should use hwcap but that's hard in asm
+ mrs x0, ID_AA64PFR0_EL1
+ ubfx x0, x0, #32, #4
+ cbnz x0, 1f
+ puts "System does not support SVE\n"
+ b out
+1:
+
+ // Execute a SVE instruction
+ puts "SVE VL: "
+ rdvl x0, #8
+ bl putdec
+ puts "\n"
+
+ puts "SVE used once: "
+ test_loop
+
+ // Use SVE per syscall
+ puts "SVE used per syscall: "
+ test_loop "rdvl x0, #8"
+
+ // And we're done
+out:
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
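
The benchmark prints raw CNTVCT_EL0 tick deltas; the per-syscall cost is the delta divided by the iteration count printed at startup (10000 << 8 = 2,560,000). A trivial sketch of that post-processing (assumed, not part of the patch):

    ITERATIONS = 10000 << 8          # matches the mov/lsl sequence above
    def ticks_per_call(delta_ticks: int) -> float:
        return delta_ticks / ITERATIONS
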
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index c4417bc48d4f..a3c1e67441f9 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -21,16 +21,37 @@
#include "../../kselftest.h"
-#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
-#define FPSIMD_TESTS 5
-
-#define EXPECTED_TESTS (VL_TESTS + FPSIMD_TESTS)
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_SVE
#define NT_ARM_SVE 0x405
#endif
+struct vec_type {
+ const char *name;
+ unsigned long hwcap_type;
+ unsigned long hwcap;
+ int regset;
+ int prctl_set;
+};
+
+static const struct vec_type vec_types[] = {
+ {
+ .name = "SVE",
+ .hwcap_type = AT_HWCAP,
+ .hwcap = HWCAP_SVE,
+ .regset = NT_ARM_SVE,
+ .prctl_set = PR_SVE_SET_VL,
+ },
+};
+
+#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
+#define FLAG_TESTS 2
+#define FPSIMD_TESTS 3
+
+#define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types))
+
static void fill_buf(char *buf, size_t size)
{
int i;
@@ -59,7 +80,8 @@ static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
}
-static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size)
+static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
+ void **buf, size_t *size)
{
struct user_sve_header *sve;
void *p;
@@ -80,7 +102,7 @@ static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size)
iov.iov_base = *buf;
iov.iov_len = sz;
- if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov))
+ if (ptrace(PTRACE_GETREGSET, pid, type->regset, &iov))
goto error;
sve = *buf;
@@ -96,17 +118,18 @@ error:
return NULL;
}
-static int set_sve(pid_t pid, const struct user_sve_header *sve)
+static int set_sve(pid_t pid, const struct vec_type *type,
+ const struct user_sve_header *sve)
{
struct iovec iov;
iov.iov_base = (void *)sve;
iov.iov_len = sve->size;
- return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov);
+ return ptrace(PTRACE_SETREGSET, pid, type->regset, &iov);
}
/* Validate setting and getting the inherit flag */
-static void ptrace_set_get_inherit(pid_t child)
+static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type)
{
struct user_sve_header sve;
struct user_sve_header *new_sve = NULL;
@@ -118,9 +141,10 @@ static void ptrace_set_get_inherit(pid_t child)
sve.size = sizeof(sve);
sve.vl = sve_vl_from_vq(SVE_VQ_MIN);
sve.flags = SVE_PT_VL_INHERIT;
- ret = set_sve(child, &sve);
+ ret = set_sve(child, type, &sve);
if (ret != 0) {
- ksft_test_result_fail("Failed to set SVE_PT_VL_INHERIT\n");
+ ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n",
+ type->name);
return;
}
@@ -128,35 +152,39 @@ static void ptrace_set_get_inherit(pid_t child)
* Read back the new register state and verify that we have
* set the flags we expected.
*/
- if (!get_sve(child, (void **)&new_sve, &new_sve_size)) {
- ksft_test_result_fail("Failed to read SVE flags\n");
+ if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) {
+ ksft_test_result_fail("Failed to read %s SVE flags\n",
+ type->name);
return;
}
ksft_test_result(new_sve->flags & SVE_PT_VL_INHERIT,
- "SVE_PT_VL_INHERIT set\n");
+ "%s SVE_PT_VL_INHERIT set\n", type->name);
/* Now clear */
sve.flags &= ~SVE_PT_VL_INHERIT;
- ret = set_sve(child, &sve);
+ ret = set_sve(child, type, &sve);
if (ret != 0) {
- ksft_test_result_fail("Failed to clear SVE_PT_VL_INHERIT\n");
+ ksft_test_result_fail("Failed to clear %s SVE_PT_VL_INHERIT\n",
+ type->name);
return;
}
- if (!get_sve(child, (void **)&new_sve, &new_sve_size)) {
- ksft_test_result_fail("Failed to read SVE flags\n");
+ if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) {
+ ksft_test_result_fail("Failed to read %s SVE flags\n",
+ type->name);
return;
}
ksft_test_result(!(new_sve->flags & SVE_PT_VL_INHERIT),
- "SVE_PT_VL_INHERIT cleared\n");
+ "%s SVE_PT_VL_INHERIT cleared\n", type->name);
free(new_sve);
}
/* Validate attempting to set the specified VL via ptrace */
-static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported)
+static void ptrace_set_get_vl(pid_t child, const struct vec_type *type,
+ unsigned int vl, bool *supported)
{
struct user_sve_header sve;
struct user_sve_header *new_sve = NULL;
@@ -166,10 +194,10 @@ static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported)
*supported = false;
/* Check if the VL is supported in this process */
- prctl_vl = prctl(PR_SVE_SET_VL, vl);
+ prctl_vl = prctl(type->prctl_set, vl);
if (prctl_vl == -1)
- ksft_exit_fail_msg("prctl(PR_SVE_SET_VL) failed: %s (%d)\n",
- strerror(errno), errno);
+ ksft_exit_fail_msg("prctl(PR_%s_SET_VL) failed: %s (%d)\n",
+ type->name, strerror(errno), errno);
/* If the VL is not supported then a supported VL will be returned */
*supported = (prctl_vl == vl);
@@ -178,9 +206,10 @@ static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported)
memset(&sve, 0, sizeof(sve));
sve.size = sizeof(sve);
sve.vl = vl;
- ret = set_sve(child, &sve);
+ ret = set_sve(child, type, &sve);
if (ret != 0) {
- ksft_test_result_fail("Failed to set VL %u\n", vl);
+ ksft_test_result_fail("Failed to set %s VL %u\n",
+ type->name, vl);
return;
}
@@ -188,12 +217,14 @@ static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported)
* Read back the new register state and verify that we have the
* same VL that we got from prctl() on ourselves.
*/
- if (!get_sve(child, (void **)&new_sve, &new_sve_size)) {
- ksft_test_result_fail("Failed to read VL %u\n", vl);
+ if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) {
+ ksft_test_result_fail("Failed to read %s VL %u\n",
+ type->name, vl);
return;
}
- ksft_test_result(new_sve->vl = prctl_vl, "Set VL %u\n", vl);
+ ksft_test_result(new_sve->vl == prctl_vl, "Set %s VL %u\n",
+ type->name, vl);
free(new_sve);
}
@@ -209,7 +240,7 @@ static void check_u32(unsigned int vl, const char *reg,
}
/* Access the FPSIMD registers via the SVE regset */
-static void ptrace_sve_fpsimd(pid_t child)
+static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
{
void *svebuf = NULL;
size_t svebufsz = 0;
@@ -219,17 +250,18 @@ static void ptrace_sve_fpsimd(pid_t child)
unsigned char *p;
/* New process should start with FPSIMD registers only */
- sve = get_sve(child, &svebuf, &svebufsz);
+ sve = get_sve(child, type, &svebuf, &svebufsz);
if (!sve) {
- ksft_test_result_fail("get_sve: %s\n", strerror(errno));
+ ksft_test_result_fail("get_sve(%s): %s\n",
+ type->name, strerror(errno));
return;
} else {
- ksft_test_result_pass("get_sve(FPSIMD)\n");
+ ksft_test_result_pass("get_sve(%s FPSIMD)\n", type->name);
}
ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
- "Set FPSIMD registers\n");
+ "Got FPSIMD registers via %s\n", type->name);
if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
goto out;
@@ -243,9 +275,9 @@ static void ptrace_sve_fpsimd(pid_t child)
p[j] = j;
}
- if (set_sve(child, sve)) {
- ksft_test_result_fail("set_sve(FPSIMD): %s\n",
- strerror(errno));
+ if (set_sve(child, type, sve)) {
+ ksft_test_result_fail("set_sve(%s FPSIMD): %s\n",
+ type->name, strerror(errno));
goto out;
}
@@ -257,16 +289,20 @@ static void ptrace_sve_fpsimd(pid_t child)
goto out;
}
if (memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0)
- ksft_test_result_pass("get_fpsimd() gave same state\n");
+ ksft_test_result_pass("%s get_fpsimd() gave same state\n",
+ type->name);
else
- ksft_test_result_fail("get_fpsimd() gave different state\n");
+ ksft_test_result_fail("%s get_fpsimd() gave different state\n",
+ type->name);
out:
free(svebuf);
}
/* Validate attempting to set SVE data and read SVE data */
-static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl)
+static void ptrace_set_sve_get_sve_data(pid_t child,
+ const struct vec_type *type,
+ unsigned int vl)
{
void *write_buf;
void *read_buf = NULL;
@@ -281,8 +317,8 @@ static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl)
data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
write_buf = malloc(data_size);
if (!write_buf) {
- ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n",
- data_size, vl);
+ ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n",
+ data_size, type->name, vl);
return;
}
write_sve = write_buf;
@@ -306,23 +342,26 @@ static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl)
/* TODO: Generate a valid FFR pattern */
- ret = set_sve(child, write_sve);
+ ret = set_sve(child, type, write_sve);
if (ret != 0) {
- ksft_test_result_fail("Failed to set VL %u data\n", vl);
+ ksft_test_result_fail("Failed to set %s VL %u data\n",
+ type->name, vl);
goto out;
}
/* Read the data back */
- if (!get_sve(child, (void **)&read_buf, &read_sve_size)) {
- ksft_test_result_fail("Failed to read VL %u data\n", vl);
+ if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) {
+ ksft_test_result_fail("Failed to read %s VL %u data\n",
+ type->name, vl);
goto out;
}
read_sve = read_buf;
/* We might read more data if there are extensions we don't know about */
if (read_sve->size < write_sve->size) {
- ksft_test_result_fail("Wrote %d bytes, only read %d\n",
- write_sve->size, read_sve->size);
+ ksft_test_result_fail("%s wrote %d bytes, only read %d\n",
+ type->name, write_sve->size,
+ read_sve->size);
goto out_read;
}
@@ -349,7 +388,8 @@ static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl)
check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq),
read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors);
- ksft_test_result(errors == 0, "Set and get SVE data for VL %u\n", vl);
+ ksft_test_result(errors == 0, "Set and get %s data for VL %u\n",
+ type->name, vl);
out_read:
free(read_buf);
@@ -358,7 +398,9 @@ out:
}
/* Validate attempting to set SVE data and read it back via the FPSIMD regset */
-static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl)
+static void ptrace_set_sve_get_fpsimd_data(pid_t child,
+ const struct vec_type *type,
+ unsigned int vl)
{
void *write_buf;
struct user_sve_header *write_sve;
@@ -376,8 +418,8 @@ static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl)
data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
write_buf = malloc(data_size);
if (!write_buf) {
- ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n",
- data_size, vl);
+ ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n",
+ data_size, type->name, vl);
return;
}
write_sve = write_buf;
@@ -395,16 +437,17 @@ static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl)
fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE);
fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE);
- ret = set_sve(child, write_sve);
+ ret = set_sve(child, type, write_sve);
if (ret != 0) {
- ksft_test_result_fail("Failed to set VL %u data\n", vl);
+ ksft_test_result_fail("Failed to set %s VL %u data\n",
+ type->name, vl);
goto out;
}
/* Read the data back */
if (get_fpsimd(child, &fpsimd_state)) {
- ksft_test_result_fail("Failed to read VL %u FPSIMD data\n",
- vl);
+ ksft_test_result_fail("Failed to read %s VL %u FPSIMD data\n",
+ type->name, vl);
goto out;
}
@@ -419,7 +462,8 @@ static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl)
sizeof(tmp));
if (tmp != fpsimd_state.vregs[i]) {
- printf("# Mismatch in FPSIMD for VL %u Z%d\n", vl, i);
+ printf("# Mismatch in FPSIMD for %s VL %u Z%d\n",
+ type->name, vl, i);
errors++;
}
}
@@ -429,8 +473,8 @@ static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl)
check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq),
&fpsimd_state.fpcr, &errors);
- ksft_test_result(errors == 0, "Set and get FPSIMD data for VL %u\n",
- vl);
+ ksft_test_result(errors == 0, "Set and get FPSIMD data for %s VL %u\n",
+ type->name, vl);
out:
free(write_buf);
@@ -440,7 +484,7 @@ static int do_parent(pid_t child)
{
int ret = EXIT_FAILURE;
pid_t pid;
- int status;
+ int status, i;
siginfo_t si;
unsigned int vq, vl;
bool vl_supported;
@@ -499,26 +543,54 @@ static int do_parent(pid_t child)
}
}
- /* FPSIMD via SVE regset */
- ptrace_sve_fpsimd(child);
-
- /* prctl() flags */
- ptrace_set_get_inherit(child);
+ for (i = 0; i < ARRAY_SIZE(vec_types); i++) {
+ /* FPSIMD via SVE regset */
+ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
+ ptrace_sve_fpsimd(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s FPSIMD get via SVE\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s FPSIMD set via SVE\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s set read via FPSIMD\n",
+ vec_types[i].name);
+ }
- /* Step through every possible VQ */
- for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
- vl = sve_vl_from_vq(vq);
+ /* prctl() flags */
+ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
+ ptrace_set_get_inherit(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s SVE_PT_VL_INHERIT set\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s SVE_PT_VL_INHERIT cleared\n",
+ vec_types[i].name);
+ }
- /* First, try to set this vector length */
- ptrace_set_get_vl(child, vl, &vl_supported);
+ /* Step through every possible VQ */
+ for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
+ vl = sve_vl_from_vq(vq);
+
+ /* First, try to set this vector length */
+ if (getauxval(vec_types[i].hwcap_type) &
+ vec_types[i].hwcap) {
+ ptrace_set_get_vl(child, &vec_types[i], vl,
+ &vl_supported);
+ } else {
+ ksft_test_result_skip("%s get/set VL %d\n",
+ vec_types[i].name, vl);
+ vl_supported = false;
+ }
- /* If the VL is supported validate data set/get */
- if (vl_supported) {
- ptrace_set_sve_get_sve_data(child, vl);
- ptrace_set_sve_get_fpsimd_data(child, vl);
- } else {
- ksft_test_result_skip("set SVE get SVE for VL %d\n", vl);
- ksft_test_result_skip("set SVE get FPSIMD for VL %d\n", vl);
+ /* If the VL is supported validate data set/get */
+ if (vl_supported) {
+ ptrace_set_sve_get_sve_data(child, &vec_types[i], vl);
+ ptrace_set_sve_get_fpsimd_data(child, &vec_types[i], vl);
+ } else {
+ ksft_test_result_skip("%s set SVE get SVE for VL %d\n",
+ vec_types[i].name, vl);
+ ksft_test_result_skip("%s set SVE get FPSIMD for VL %d\n",
+ vec_types[i].name, vl);
+ }
}
}
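
The VQ-to-VL mapping stepped through in the loop above is fixed by the SVE architecture: one vector quantum (VQ) is 16 bytes of vector length, so the tests walk VLs from 16 to 8192 bytes. The uapi headers express this roughly as:

    #define SVE_VQ_BYTES        16    /* one 128-bit quadword */
    #define SVE_VQ_MIN          1
    #define SVE_VQ_MAX          512
    #define sve_vl_from_vq(vq)  ((vq) * SVE_VQ_BYTES)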
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
index 272b888e018e..c90658811a83 100644
--- a/tools/testing/selftests/arm64/fp/vec-syscfg.c
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -21,8 +21,6 @@
#include "../../kselftest.h"
#include "rdvl.h"
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
-
#define ARCH_MIN_VL SVE_VL_MIN
struct vec_data {
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 22722abc9dfa..2f8c23af3b5e 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -310,14 +310,12 @@ int test_setup(struct tdescr *td)
int test_run(struct tdescr *td)
{
- if (td->sig_trig) {
- if (td->trigger)
- return td->trigger(td);
- else
- return default_trigger(td);
- } else {
+ if (td->trigger)
+ return td->trigger(td);
+ else if (td->sig_trig)
+ return default_trigger(td);
+ else
return td->run(td, NULL, NULL);
- }
}
void test_result(struct tdescr *td)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 62fafbeb4672..42ffc24e9e71 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,9 +23,8 @@ BPF_GCC ?= $(shell command -v bpf-gcc;)
SAN_CFLAGS ?=
CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
- -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \
- -Dbpf_prog_load=bpf_prog_test_load \
- -Dbpf_load_program=bpf_test_load_program
+ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
+LDFLAGS += $(SAN_CFLAGS)
LDLIBS += -lcap -lelf -lz -lrt -lpthread
# Silence some warnings when compiled with clang
@@ -46,10 +45,8 @@ ifneq ($(BPF_GCC),)
TEST_GEN_PROGS += test_progs-bpf_gcc
endif
-TEST_GEN_FILES = test_lwt_ip_encap.o \
- test_tc_edt.o
-TEST_FILES = xsk_prereqs.sh \
- $(wildcard progs/btf_dump_test_case_*.c)
+TEST_GEN_FILES = test_lwt_ip_encap.o test_tc_edt.o
+TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
# Order corresponds to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -108,7 +105,10 @@ endif
OVERRIDE_TARGETS := 1
override define CLEAN
$(call msg,CLEAN)
- $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+ $(Q)$(RM) -r $(TEST_GEN_PROGS)
+ $(Q)$(RM) -r $(TEST_GEN_PROGS_EXTENDED)
+ $(Q)$(RM) -r $(TEST_GEN_FILES)
+ $(Q)$(RM) -r $(EXTRA_CLEAN)
$(Q)$(MAKE) -C bpf_testmod clean
$(Q)$(MAKE) docs-clean
endef
@@ -170,7 +170,7 @@ $(OUTPUT)/%:%.c
$(OUTPUT)/urandom_read: urandom_read.c
$(call msg,BINARY,,$@)
- $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id=sha1
+ $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $< $(LDLIBS) -Wl,--build-id=sha1 -o $@
$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
$(call msg,MOD,,$@)
@@ -178,10 +178,6 @@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_tes
$(Q)$(MAKE) $(submake_extras) -C bpf_testmod
$(Q)cp bpf_testmod/bpf_testmod.ko $@
-$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
- $(call msg,CC,,$@)
- $(Q)$(CC) -c $(CFLAGS) -o $@ $<
-
DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
@@ -194,24 +190,34 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL)
-$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
-
-$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
-$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
-$(OUTPUT)/test_sock: cgroup_helpers.c
-$(OUTPUT)/test_sock_addr: cgroup_helpers.c
-$(OUTPUT)/test_sockmap: cgroup_helpers.c
-$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
-$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
-$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
-$(OUTPUT)/test_sock_fields: cgroup_helpers.c
-$(OUTPUT)/test_sysctl: cgroup_helpers.c
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
+
+CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o
+TESTING_HELPERS := $(OUTPUT)/testing_helpers.o
+TRACE_HELPERS := $(OUTPUT)/trace_helpers.o
+
+$(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_skb_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_sock: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_sock_addr: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS)
+$(OUTPUT)/get_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_cgroup_storage: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_tag: $(TESTING_HELPERS)
+$(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS)
+$(OUTPUT)/xdping: $(TESTING_HELPERS)
+$(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS)
+$(OUTPUT)/test_maps: $(TESTING_HELPERS)
+$(OUTPUT)/test_verifier: $(TESTING_HELPERS)
BPFTOOL ?= $(DEFAULT_BPFTOOL)
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
- CC=$(HOSTCC) LD=$(HOSTLD) \
+ ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \
EXTRA_CFLAGS='-g -O0' \
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
@@ -231,18 +237,18 @@ docs-clean:
prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
- ../../../include/uapi/linux/bpf.h \
+ $(APIDIR)/linux/bpf.h \
| $(BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
EXTRA_CFLAGS='-g -O0' \
DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
-$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
- ../../../include/uapi/linux/bpf.h \
- | $(HOST_BUILD_DIR)/libbpf
+$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ $(APIDIR)/linux/bpf.h \
+ | $(HOST_BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \
- EXTRA_CFLAGS='-g -O0' \
+ EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE= \
OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif
@@ -304,12 +310,12 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
# $3 - CFLAGS
define CLANG_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v3
+ $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v3 -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v2
+ $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v2 -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
@@ -323,9 +329,10 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
linked_vars.skel.h linked_maps.skel.h
LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
- test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c
+ test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \
+ map_ptr_kern.c core_kern.c
# Generate both light skeleton and libbpf skeleton for these
-LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c
+LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test_subprog.c
SKEL_BLACKLIST += $$(LSKELS)
test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
@@ -471,13 +478,12 @@ TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs
TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
network_helpers.c testing_helpers.c \
- btf_helpers.c flow_dissector_load.h
+ btf_helpers.c flow_dissector_load.h
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
ima_setup.sh \
$(wildcard progs/btf_dump_test_case_*.c)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
-TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
-TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS
$(eval $(call DEFINE_TEST_RUNNER,test_progs))
# Define test_progs-no_alu32 test runner.
@@ -530,16 +536,22 @@ $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
$(OUTPUT)/perfbuf_bench.skel.h
$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
+$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
+$(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
-$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
+$(OUTPUT)/bench: $(OUTPUT)/bench.o \
+ $(TESTING_HELPERS) \
+ $(TRACE_HELPERS) \
$(OUTPUT)/bench_count.o \
$(OUTPUT)/bench_rename.o \
$(OUTPUT)/bench_trigger.o \
$(OUTPUT)/bench_ringbufs.o \
- $(OUTPUT)/bench_bloom_filter_map.o
+ $(OUTPUT)/bench_bloom_filter_map.o \
+ $(OUTPUT)/bench_bpf_loop.o \
+ $(OUTPUT)/bench_strncmp.o
$(call msg,BINARY,,$@)
- $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
+ $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 5e287e445f75..42ef250c7acc 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -204,16 +204,17 @@ __ https://reviews.llvm.org/D93563
btf_tag test and Clang version
==============================
-The btf_tag selftest require LLVM support to recognize the btf_decl_tag attribute.
-It was introduced in `Clang 14`__.
+The btf_tag selftest requires LLVM support to recognize the btf_decl_tag and
+btf_type_tag attributes. They were introduced in `Clang 14` [0_, 1_].
-Without it, the btf_tag selftest will be skipped and you will observe:
+Without them, the btf_tag selftest will be skipped and you will observe:
.. code-block:: console
#<test_num> btf_tag:SKIP
-__ https://reviews.llvm.org/D111588
+.. _0: https://reviews.llvm.org/D111588
+.. _1: https://reviews.llvm.org/D111199
Clang dependencies for static linking tests
===========================================
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index cc4722f693e9..f973320e6dbf 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -29,26 +29,10 @@ static int libbpf_print_fn(enum libbpf_print_level level,
return vfprintf(stderr, format, args);
}
-static int bump_memlock_rlimit(void)
+void setup_libbpf(void)
{
- struct rlimit rlim_new = {
- .rlim_cur = RLIM_INFINITY,
- .rlim_max = RLIM_INFINITY,
- };
-
- return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
-}
-
-void setup_libbpf()
-{
- int err;
-
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
-
- err = bump_memlock_rlimit();
- if (err)
- fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err);
}
void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns)
@@ -134,6 +118,39 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt)
total_ops_mean, total_ops_stddev);
}
+void ops_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double hits_per_sec, hits_per_prod;
+
+ hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+ hits_per_prod = hits_per_sec / env.producer_cnt;
+
+ printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+
+ printf("hits %8.3lfM/s (%7.3lfM/prod)\n", hits_per_sec, hits_per_prod);
+}
+
+void ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++)
+ hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+ (hits_mean - res[i].hits / 1000000.0) /
+ (res_cnt - 1.0);
+
+ hits_stddev = sqrt(hits_stddev);
+ }
+ printf("Summary: throughput %8.3lf \u00B1 %5.3lf M ops/s (%7.3lfM ops/prod), ",
+ hits_mean, hits_stddev, hits_mean / env.producer_cnt);
+ printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt);
+}
+
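
For concreteness, a worked example of the summary arithmetic above, using made-up numbers rather than figures from any real run:

    /*
     * env.producer_cnt = 2, hits_mean = 4.0 (4.0 M aggregate ops/s):
     *
     *   throughput per producer = 4.0 / 2          = 2.0 M ops/prod
     *   latency                 = 1000.0 / 4.0 * 2 = 500 ns/op
     */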
const char *argp_program_version = "benchmark";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] =
@@ -171,10 +188,14 @@ static const struct argp_option opts[] = {
extern struct argp bench_ringbufs_argp;
extern struct argp bench_bloom_map_argp;
+extern struct argp bench_bpf_loop_argp;
+extern struct argp bench_strncmp_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
{ &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 },
+ { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 },
+ { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
{},
};
@@ -359,6 +380,11 @@ extern const struct bench bench_trig_kprobe;
extern const struct bench bench_trig_fentry;
extern const struct bench bench_trig_fentry_sleep;
extern const struct bench bench_trig_fmodret;
+extern const struct bench bench_trig_uprobe_base;
+extern const struct bench bench_trig_uprobe_with_nop;
+extern const struct bench bench_trig_uretprobe_with_nop;
+extern const struct bench bench_trig_uprobe_without_nop;
+extern const struct bench bench_trig_uretprobe_without_nop;
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
extern const struct bench bench_pb_libbpf;
@@ -368,6 +394,9 @@ extern const struct bench bench_bloom_update;
extern const struct bench bench_bloom_false_positive;
extern const struct bench bench_hashmap_without_bloom;
extern const struct bench bench_hashmap_with_bloom;
+extern const struct bench bench_bpf_loop;
+extern const struct bench bench_strncmp_no_helper;
+extern const struct bench bench_strncmp_helper;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -385,6 +414,11 @@ static const struct bench *benchs[] = {
&bench_trig_fentry,
&bench_trig_fentry_sleep,
&bench_trig_fmodret,
+ &bench_trig_uprobe_base,
+ &bench_trig_uprobe_with_nop,
+ &bench_trig_uretprobe_with_nop,
+ &bench_trig_uprobe_without_nop,
+ &bench_trig_uretprobe_without_nop,
&bench_rb_libbpf,
&bench_rb_custom,
&bench_pb_libbpf,
@@ -394,6 +428,9 @@ static const struct bench *benchs[] = {
&bench_bloom_false_positive,
&bench_hashmap_without_bloom,
&bench_hashmap_with_bloom,
+ &bench_bpf_loop,
+ &bench_strncmp_no_helper,
+ &bench_strncmp_helper,
};
static void setup_benchmark()
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index 624c6b11501f..fb3e213df3dc 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -38,8 +38,8 @@ struct bench_res {
struct bench {
const char *name;
- void (*validate)();
- void (*setup)();
+ void (*validate)(void);
+ void (*setup)(void);
void *(*producer_thread)(void *ctx);
void *(*consumer_thread)(void *ctx);
void (*measure)(struct bench_res* res);
@@ -54,13 +54,16 @@ struct counter {
extern struct env env;
extern const struct bench *bench;
-void setup_libbpf();
+void setup_libbpf(void);
void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns);
void hits_drops_report_final(struct bench_res res[], int res_cnt);
void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns);
void false_hits_report_final(struct bench_res res[], int res_cnt);
+void ops_report_progress(int iter, struct bench_res *res, long delta_ns);
+void ops_report_final(struct bench_res res[], int res_cnt);
-static inline __u64 get_time_ns() {
+static inline __u64 get_time_ns(void)
+{
struct timespec t;
clock_gettime(CLOCK_MONOTONIC, &t);
diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
index 6eeeed2913e6..5bcb8a8cdeb2 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
@@ -63,29 +63,34 @@ static const struct argp_option opts[] = {
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
+ long ret;
+
switch (key) {
case ARG_NR_ENTRIES:
- args.nr_entries = strtol(arg, NULL, 10);
- if (args.nr_entries == 0) {
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
fprintf(stderr, "Invalid nr_entries count.");
argp_usage(state);
}
+ args.nr_entries = ret;
break;
case ARG_NR_HASH_FUNCS:
- args.nr_hash_funcs = strtol(arg, NULL, 10);
- if (args.nr_hash_funcs == 0 || args.nr_hash_funcs > 15) {
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > 15) {
fprintf(stderr,
"The bloom filter must use 1 to 15 hash functions.");
argp_usage(state);
}
+ args.nr_hash_funcs = ret;
break;
case ARG_VALUE_SIZE:
- args.value_size = strtol(arg, NULL, 10);
- if (args.value_size < 2 || args.value_size > 256) {
+ ret = strtol(arg, NULL, 10);
+ if (ret < 2 || ret > 256) {
fprintf(stderr,
"Invalid value size. Must be between 2 and 256 bytes");
argp_usage(state);
}
+ args.value_size = ret;
break;
default:
return ARGP_ERR_UNKNOWN;
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
new file mode 100644
index 000000000000..d0a6572bfab6
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <argp.h>
+#include "bench.h"
+#include "bpf_loop_bench.skel.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+ struct bpf_loop_bench *skel;
+} ctx;
+
+static struct {
+ __u32 nr_loops;
+} args = {
+ .nr_loops = 10,
+};
+
+enum {
+ ARG_NR_LOOPS = 4000,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_loops", ARG_NR_LOOPS, "nr_loops", 0,
+ "Set number of loops for the bpf_loop helper"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_NR_LOOPS:
+ args.nr_loops = strtol(arg, NULL, 10);
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_bpf_loop_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+}
+
+static void *producer(void *input)
+{
+ while (true)
+ /* trigger the bpf program */
+ syscall(__NR_getpgid);
+
+ return NULL;
+}
+
+static void *consumer(void *input)
+{
+ return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+static void setup(void)
+{
+ struct bpf_link *link;
+
+ setup_libbpf();
+
+ ctx.skel = bpf_loop_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ link = bpf_program__attach(ctx.skel->progs.benchmark);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+
+ ctx.skel->bss->nr_loops = args.nr_loops;
+}
+
+const struct bench bench_bpf_loop = {
+ .name = "bpf-loop",
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
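
The BPF half of this benchmark, bpf_loop_bench.bpf.c, is not part of this excerpt. A minimal sketch of what the skeleton calls above imply: the program name benchmark and the nr_loops/hits globals come from the userspace code, while the attach point and the empty callback are assumptions:

    // SPDX-License-Identifier: GPL-2.0
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    char _license[] SEC("license") = "GPL";

    u32 nr_loops;   /* written by setup() before the run */
    long hits;      /* drained by measure() via atomic_swap() */

    static int empty_callback(__u32 index, void *data)
    {
        return 0;
    }

    SEC("fentry/__x64_sys_getpgid") /* x86-64; the producer loops on getpgid() */
    int benchmark(void *ctx)
    {
        /* each syscall drives nr_loops iterations of the helper */
        bpf_loop(nr_loops, empty_callback, NULL, 0);
        __sync_add_and_fetch(&hits, nr_loops);
        return 0;
    }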
diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c
index befba7a82643..078972ce208e 100644
--- a/tools/testing/selftests/bpf/benchs/bench_count.c
+++ b/tools/testing/selftests/bpf/benchs/bench_count.c
@@ -36,7 +36,7 @@ static struct count_local_ctx {
struct counter *hits;
} count_local_ctx;
-static void count_local_setup()
+static void count_local_setup(void)
{
struct count_local_ctx *ctx = &count_local_ctx;
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
index c7ec114eca56..3c203b6d6a6e 100644
--- a/tools/testing/selftests/bpf/benchs/bench_rename.c
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -11,7 +11,7 @@ static struct ctx {
int fd;
} ctx;
-static void validate()
+static void validate(void)
{
if (env.producer_cnt != 1) {
fprintf(stderr, "benchmark doesn't support multi-producer!\n");
@@ -43,7 +43,7 @@ static void measure(struct bench_res *res)
res->hits = atomic_swap(&ctx.hits.value, 0);
}
-static void setup_ctx()
+static void setup_ctx(void)
{
setup_libbpf();
@@ -71,36 +71,36 @@ static void attach_bpf(struct bpf_program *prog)
}
}
-static void setup_base()
+static void setup_base(void)
{
setup_ctx();
}
-static void setup_kprobe()
+static void setup_kprobe(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog1);
}
-static void setup_kretprobe()
+static void setup_kretprobe(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog2);
}
-static void setup_rawtp()
+static void setup_rawtp(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog3);
}
-static void setup_fentry()
+static void setup_fentry(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog4);
}
-static void setup_fexit()
+static void setup_fexit(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog5);
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index d167bffac679..da8593b3494a 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -88,12 +88,12 @@ const struct argp bench_ringbufs_argp = {
static struct counter buf_hits;
-static inline void bufs_trigger_batch()
+static inline void bufs_trigger_batch(void)
{
(void)syscall(__NR_getpgid);
}
-static void bufs_validate()
+static void bufs_validate(void)
{
if (env.consumer_cnt != 1) {
fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
@@ -132,7 +132,7 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}
-static struct ringbuf_bench *ringbuf_setup_skeleton()
+static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
struct ringbuf_bench *skel;
@@ -167,7 +167,7 @@ static int buf_process_sample(void *ctx, void *data, size_t len)
return 0;
}
-static void ringbuf_libbpf_setup()
+static void ringbuf_libbpf_setup(void)
{
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
struct bpf_link *link;
@@ -223,7 +223,7 @@ static void ringbuf_custom_measure(struct bench_res *res)
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}
-static void ringbuf_custom_setup()
+static void ringbuf_custom_setup(void)
{
struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
const size_t page_size = getpagesize();
@@ -352,7 +352,7 @@ static void perfbuf_measure(struct bench_res *res)
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}
-static struct perfbuf_bench *perfbuf_setup_skeleton()
+static struct perfbuf_bench *perfbuf_setup_skeleton(void)
{
struct perfbuf_bench *skel;
@@ -390,15 +390,10 @@ perfbuf_process_sample_raw(void *input_ctx, int cpu,
return LIBBPF_PERF_EVENT_CONT;
}
-static void perfbuf_libbpf_setup()
+static void perfbuf_libbpf_setup(void)
{
struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
struct perf_event_attr attr;
- struct perf_buffer_raw_opts pb_opts = {
- .event_cb = perfbuf_process_sample_raw,
- .ctx = (void *)(long)0,
- .attr = &attr,
- };
struct bpf_link *link;
ctx->skel = perfbuf_setup_skeleton();
@@ -423,7 +418,8 @@ static void perfbuf_libbpf_setup()
}
ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
- args.perfbuf_sz, &pb_opts);
+ args.perfbuf_sz, &attr,
+ perfbuf_process_sample_raw, NULL, NULL);
if (!ctx->perfbuf) {
fprintf(stderr, "failed to create perfbuf\n");
exit(1);
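
For context, the reworked call reflects the libbpf 0.6 API, where the perf_event attributes and the sample callback moved out of struct perf_buffer_raw_opts into direct arguments:

    struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
                                             struct perf_event_attr *attr,
                                             perf_buffer_event_fn event_cb,
                                             void *ctx,
                                             const struct perf_buffer_raw_opts *opts);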
diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c
new file mode 100644
index 000000000000..494b591c0289
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <argp.h>
+#include "bench.h"
+#include "strncmp_bench.skel.h"
+
+static struct strncmp_ctx {
+ struct strncmp_bench *skel;
+} ctx;
+
+static struct strncmp_args {
+ u32 cmp_str_len;
+} args = {
+ .cmp_str_len = 32,
+};
+
+enum {
+ ARG_CMP_STR_LEN = 5000,
+};
+
+static const struct argp_option opts[] = {
+ { "cmp-str-len", ARG_CMP_STR_LEN, "CMP_STR_LEN", 0,
+ "Set the length of compared string" },
+ {},
+};
+
+static error_t strncmp_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_CMP_STR_LEN:
+ args.cmp_str_len = strtoul(arg, NULL, 10);
+ if (!args.cmp_str_len ||
+ args.cmp_str_len >= sizeof(ctx.skel->bss->str)) {
+ fprintf(stderr, "Invalid cmp str len (limit %zu)\n",
+ sizeof(ctx.skel->bss->str));
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_strncmp_argp = {
+ .options = opts,
+ .parser = strncmp_parse_arg,
+};
+
+static void strncmp_validate(void)
+{
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "strncmp benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+}
+
+static void strncmp_setup(void)
+{
+ int err;
+ char *target;
+ size_t i, sz;
+
+ sz = sizeof(ctx.skel->rodata->target);
+ if (!sz || sz < sizeof(ctx.skel->bss->str)) {
+ fprintf(stderr, "invalid string size (target %zu, src %zu)\n",
+ sz, sizeof(ctx.skel->bss->str));
+ exit(1);
+ }
+
+ setup_libbpf();
+
+ ctx.skel = strncmp_bench__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ srandom(time(NULL));
+ target = ctx.skel->rodata->target;
+ for (i = 0; i < sz - 1; i++)
+ target[i] = '1' + random() % 9;
+ target[sz - 1] = '\0';
+
+ ctx.skel->rodata->cmp_str_len = args.cmp_str_len;
+
+ memcpy(ctx.skel->bss->str, target, args.cmp_str_len);
+ ctx.skel->bss->str[args.cmp_str_len] = '\0';
+ /* Make bss->str < rodata->target */
+ ctx.skel->bss->str[args.cmp_str_len - 1] -= 1;
+
+ err = strncmp_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ strncmp_bench__destroy(ctx.skel);
+ exit(1);
+ }
+}
+
+static void strncmp_attach_prog(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(prog);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void strncmp_no_helper_setup(void)
+{
+ strncmp_setup();
+ strncmp_attach_prog(ctx.skel->progs.strncmp_no_helper);
+}
+
+static void strncmp_helper_setup(void)
+{
+ strncmp_setup();
+ strncmp_attach_prog(ctx.skel->progs.strncmp_helper);
+}
+
+static void *strncmp_producer(void *ctx)
+{
+ while (true)
+ (void)syscall(__NR_getpgid);
+ return NULL;
+}
+
+static void *strncmp_consumer(void *ctx)
+{
+ return NULL;
+}
+
+static void strncmp_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+const struct bench bench_strncmp_no_helper = {
+ .name = "strncmp-no-helper",
+ .validate = strncmp_validate,
+ .setup = strncmp_no_helper_setup,
+ .producer_thread = strncmp_producer,
+ .consumer_thread = strncmp_consumer,
+ .measure = strncmp_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_strncmp_helper = {
+ .name = "strncmp-helper",
+ .validate = strncmp_validate,
+ .setup = strncmp_helper_setup,
+ .producer_thread = strncmp_producer,
+ .consumer_thread = strncmp_consumer,
+ .measure = strncmp_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
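
The BPF half, strncmp_bench.bpf.c, is also outside this excerpt. A sketch consistent with the userspace code above; the section name, the open-coded comparison loop, and the 4096-byte buffer size (suggested by the run script's 4095 maximum) are assumptions:

    // SPDX-License-Identifier: GPL-2.0
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    #define STRNCMP_STR_SZ 4096

    char _license[] SEC("license") = "GPL";

    const volatile unsigned int cmp_str_len = 1;  /* set before load */
    const char target[STRNCMP_STR_SZ];            /* random digits from setup */
    char str[STRNCMP_STR_SZ];                     /* target, last byte decremented */
    long hits = 0;

    static __always_inline int local_strncmp(const char *s1, unsigned int sz,
                                             const char *s2)
    {
        unsigned int i;
        int ret = 0;

        for (i = 0; i < sz; i++) {
            ret = (unsigned char)s1[i] - (unsigned char)s2[i];
            if (ret || !s1[i])
                break;
        }
        return ret;
    }

    SEC("tp/syscalls/sys_enter_getpgid")
    int strncmp_no_helper(void *ctx)
    {
        if (local_strncmp(str, cmp_str_len + 1, target) < 0)
            __sync_add_and_fetch(&hits, 1);
        return 0;
    }

    SEC("tp/syscalls/sys_enter_getpgid")
    int strncmp_helper(void *ctx)
    {
        if (bpf_strncmp(str, cmp_str_len + 1, target) < 0)
            __sync_add_and_fetch(&hits, 1);
        return 0;
    }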
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index f41a491a8cc0..7f957c55a3ca 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2020 Facebook */
#include "bench.h"
#include "trigger_bench.skel.h"
+#include "trace_helpers.h"
/* BPF triggering benchmarks */
static struct trigger_ctx {
@@ -10,7 +11,7 @@ static struct trigger_ctx {
static struct counter base_hits;
-static void trigger_validate()
+static void trigger_validate(void)
{
if (env.consumer_cnt != 1) {
fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
@@ -44,7 +45,7 @@ static void trigger_measure(struct bench_res *res)
res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
}
-static void setup_ctx()
+static void setup_ctx(void)
{
setup_libbpf();
@@ -66,37 +67,37 @@ static void attach_bpf(struct bpf_program *prog)
}
}
-static void trigger_tp_setup()
+static void trigger_tp_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_tp);
}
-static void trigger_rawtp_setup()
+static void trigger_rawtp_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_raw_tp);
}
-static void trigger_kprobe_setup()
+static void trigger_kprobe_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}
-static void trigger_fentry_setup()
+static void trigger_fentry_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
-static void trigger_fentry_sleep_setup()
+static void trigger_fentry_sleep_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
}
-static void trigger_fmodret_setup()
+static void trigger_fmodret_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
@@ -107,6 +108,101 @@ static void *trigger_consumer(void *input)
return NULL;
}
+/* Make sure the call is not inlined and not optimized away by the
+ * compiler: hence the __weak attribute and the inline asm volatile in
+ * the body of each function.
+ *
+ * There is a performance difference between uprobing at a nop location
+ * vs other instructions, so use two different targets, one of which
+ * starts with a nop and one which doesn't.
+ *
+ * GCC doesn't generate a stack setup preamble for these functions due
+ * to them having no input arguments and doing nothing in the body.
+ */
+__weak void uprobe_target_with_nop(void)
+{
+ asm volatile ("nop");
+}
+
+__weak void uprobe_target_without_nop(void)
+{
+ asm volatile ("");
+}
+
+static void *uprobe_base_producer(void *input)
+{
+ while (true) {
+ uprobe_target_with_nop();
+ atomic_inc(&base_hits.value);
+ }
+ return NULL;
+}
+
+static void *uprobe_producer_with_nop(void *input)
+{
+ while (true)
+ uprobe_target_with_nop();
+ return NULL;
+}
+
+static void *uprobe_producer_without_nop(void *input)
+{
+ while (true)
+ uprobe_target_without_nop();
+ return NULL;
+}
+
+static void usetup(bool use_retprobe, bool use_nop)
+{
+ size_t uprobe_offset;
+ ssize_t base_addr;
+ struct bpf_link *link;
+
+ setup_libbpf();
+
+ ctx.skel = trigger_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ base_addr = get_base_addr();
+ if (use_nop)
+ uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop, base_addr);
+ else
+ uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop, base_addr);
+
+ link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
+ use_retprobe,
+ -1 /* all PIDs */,
+ "/proc/self/exe",
+ uprobe_offset);
+ if (!link) {
+ fprintf(stderr, "failed to attach uprobe!\n");
+ exit(1);
+ }
+ ctx.skel->links.bench_trigger_uprobe = link;
+}
+
+static void uprobe_setup_with_nop(void)
+{
+ usetup(false, true);
+}
+
+static void uretprobe_setup_with_nop(void)
+{
+ usetup(true, true);
+}
+
+static void uprobe_setup_without_nop(void)
+{
+ usetup(false, false);
+}
+
+static void uretprobe_setup_without_nop(void)
+{
+ usetup(true, false);
+}
+
const struct bench bench_trig_base = {
.name = "trig-base",
.validate = trigger_validate,
@@ -182,3 +278,53 @@ const struct bench bench_trig_fmodret = {
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
+
+const struct bench bench_trig_uprobe_base = {
+ .name = "trig-uprobe-base",
+ .setup = NULL, /* no uprobe/uretprobe is attached */
+ .producer_thread = uprobe_base_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_base_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uprobe_with_nop = {
+ .name = "trig-uprobe-with-nop",
+ .setup = uprobe_setup_with_nop,
+ .producer_thread = uprobe_producer_with_nop,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uretprobe_with_nop = {
+ .name = "trig-uretprobe-with-nop",
+ .setup = uretprobe_setup_with_nop,
+ .producer_thread = uprobe_producer_with_nop,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uprobe_without_nop = {
+ .name = "trig-uprobe-without-nop",
+ .setup = uprobe_setup_without_nop,
+ .producer_thread = uprobe_producer_without_nop,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uretprobe_without_nop = {
+ .name = "trig-uretprobe-without-nop",
+ .setup = uretprobe_setup_without_nop,
+ .producer_thread = uprobe_producer_without_nop,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
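
The two trace_helpers.h calls in usetup() resolve a code pointer in the running binary to the file offset that uprobe attachment expects, presumably by walking /proc/self/maps; signatures inferred from the calls above:

    ssize_t get_base_addr(void);    /* base of the executable mapping */
    ssize_t get_uprobe_offset(const void *addr, ssize_t base);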
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh
new file mode 100755
index 000000000000..d4f5f73b356b
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+for t in 1 4 8 12 16; do
+	for i in 10 100 500 1000 5000 10000 50000 100000 500000 1000000; do
+		subtitle "nr_loops: $i, nr_threads: $t"
+		summarize_ops "bpf_loop: " \
+			"$($RUN_BENCH -p $t --nr_loops $i bpf-loop)"
+		printf "\n"
+	done
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh
new file mode 100755
index 000000000000..142697284b45
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+for s in 1 8 64 512 2048 4095; do
+ for b in no-helper helper; do
+ summarize ${b}-${s} "$($RUN_BENCH --cmp-str-len=$s strncmp-${b})"
+ done
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh
index 9a16be78b180..6c5e6023a69f 100644
--- a/tools/testing/selftests/bpf/benchs/run_common.sh
+++ b/tools/testing/selftests/bpf/benchs/run_common.sh
@@ -33,6 +33,14 @@ function percentage()
echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/"
}
+function ops()
+{
+ echo -n "throughput: "
+ echo -n "$*" | sed -E "s/.*throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+ echo -n -e ", latency: "
+ echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
+}
+
function total()
{
echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
@@ -52,6 +60,13 @@ function summarize_percentage()
printf "%-20s %s%%\n" "$bench" "$(percentage $summary)"
}
+function summarize_ops()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s\n" "$bench" "$(ops $summary)"
+}
+
function summarize_total()
{
bench="$1"
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 5d52ea2768df..df3b292a8ffe 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -33,6 +33,22 @@ noinline int bpf_testmod_loop_test(int n)
return sum;
}
+__weak noinline struct file *bpf_testmod_return_ptr(int arg)
+{
+ static struct file f = {};
+
+ switch (arg) {
+ case 1: return (void *)EINVAL; /* user addr */
+ case 2: return (void *)0xcafe4a11; /* user addr */
+ case 3: return (void *)-EINVAL; /* canonical, but invalid */
+ case 4: return (void *)(1ull << 60); /* non-canonical and invalid */
+ case 5: return (void *)~(1ull << 30); /* trigger extable */
+ case 6: return &f; /* valid addr */
+ case 7: return (void *)((long)&f | 1); /* kernel tricks */
+ default: return NULL;
+ }
+}
+
noinline ssize_t
bpf_testmod_test_read(struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr,
@@ -43,6 +59,10 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
.off = off,
.len = len,
};
+ int i = 1;
+
+ while (bpf_testmod_return_ptr(i))
+ i++;
/* This is always true. Use the check to make sure the compiler
* doesn't remove bpf_testmod_loop_test.
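
The consumer of these return values is not part of this hunk; a sketch, assumed rather than taken from this patch, of the kind of fexit program they feed. Probing the bogus pointers exercises bpf_probe_read_kernel() and the JIT's exception-table fixups instead of crashing the kernel:

    /* needs vmlinux.h, <bpf/bpf_helpers.h> and <bpf/bpf_tracing.h> */
    SEC("fexit/bpf_testmod_return_ptr")
    int BPF_PROG(handle_fexit_ret, int arg, struct file *ret)
    {
        long buf = 0;

        /* probe-read either succeeds or returns -EFAULT */
        bpf_probe_read_kernel(&buf, sizeof(buf), ret);
        /* a direct BTF-pointer load faults through the extable */
        if (ret)
            buf = (long)ret->f_mode;
        return 0;
    }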
diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
index b5b6b013a245..b5941d514e17 100644
--- a/tools/testing/selftests/bpf/btf_helpers.c
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -25,11 +25,12 @@ static const char * const btf_kind_str_mapping[] = {
[BTF_KIND_DATASEC] = "DATASEC",
[BTF_KIND_FLOAT] = "FLOAT",
[BTF_KIND_DECL_TAG] = "DECL_TAG",
+ [BTF_KIND_TYPE_TAG] = "TYPE_TAG",
};
static const char *btf_kind_str(__u16 kind)
{
- if (kind > BTF_KIND_DECL_TAG)
+ if (kind > BTF_KIND_TYPE_TAG)
return "UNKNOWN";
return btf_kind_str_mapping[kind];
}
@@ -109,6 +110,7 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
case BTF_KIND_VOLATILE:
case BTF_KIND_RESTRICT:
case BTF_KIND_TYPEDEF:
+ case BTF_KIND_TYPE_TAG:
fprintf(out, " type_id=%u", t->type);
break;
case BTF_KIND_ARRAY: {
@@ -238,7 +240,6 @@ const char *btf_type_c_dump(const struct btf *btf)
static char buf[16 * 1024];
FILE *buf_file;
struct btf_dump *d = NULL;
- struct btf_dump_opts opts = {};
int err, i;
buf_file = fmemopen(buf, sizeof(buf) - 1, "w");
@@ -247,22 +248,26 @@ const char *btf_type_c_dump(const struct btf *btf)
return NULL;
}
- opts.ctx = buf_file;
- d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
+ d = btf_dump__new(btf, btf_dump_printf, buf_file, NULL);
if (libbpf_get_error(d)) {
fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d));
- return NULL;
+ goto err_out;
}
for (i = 1; i < btf__type_cnt(btf); i++) {
err = btf_dump__dump_type(d, i);
if (err) {
fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err);
- return NULL;
+ goto err_out;
}
}
+ btf_dump__free(d);
fflush(buf_file);
fclose(buf_file);
return buf;
+err_out:
+ btf_dump__free(d);
+ fclose(buf_file);
+ return NULL;
}
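
For context, btf_dump__new() changed in libbpf 0.6: the print callback and its context are now direct arguments rather than members of btf_dump_opts:

    struct btf_dump *btf_dump__new(const struct btf *btf,
                                   btf_dump_printf_fn_t printf_fn,
                                   void *ctx,
                                   const struct btf_dump_opts *opts);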
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 5192305159ec..f6287132fa89 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -38,7 +38,9 @@ CONFIG_IPV6_SIT=m
CONFIG_BPF_JIT=y
CONFIG_BPF_LSM=y
CONFIG_SECURITY=y
+CONFIG_RC_CORE=y
CONFIG_LIRC=y
+CONFIG_BPF_LIRC_MODE2=y
CONFIG_IMA=y
CONFIG_SECURITYFS=y
CONFIG_IMA_WRITE_POLICY=y
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h
index 9d0acc2fc6cc..f40b585f4e7e 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.h
+++ b/tools/testing/selftests/bpf/flow_dissector_load.h
@@ -4,6 +4,7 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "testing_helpers.h"
static inline int bpf_flow_load(struct bpf_object **obj,
const char *path,
@@ -18,7 +19,7 @@ static inline int bpf_flow_load(struct bpf_object **obj,
int prog_array_fd;
int ret, fd, i;
- ret = bpf_prog_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj,
+ ret = bpf_prog_test_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj,
prog_fd);
if (ret)
return ret;
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index 99628e1a1e58..3a7b82bd9e94 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -19,6 +19,7 @@
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
#include "bpf_rlimit.h"
#define CHECK(condition, tag, format...) ({ \
@@ -66,8 +67,8 @@ int main(int argc, char **argv)
if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
return 1;
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno))
goto cleanup_cgroup_env;
cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids");
diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
index f4d870da7684..78c76496b14a 100644
--- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
@@ -68,13 +68,6 @@ static void map_batch_verify(int *visited, __u32 max_entries, int *keys,
static void __test_map_lookup_and_update_batch(bool is_pcpu)
{
- struct bpf_create_map_attr xattr = {
- .name = "array_map",
- .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY :
- BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(__s64),
- };
int map_fd, *keys, *visited;
__u32 count, total, total_success;
const __u32 max_entries = 10;
@@ -86,10 +79,10 @@ static void __test_map_lookup_and_update_batch(bool is_pcpu)
.flags = 0,
);
- xattr.max_entries = max_entries;
- map_fd = bpf_create_map_xattr(&xattr);
+ map_fd = bpf_map_create(is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY : BPF_MAP_TYPE_ARRAY,
+ "array_map", sizeof(int), sizeof(__s64), max_entries, NULL);
CHECK(map_fd == -1,
- "bpf_create_map_xattr()", "error:%s\n", strerror(errno));
+ "bpf_map_create()", "error:%s\n", strerror(errno));
value_size = sizeof(__s64);
if (is_pcpu)
diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
index 976bf415fbdd..f807d53fd8dd 100644
--- a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
@@ -83,22 +83,15 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu)
int err, step, value_size;
bool nospace_err;
void *values;
- struct bpf_create_map_attr xattr = {
- .name = "hash_map",
- .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH :
- BPF_MAP_TYPE_HASH,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- };
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
);
- xattr.max_entries = max_entries;
- map_fd = bpf_create_map_xattr(&xattr);
+ map_fd = bpf_map_create(is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH : BPF_MAP_TYPE_HASH,
+ "hash_map", sizeof(int), sizeof(int), max_entries, NULL);
CHECK(map_fd == -1,
- "bpf_create_map_xattr()", "error:%s\n", strerror(errno));
+ "bpf_map_create()", "error:%s\n", strerror(errno));
value_size = is_pcpu ? sizeof(value) : sizeof(int);
keys = malloc(max_entries * sizeof(int));
diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
index 2e986e5e4cac..87d07b596e17 100644
--- a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
@@ -64,13 +64,7 @@ static void map_batch_verify(int *visited, __u32 max_entries,
void test_lpm_trie_map_batch_ops(void)
{
- struct bpf_create_map_attr xattr = {
- .name = "lpm_trie_map",
- .map_type = BPF_MAP_TYPE_LPM_TRIE,
- .key_size = sizeof(struct test_lpm_key),
- .value_size = sizeof(int),
- .map_flags = BPF_F_NO_PREALLOC,
- };
+ LIBBPF_OPTS(bpf_map_create_opts, create_opts, .map_flags = BPF_F_NO_PREALLOC);
struct test_lpm_key *keys, key;
int map_fd, *values, *visited;
__u32 step, count, total, total_success;
@@ -82,9 +76,10 @@ void test_lpm_trie_map_batch_ops(void)
.flags = 0,
);
- xattr.max_entries = max_entries;
- map_fd = bpf_create_map_xattr(&xattr);
- CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n",
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map",
+ sizeof(struct test_lpm_key), sizeof(int),
+ max_entries, &create_opts);
+ CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n",
strerror(errno));
keys = malloc(max_entries * sizeof(struct test_lpm_key));
diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
index e569edc679d8..099eb4dfd4f7 100644
--- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
+++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
@@ -19,16 +19,12 @@
#include <test_btf.h>
#include <test_maps.h>
-static struct bpf_create_map_attr xattr = {
- .name = "sk_storage_map",
- .map_type = BPF_MAP_TYPE_SK_STORAGE,
- .map_flags = BPF_F_NO_PREALLOC,
- .max_entries = 0,
- .key_size = 4,
- .value_size = 8,
+static struct bpf_map_create_opts map_opts = {
+ .sz = sizeof(map_opts),
.btf_key_type_id = 1,
.btf_value_type_id = 3,
.btf_fd = -1,
+ .map_flags = BPF_F_NO_PREALLOC,
};
static unsigned int nr_sk_threads_done;
@@ -140,7 +136,7 @@ static int load_btf(void)
memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types),
btf_str_sec, sizeof(btf_str_sec));
- return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0);
+ return bpf_btf_load(raw_btf, sizeof(raw_btf), NULL);
}
static int create_sk_storage_map(void)
@@ -150,13 +146,13 @@ static int create_sk_storage_map(void)
btf_fd = load_btf();
CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n",
btf_fd, errno);
- xattr.btf_fd = btf_fd;
+ map_opts.btf_fd = btf_fd;
- map_fd = bpf_create_map_xattr(&xattr);
- xattr.btf_fd = -1;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts);
+ map_opts.btf_fd = -1;
close(btf_fd);
CHECK(map_fd == -1,
- "bpf_create_map_xattr()", "errno:%d\n", errno);
+ "bpf_map_create()", "errno:%d\n", errno);
return map_fd;
}
@@ -463,20 +459,20 @@ static void test_sk_storage_map_basic(void)
int cnt;
int lock;
} value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value;
- struct bpf_create_map_attr bad_xattr;
+ struct bpf_map_create_opts bad_xattr;
int btf_fd, map_fd, sk_fd, err;
btf_fd = load_btf();
CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n",
btf_fd, errno);
- xattr.btf_fd = btf_fd;
+ map_opts.btf_fd = btf_fd;
sk_fd = socket(AF_INET6, SOCK_STREAM, 0);
CHECK(sk_fd == -1, "socket()", "sk_fd:%d errno:%d\n",
sk_fd, errno);
- map_fd = bpf_create_map_xattr(&xattr);
- CHECK(map_fd == -1, "bpf_create_map_xattr(good_xattr)",
+ map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts);
+ CHECK(map_fd == -1, "bpf_map_create(good_xattr)",
"map_fd:%d errno:%d\n", map_fd, errno);
/* Add new elem */
@@ -560,31 +556,29 @@ static void test_sk_storage_map_basic(void)
CHECK(!err || errno != ENOENT, "bpf_map_delete_elem()",
"err:%d errno:%d\n", err, errno);
- memcpy(&bad_xattr, &xattr, sizeof(xattr));
+ memcpy(&bad_xattr, &map_opts, sizeof(map_opts));
bad_xattr.btf_key_type_id = 0;
- err = bpf_create_map_xattr(&bad_xattr);
- CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr);
+ CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)",
"err:%d errno:%d\n", err, errno);
- memcpy(&bad_xattr, &xattr, sizeof(xattr));
+ memcpy(&bad_xattr, &map_opts, sizeof(map_opts));
bad_xattr.btf_key_type_id = 3;
- err = bpf_create_map_xattr(&bad_xattr);
- CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr);
+ CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)",
"err:%d errno:%d\n", err, errno);
- memcpy(&bad_xattr, &xattr, sizeof(xattr));
- bad_xattr.max_entries = 1;
- err = bpf_create_map_xattr(&bad_xattr);
- CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 1, &map_opts);
+ CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)",
"err:%d errno:%d\n", err, errno);
- memcpy(&bad_xattr, &xattr, sizeof(xattr));
+ memcpy(&bad_xattr, &map_opts, sizeof(map_opts));
bad_xattr.map_flags = 0;
- err = bpf_create_map_xattr(&bad_xattr);
+ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr);
CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
"err:%d errno:%d\n", err, errno);
- xattr.btf_fd = -1;
+ map_opts.btf_fd = -1;
close(btf_fd);
close(map_fd);
close(sk_fd);
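
The libbpf 0.6 replacements used throughout this file have these signatures:

    int bpf_map_create(enum bpf_map_type map_type, const char *map_name,
                       __u32 key_size, __u32 value_size, __u32 max_entries,
                       const struct bpf_map_create_opts *opts);

    int bpf_btf_load(const void *btf_data, size_t btf_size,
                     const struct bpf_btf_load_opts *opts);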
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 5861446d0777..0ee29e11eaee 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -39,13 +39,13 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {1, "R1=ctx(id=0,off=0,imm=0)"},
- {1, "R10=fp0"},
- {1, "R3_w=inv2"},
- {2, "R3_w=inv4"},
- {3, "R3_w=inv8"},
- {4, "R3_w=inv16"},
- {5, "R3_w=inv32"},
+ {0, "R1=ctx(id=0,off=0,imm=0)"},
+ {0, "R10=fp0"},
+ {0, "R3_w=inv2"},
+ {1, "R3_w=inv4"},
+ {2, "R3_w=inv8"},
+ {3, "R3_w=inv16"},
+ {4, "R3_w=inv32"},
},
},
{
@@ -67,19 +67,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {1, "R1=ctx(id=0,off=0,imm=0)"},
- {1, "R10=fp0"},
- {1, "R3_w=inv1"},
- {2, "R3_w=inv2"},
- {3, "R3_w=inv4"},
- {4, "R3_w=inv8"},
- {5, "R3_w=inv16"},
- {6, "R3_w=inv1"},
- {7, "R4_w=inv32"},
- {8, "R4_w=inv16"},
- {9, "R4_w=inv8"},
- {10, "R4_w=inv4"},
- {11, "R4_w=inv2"},
+ {0, "R1=ctx(id=0,off=0,imm=0)"},
+ {0, "R10=fp0"},
+ {0, "R3_w=inv1"},
+ {1, "R3_w=inv2"},
+ {2, "R3_w=inv4"},
+ {3, "R3_w=inv8"},
+ {4, "R3_w=inv16"},
+ {5, "R3_w=inv1"},
+ {6, "R4_w=inv32"},
+ {7, "R4_w=inv16"},
+ {8, "R4_w=inv8"},
+ {9, "R4_w=inv4"},
+ {10, "R4_w=inv2"},
},
},
{
@@ -96,14 +96,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {1, "R1=ctx(id=0,off=0,imm=0)"},
- {1, "R10=fp0"},
- {1, "R3_w=inv4"},
- {2, "R3_w=inv8"},
- {3, "R3_w=inv10"},
- {4, "R4_w=inv8"},
- {5, "R4_w=inv12"},
- {6, "R4_w=inv14"},
+ {0, "R1=ctx(id=0,off=0,imm=0)"},
+ {0, "R10=fp0"},
+ {0, "R3_w=inv4"},
+ {1, "R3_w=inv8"},
+ {2, "R3_w=inv10"},
+ {3, "R4_w=inv8"},
+ {4, "R4_w=inv12"},
+ {5, "R4_w=inv14"},
},
},
{
@@ -118,12 +118,12 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {1, "R1=ctx(id=0,off=0,imm=0)"},
- {1, "R10=fp0"},
+ {0, "R1=ctx(id=0,off=0,imm=0)"},
+ {0, "R10=fp0"},
+ {0, "R3_w=inv7"},
{1, "R3_w=inv7"},
- {2, "R3_w=inv7"},
- {3, "R3_w=inv14"},
- {4, "R3_w=inv56"},
+ {2, "R3_w=inv14"},
+ {3, "R3_w=inv56"},
},
},
@@ -161,19 +161,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {7, "R0_w=pkt(id=0,off=8,r=8,imm=0)"},
- {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
- {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
- {18, "R3=pkt_end(id=0,off=0,imm=0)"},
- {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
- {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
- {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {6, "R0_w=pkt(id=0,off=8,r=8,imm=0)"},
+ {6, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {7, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {8, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {9, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {10, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {12, "R3_w=pkt_end(id=0,off=0,imm=0)"},
+ {17, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {18, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
+ {19, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {20, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {21, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {22, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
},
},
{
@@ -194,16 +194,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
- {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
- {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
- {12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
- {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
- {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {6, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {7, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
+ {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {9, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
+ {10, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {11, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
+ {12, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {13, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
+ {14, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {15, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
},
},
{
@@ -234,14 +234,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
- {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
- {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
- {10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
+ {2, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
+ {4, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
+ {5, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
+ {9, "R2=pkt(id=0,off=0,r=18,imm=0)"},
{10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {13, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
{14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
- {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
},
},
{
@@ -296,8 +296,8 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
- {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
+ {7, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
*/
@@ -313,11 +313,11 @@ static struct bpf_align_test tests[] = {
/* Variable offset is added to R5 packet pointer,
* resulting in auxiliary alignment of 4.
*/
- {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {17, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
- {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {18, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
@@ -329,18 +329,18 @@ static struct bpf_align_test tests[] = {
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {26, "R5_w=pkt(id=0,off=14,r=8"},
+ {25, "R5_w=pkt(id=0,off=14,r=8"},
/* Variable offset is added to R5, resulting in a
* variable offset of (4n).
*/
- {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {26, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Constant is added to R5 again, setting reg->off to 18. */
- {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {27, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* And once more we add a variable; resulting var_off
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
+ {28, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
@@ -386,13 +386,13 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
- {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
+ {7, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Adding 14 makes R6 be (4n+2) */
- {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {8, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* Packet pointer has (4n+2) offset */
{11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
- {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+ {12, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
@@ -403,12 +403,12 @@ static struct bpf_align_test tests[] = {
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
- {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {17, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
{19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
- {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+ {20, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
@@ -448,18 +448,18 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
+ {3, "R5_w=pkt_end(id=0,off=0,imm=0)"},
/* (ptr - ptr) << 2 == unknown, (4n) */
- {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
+ {5, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
- {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+ {6, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>=0 */
{9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
/* packet pointer + nonnegative (4n+2) */
{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
- {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+ {12, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
@@ -502,14 +502,14 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
- {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
+ {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Adding 14 makes R6 be (4n+2) */
- {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* New unknown value in R7 is (4n) */
- {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {10, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Subtracting it from R6 blows our unsigned bounds */
- {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+ {11, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>= 0 */
{14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* At the time the word size load is performed from R5,
@@ -556,14 +556,14 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
- {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+ {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
+ {9, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
/* Adding 14 makes R6 be (4n+2) */
- {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+ {10, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
/* Subtracting from packet pointer overflows ubounds */
{13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
/* New unknown value in R7 is (4n), >= 76 */
- {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+ {14, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
/* Adding it to packet pointer gives nice bounds again */
{16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
/* At the time the word size load is performed from R5,
@@ -594,6 +594,12 @@ static int do_test_single(struct bpf_align_test *test)
struct bpf_insn *prog = test->insns;
int prog_type = test->prog_type;
char bpf_vlog_copy[32768];
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .prog_flags = BPF_F_STRICT_ALIGNMENT,
+ .log_buf = bpf_vlog,
+ .log_size = sizeof(bpf_vlog),
+ .log_level = 2,
+ );
const char *line_ptr;
int cur_line = -1;
int prog_len, i;
@@ -601,9 +607,8 @@ static int do_test_single(struct bpf_align_test *test)
int ret;
prog_len = probe_filter_length(prog);
- fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
- prog, prog_len, BPF_F_STRICT_ALIGNMENT,
- "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2);
+ fd_prog = bpf_prog_load(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+ prog, prog_len, &opts);
if (fd_prog < 0 && test->result != REJECT) {
printf("Failed to load program.\n");
printf("%s", bpf_vlog);
@@ -620,12 +625,15 @@ static int do_test_single(struct bpf_align_test *test)
line_ptr = strtok(bpf_vlog_copy, "\n");
for (i = 0; i < MAX_MATCHES; i++) {
struct bpf_reg_match m = test->matches[i];
+ int tmp;
if (!m.match)
break;
while (line_ptr) {
cur_line = -1;
sscanf(line_ptr, "%u: ", &cur_line);
+ if (cur_line == -1)
+ sscanf(line_ptr, "from %u to %u: ", &tmp, &cur_line);
if (cur_line == m.line)
break;
line_ptr = strtok(NULL, "\n");
@@ -637,7 +645,19 @@ static int do_test_single(struct bpf_align_test *test)
printf("%s", bpf_vlog);
break;
}
+ /* Check the next line as well in case the previous line
+ * did not have a corresponding bpf insn. Example:
+ * func#0 @0
+ * 0: R1=ctx(id=0,off=0,imm=0) R10=fp0
+ * 0: (b7) r3 = 2 ; R3_w=inv2
+ */
if (!strstr(line_ptr, m.match)) {
+ cur_line = -1;
+ line_ptr = strtok(NULL, "\n");
+ sscanf(line_ptr, "%u: ", &cur_line);
+ }
+ if (cur_line != m.line || !line_ptr ||
+ !strstr(line_ptr, m.match)) {
printf("Failed to find match %u: %s\n",
m.line, m.match);
ret = 1;
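
The align.c hunks above swap bpf_verify_program() for bpf_prog_load() plus an options struct; a standalone sketch of that shape, with an illustrative log buffer size:

    #include <bpf/bpf.h>

    static char log_buf[32768];

    static int load_strict_align(const struct bpf_insn *insns, size_t insn_cnt)
    {
        LIBBPF_OPTS(bpf_prog_load_opts, opts,
            .prog_flags = BPF_F_STRICT_ALIGNMENT,
            .log_buf = log_buf,
            .log_size = sizeof(log_buf),
            .log_level = 2,                 /* verbose verifier log */
        );

        /* prog type, name, license and insns stay positional;
         * everything else moves into opts.
         */
        return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
                             insns, insn_cnt, &opts);
    }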
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 0f9525293881..86b7d5d84eec 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -167,7 +167,7 @@ static void test_cmpxchg(struct atomics_lskel *skel)
prog_fd = skel->progs.cmpxchg.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run add",
+ if (CHECK(err || retval, "test_run cmpxchg",
"err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
goto cleanup;
@@ -196,7 +196,7 @@ static void test_xchg(struct atomics_lskel *skel)
prog_fd = skel->progs.xchg.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run add",
+ if (CHECK(err || retval, "test_run xchg",
"err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
index be73e3de6668..d2d9e965eba5 100644
--- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -7,32 +7,33 @@
static void test_fail_cases(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
__u32 value;
int fd, err;
/* Invalid key size */
- fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 4, sizeof(value), 100, 0);
- if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid key size"))
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 4, sizeof(value), 100, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid key size"))
close(fd);
/* Invalid value size */
- fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, 0, 100, 0);
- if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid value size 0"))
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, 0, 100, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value size 0"))
close(fd);
/* Invalid max entries size */
- fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 0, 0);
- if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid max entries size"))
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 0, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid max entries size"))
close(fd);
/* Bloom filter maps do not support BPF_F_NO_PREALLOC */
- fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100,
- BPF_F_NO_PREALLOC);
- if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid flags"))
+ opts.map_flags = BPF_F_NO_PREALLOC;
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid flags"))
close(fd);
- fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, 0);
- if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter"))
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, NULL);
+ if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter"))
return;
/* Test invalid flags */
@@ -56,13 +57,14 @@ static void test_fail_cases(void)
static void test_success_cases(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
char value[11];
int fd, err;
/* Create a map */
- fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100,
- BPF_F_ZERO_SEED | BPF_F_NUMA_NODE);
- if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter success case"))
+ opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE;
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts);
+ if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case"))
return;
/* Add a value to the bloom filter */
@@ -100,9 +102,9 @@ static void test_inner_map(struct bloom_filter_map *skel, const __u32 *rand_vals
struct bpf_link *link;
/* Create a bloom filter map that will be used as the inner map */
- inner_map_fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(*rand_vals),
- nr_rand_vals, 0);
- if (!ASSERT_GE(inner_map_fd, 0, "bpf_create_map bloom filter inner map"))
+ inner_map_fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(*rand_vals),
+ nr_rand_vals, NULL);
+ if (!ASSERT_GE(inner_map_fd, 0, "bpf_map_create bloom filter inner map"))
return;
for (i = 0; i < nr_rand_vals; i++) {
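
The LIBBPF_OPTS() declarations used throughout these conversions expand roughly as below; this is a sketch of the macro's effect, not its exact definition:

    /* LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
     * behaves approximately like:
     */
    struct bpf_map_create_opts opts = {
        .sz = sizeof(struct bpf_map_create_opts), /* size doubles as ABI version */
        .map_flags = BPF_F_NO_PREALLOC,
    };

    /* a NULL opts pointer is always accepted and means "all defaults" */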
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 9454331aaf85..b84f859b1267 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -469,12 +469,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
* fills seq_file buffer and then the other will trigger
* overflow and needs restart.
*/
- map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
- if (CHECK(map1_fd < 0, "bpf_create_map",
+ map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
+ if (CHECK(map1_fd < 0, "bpf_map_create",
"map_creation failed: %s\n", strerror(errno)))
goto out;
- map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
- if (CHECK(map2_fd < 0, "bpf_create_map",
+ map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
+ if (CHECK(map2_fd < 0, "bpf_map_create",
"map_creation failed: %s\n", strerror(errno)))
goto free_map1;
@@ -699,14 +699,13 @@ static void test_bpf_percpu_hash_map(void)
char buf[64];
void *val;
- val = malloc(8 * bpf_num_possible_cpus());
-
skel = bpf_iter_bpf_percpu_hash_map__open();
if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open",
"skeleton open failed\n"))
return;
skel->rodata->num_cpus = bpf_num_possible_cpus();
+ val = malloc(8 * bpf_num_possible_cpus());
err = bpf_iter_bpf_percpu_hash_map__load(skel);
if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load",
@@ -770,6 +769,7 @@ free_link:
bpf_link__destroy(link);
out:
bpf_iter_bpf_percpu_hash_map__destroy(skel);
+ free(val);
}
static void test_bpf_array_map(void)
@@ -870,14 +870,13 @@ static void test_bpf_percpu_array_map(void)
void *val;
int len;
- val = malloc(8 * bpf_num_possible_cpus());
-
skel = bpf_iter_bpf_percpu_array_map__open();
if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open",
"skeleton open failed\n"))
return;
skel->rodata->num_cpus = bpf_num_possible_cpus();
+ val = malloc(8 * bpf_num_possible_cpus());
err = bpf_iter_bpf_percpu_array_map__load(skel);
if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load",
@@ -933,6 +932,7 @@ free_link:
bpf_link__destroy(link);
out:
bpf_iter_bpf_percpu_array_map__destroy(skel);
+ free(val);
}
/* An iterator program deletes all local storage in a map. */
@@ -1206,13 +1206,14 @@ static void test_task_vma(void)
goto out;
/* Read CMP_BUFFER_SIZE (1kB) from bpf_iter. Read in small chunks
- * to trigger seq_file corner cases. The expected output is much
- * longer than 1kB, so the while loop will terminate.
+ * to trigger seq_file corner cases.
*/
len = 0;
while (len < CMP_BUFFER_SIZE) {
err = read_fd_into_buffer(iter_fd, task_vma_output + len,
min(read_size, CMP_BUFFER_SIZE - len));
+ if (!err)
+ break;
if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n"))
goto out;
len += err;
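
The added `if (!err) break;` handles read_fd_into_buffer() returning 0 at end of file, which would otherwise leave the loop spinning if the iterator produces fewer than CMP_BUFFER_SIZE bytes. Schematically, with a plain read(2) and hypothetical names:

    int len = 0, n;

    while (len < buf_size) {
        n = read(fd, buf + len, buf_size - len);
        if (n == 0)     /* EOF: no more data, stop cleanly */
            break;
        if (n < 0)      /* read error */
            return -1;
        len += n;
    }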
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
new file mode 100644
index 000000000000..380d7a2072e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "bpf_loop.skel.h"
+
+static void check_nr_loops(struct bpf_loop *skel)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(skel->progs.test_prog);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ /* test 0 loops */
+ skel->bss->nr_loops = 0;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops,
+ "0 loops");
+
+ /* test 500 loops */
+ skel->bss->nr_loops = 500;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops,
+ "500 loops");
+ ASSERT_EQ(skel->bss->g_output, (500 * 499) / 2, "g_output");
+
+ /* test exceeding the max limit */
+ skel->bss->nr_loops = -1;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->err, -E2BIG, "over max limit");
+
+ bpf_link__destroy(link);
+}
+
+static void check_callback_fn_stop(struct bpf_loop *skel)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(skel->progs.test_prog);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ /* testing that loop is stopped when callback_fn returns 1 */
+ skel->bss->nr_loops = 400;
+ skel->data->stop_index = 50;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, skel->data->stop_index + 1,
+ "nr_loops_returned");
+ ASSERT_EQ(skel->bss->g_output, (50 * 49) / 2,
+ "g_output");
+
+ bpf_link__destroy(link);
+}
+
+static void check_null_callback_ctx(struct bpf_loop *skel)
+{
+ struct bpf_link *link;
+
+ /* check that user is able to pass in a null callback_ctx */
+ link = bpf_program__attach(skel->progs.prog_null_ctx);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ skel->bss->nr_loops = 10;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops,
+ "nr_loops_returned");
+
+ bpf_link__destroy(link);
+}
+
+static void check_invalid_flags(struct bpf_loop *skel)
+{
+ struct bpf_link *link;
+
+ /* check that passing in non-zero flags returns -EINVAL */
+ link = bpf_program__attach(skel->progs.prog_invalid_flags);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->err, -EINVAL, "err");
+
+ bpf_link__destroy(link);
+}
+
+static void check_nested_calls(struct bpf_loop *skel)
+{
+ __u32 nr_loops = 100, nested_callback_nr_loops = 4;
+ struct bpf_link *link;
+
+ /* check that nested calls are supported */
+ link = bpf_program__attach(skel->progs.prog_nested_calls);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ skel->bss->nr_loops = nr_loops;
+ skel->bss->nested_callback_nr_loops = nested_callback_nr_loops;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, nr_loops * nested_callback_nr_loops
+ * nested_callback_nr_loops, "nr_loops_returned");
+ ASSERT_EQ(skel->bss->g_output, (4 * 3) / 2 * nested_callback_nr_loops
+ * nr_loops, "g_output");
+
+ bpf_link__destroy(link);
+}
+
+void test_bpf_loop(void)
+{
+ struct bpf_loop *skel;
+
+ skel = bpf_loop__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_loop__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ if (test__start_subtest("check_nr_loops"))
+ check_nr_loops(skel);
+ if (test__start_subtest("check_callback_fn_stop"))
+ check_callback_fn_stop(skel);
+ if (test__start_subtest("check_null_callback_ctx"))
+ check_null_callback_ctx(skel);
+ if (test__start_subtest("check_invalid_flags"))
+ check_invalid_flags(skel);
+ if (test__start_subtest("check_nested_calls"))
+ check_nested_calls(skel);
+
+ bpf_loop__destroy(skel);
+}
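
The new test drives the bpf_loop() helper introduced in this series; a minimal sketch of the BPF-side counterpart it assumes (section and symbol names are illustrative, not copied from bpf_loop.bpf.c):

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    int nr_loops_returned;

    /* callback contract: return 0 to continue, 1 to stop the loop early */
    static int loop_cb(__u32 index, void *ctx)
    {
        nr_loops_returned++;
        return 0;
    }

    SEC("tracepoint/syscalls/sys_enter_nanosleep")
    int test_prog(void *ctx)
    {
        /* nr_loops, callback, callback_ctx (may be NULL), flags (must be 0) */
        bpf_loop(10, loop_cb, NULL, 0);
        return 0;
    }

    char _license[] SEC("license") = "GPL";

The usleep(1) calls in the user-space test above exist to fire such a tracepoint.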
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index eb8eeebe6935..dbe56fa8582d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -48,7 +48,7 @@ void serial_test_bpf_obj_id(void)
bzero(zeros, sizeof(zeros));
for (i = 0; i < nr_iters; i++) {
now = time(NULL);
- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT,
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT,
&objs[i], &prog_fds[i]);
/* test_obj_id.o is a dumb prog. It should never fail
* to load.
@@ -65,8 +65,8 @@ void serial_test_bpf_obj_id(void)
if (CHECK_FAIL(err))
goto done;
- prog = bpf_object__find_program_by_title(objs[i],
- "raw_tp/sys_enter");
+ prog = bpf_object__find_program_by_name(objs[i],
+ "test_obj_id");
if (CHECK_FAIL(!prog))
goto done;
links[i] = bpf_program__attach(prog);
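
bpf_object__find_program_by_title() is deprecated because section names need not be unique; the replacement looks programs up by their C function name instead. A sketch:

    struct bpf_program *prog;

    /* "test_obj_id" is the SEC()-annotated function's name; the old
     * by_title lookup matched its section name, "raw_tp/sys_enter".
     */
    prog = bpf_object__find_program_by_name(obj, "test_obj_id");
    if (!prog)
        return -ENOENT;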
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 94e03df69d71..8f7a1cef7d87 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -217,21 +217,22 @@ static bool found;
static int libbpf_debug_print(enum libbpf_print_level level,
const char *format, va_list args)
{
- char *log_buf;
+ const char *prog_name, *log_buf;
if (level != LIBBPF_WARN ||
- strcmp(format, "libbpf: \n%s\n")) {
+ !strstr(format, "-- BEGIN PROG LOAD LOG --")) {
vprintf(format, args);
return 0;
}
+ prog_name = va_arg(args, char *);
log_buf = va_arg(args, char *);
if (!log_buf)
goto out;
if (err_str && strstr(log_buf, err_str) != NULL)
found = true;
out:
- printf(format, log_buf);
+ printf(format, prog_name, log_buf);
return 0;
}
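
The function above is an instance of libbpf's pluggable print callback; a minimal sketch of defining and installing one (function name illustrative):

    #include <stdarg.h>
    #include <stdio.h>
    #include <bpf/libbpf.h>

    static int quiet_print(enum libbpf_print_level level, const char *format,
                           va_list args)
    {
        if (level == LIBBPF_DEBUG)
            return 0;       /* swallow debug chatter */
        return vfprintf(stderr, format, args);
    }

    /* during setup; libbpf_set_print() returns the previous callback */
    libbpf_set_print(quiet_print);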
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index 27f5d8ea7964..ff6cce9fef06 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -19,16 +19,28 @@ extern int extra_prog_load_log_flags;
static int check_load(const char *file, enum bpf_prog_type type)
{
- struct bpf_prog_load_attr attr;
struct bpf_object *obj = NULL;
- int err, prog_fd;
-
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = file;
- attr.prog_type = type;
- attr.log_level = 4 | extra_prog_load_log_flags;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+ struct bpf_program *prog;
+ int err;
+
+ obj = bpf_object__open_file(file, NULL);
+ err = libbpf_get_error(obj);
+ if (err)
+ return err;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (!prog) {
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ bpf_program__set_type(prog, type);
+ bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32);
+ bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags);
+
+ err = bpf_object__load(obj);
+
+err_out:
bpf_object__close(obj);
return err;
}
@@ -115,6 +127,12 @@ void test_verif_scale_pyperf600()
scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}
+void test_verif_scale_pyperf600_bpf_loop(void)
+{
+ /* use the bpf_loop helper */
+ scale_test("pyperf600_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
void test_verif_scale_pyperf600_nounroll()
{
/* no unroll at all.
@@ -165,6 +183,12 @@ void test_verif_scale_strobemeta()
scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}
+void test_verif_scale_strobemeta_bpf_loop(void)
+{
+ /* use the bpf_loop helper */
+ scale_test("strobemeta_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
void test_verif_scale_strobemeta_nounroll1()
{
/* no unroll, tiny loops */
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index ac596cb06e40..8ba53acf9eb4 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -22,7 +22,6 @@
#include <bpf/libbpf.h>
#include <bpf/btf.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "../test_btf.h"
#include "test_progs.h"
@@ -3939,6 +3938,23 @@ static struct btf_raw_test raw_tests[] = {
.btf_load_err = true,
.err_str = "Invalid component_idx",
},
+{
+ .descr = "type_tag test #1",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
}; /* struct btf_raw_test raw_tests[] */
@@ -4046,20 +4062,40 @@ static void *btf_raw_create(const struct btf_header *hdr,
next_str_idx < strs_cnt ? strs_idx[next_str_idx] : NULL;
done:
+ free(strs_idx);
if (err) {
- if (raw_btf)
- free(raw_btf);
- if (strs_idx)
- free(strs_idx);
+ free(raw_btf);
return NULL;
}
return raw_btf;
}
+static int load_raw_btf(const void *raw_data, size_t raw_size)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, opts);
+ int btf_fd;
+
+ if (always_log) {
+ opts.log_buf = btf_log_buf,
+ opts.log_size = BTF_LOG_BUF_SIZE,
+ opts.log_level = 1;
+ }
+
+ btf_fd = bpf_btf_load(raw_data, raw_size, &opts);
+ if (btf_fd < 0 && !always_log) {
+ opts.log_buf = btf_log_buf,
+ opts.log_size = BTF_LOG_BUF_SIZE,
+ opts.log_level = 1;
+ btf_fd = bpf_btf_load(raw_data, raw_size, &opts);
+ }
+
+ return btf_fd;
+}
+
static void do_test_raw(unsigned int test_num)
{
struct btf_raw_test *test = &raw_tests[test_num - 1];
- struct bpf_create_map_attr create_attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
int map_fd = -1, btf_fd = -1;
unsigned int raw_btf_size;
struct btf_header *hdr;
@@ -4085,16 +4121,14 @@ static void do_test_raw(unsigned int test_num)
hdr->str_len = (int)hdr->str_len + test->str_len_delta;
*btf_log_buf = '\0';
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- always_log);
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
free(raw_btf);
err = ((btf_fd < 0) != test->btf_load_err);
if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
btf_fd, test->btf_load_err) ||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
- "expected err_str:%s", test->err_str)) {
+ "expected err_str:%s\n", test->err_str)) {
err = -1;
goto done;
}
@@ -4102,16 +4136,11 @@ static void do_test_raw(unsigned int test_num)
if (err || btf_fd < 0)
goto done;
- create_attr.name = test->map_name;
- create_attr.map_type = test->map_type;
- create_attr.key_size = test->key_size;
- create_attr.value_size = test->value_size;
- create_attr.max_entries = test->max_entries;
- create_attr.btf_fd = btf_fd;
- create_attr.btf_key_type_id = test->key_type_id;
- create_attr.btf_value_type_id = test->value_type_id;
-
- map_fd = bpf_create_map_xattr(&create_attr);
+ opts.btf_fd = btf_fd;
+ opts.btf_key_type_id = test->key_type_id;
+ opts.btf_value_type_id = test->value_type_id;
+ map_fd = bpf_map_create(test->map_type, test->map_name,
+ test->key_size, test->value_size, test->max_entries, &opts);
err = ((map_fd < 0) != test->map_create_err);
CHECK(err, "map_fd:%d test->map_create_err:%u",
@@ -4217,9 +4246,7 @@ static int test_big_btf_info(unsigned int test_num)
goto done;
}
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- always_log);
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
if (CHECK(btf_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
@@ -4275,7 +4302,7 @@ done:
static int test_btf_id(unsigned int test_num)
{
const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
- struct bpf_create_map_attr create_attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
uint8_t *raw_btf = NULL, *user_btf[2] = {};
int btf_fd[2] = {-1, -1}, map_fd = -1;
struct bpf_map_info map_info = {};
@@ -4305,9 +4332,7 @@ static int test_btf_id(unsigned int test_num)
info[i].btf_size = raw_btf_size;
}
- btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- always_log);
+ btf_fd[0] = load_raw_btf(raw_btf, raw_btf_size);
if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
err = -1;
goto done;
@@ -4340,16 +4365,11 @@ static int test_btf_id(unsigned int test_num)
}
/* Test btf members in struct bpf_map_info */
- create_attr.name = "test_btf_id";
- create_attr.map_type = BPF_MAP_TYPE_ARRAY;
- create_attr.key_size = sizeof(int);
- create_attr.value_size = sizeof(unsigned int);
- create_attr.max_entries = 4;
- create_attr.btf_fd = btf_fd[0];
- create_attr.btf_key_type_id = 1;
- create_attr.btf_value_type_id = 2;
-
- map_fd = bpf_create_map_xattr(&create_attr);
+ opts.btf_fd = btf_fd[0];
+ opts.btf_key_type_id = 1;
+ opts.btf_value_type_id = 2;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_btf_id",
+ sizeof(int), sizeof(int), 4, &opts);
if (CHECK(map_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
@@ -4442,9 +4462,7 @@ static void do_test_get_info(unsigned int test_num)
goto done;
}
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- always_log);
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
if (CHECK(btf_fd <= 0, "errno:%d", errno)) {
err = -1;
goto done;
@@ -5138,7 +5156,7 @@ static void do_test_pprint(int test_num)
{
const struct btf_raw_test *test = &pprint_test_template[test_num];
enum pprint_mapv_kind_t mapv_kind = test->mapv_kind;
- struct bpf_create_map_attr create_attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
bool ordered_map, lossless_map, percpu_map;
int err, ret, num_cpus, rounded_value_size;
unsigned int key, nr_read_elems;
@@ -5164,26 +5182,19 @@ static void do_test_pprint(int test_num)
return;
*btf_log_buf = '\0';
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- always_log);
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
free(raw_btf);
- if (CHECK(btf_fd < 0, "errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "errno:%d\n", errno)) {
err = -1;
goto done;
}
- create_attr.name = test->map_name;
- create_attr.map_type = test->map_type;
- create_attr.key_size = test->key_size;
- create_attr.value_size = test->value_size;
- create_attr.max_entries = test->max_entries;
- create_attr.btf_fd = btf_fd;
- create_attr.btf_key_type_id = test->key_type_id;
- create_attr.btf_value_type_id = test->value_type_id;
-
- map_fd = bpf_create_map_xattr(&create_attr);
+ opts.btf_fd = btf_fd;
+ opts.btf_key_type_id = test->key_type_id;
+ opts.btf_value_type_id = test->value_type_id;
+ map_fd = bpf_map_create(test->map_type, test->map_name,
+ test->key_size, test->value_size, test->max_entries, &opts);
if (CHECK(map_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
@@ -6538,9 +6549,7 @@ static void do_test_info_raw(unsigned int test_num)
return;
*btf_log_buf = '\0';
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- always_log);
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
free(raw_btf);
if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) {
@@ -6629,7 +6638,7 @@ struct btf_dedup_test {
struct btf_dedup_opts opts;
};
-const struct btf_dedup_test dedup_tests[] = {
+static struct btf_dedup_test dedup_tests[] = {
{
.descr = "dedup: unused strings filtering",
@@ -6649,9 +6658,6 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0int\0long"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: strings deduplication",
@@ -6674,9 +6680,6 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0int\0long int"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: struct example #1",
@@ -6757,9 +6760,6 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: struct <-> fwd resolution w/ hash collision",
@@ -6802,8 +6802,7 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_STR_SEC("\0s\0x"),
},
.opts = {
- .dont_resolve_fwds = false,
- .dedup_table_size = 1, /* force hash collisions */
+ .force_collisions = true, /* force hash collisions */
},
},
{
@@ -6849,8 +6848,7 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_STR_SEC("\0s\0x"),
},
.opts = {
- .dont_resolve_fwds = false,
- .dedup_table_size = 1, /* force hash collisions */
+ .force_collisions = true, /* force hash collisions */
},
},
{
@@ -6874,15 +6872,16 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_RESTRICT_ENC(8), /* [11] restrict */
BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
- BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 18),
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */
BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */
BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"),
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"),
},
.expect = {
.raw_types = {
@@ -6903,18 +6902,16 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_RESTRICT_ENC(8), /* [11] restrict */
BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
- BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 18),
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */
BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */
BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"),
- },
- .opts = {
- .dont_resolve_fwds = false,
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"),
},
},
{
@@ -6967,9 +6964,6 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0int\0some other int\0float"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: enum fwd resolution",
@@ -7011,9 +7005,6 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: datasec and vars pass-through",
@@ -7056,8 +7047,7 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_STR_SEC("\0.bss\0t"),
},
.opts = {
- .dont_resolve_fwds = false,
- .dedup_table_size = 1
+ .force_collisions = true
},
},
{
@@ -7101,9 +7091,6 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: func/func_param tags",
@@ -7154,9 +7141,6 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: struct/struct_member tags",
@@ -7202,9 +7186,6 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: typedef tags",
@@ -7235,8 +7216,160 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"),
},
- .opts = {
- .dont_resolve_fwds = false,
+},
+{
+ .descr = "dedup: btf_type_tag #1",
+ .input = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [5] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 5), /* [6] */
+ BTF_PTR_ENC(6), /* [7] */
+ /* ptr -> tag1 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [8] */
+ BTF_PTR_ENC(8), /* [9] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+ .expect = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag1 -> int */
+ BTF_PTR_ENC(2), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #2",
+ .input = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag2 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */
+ BTF_PTR_ENC(5), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+ .expect = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag2 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */
+ BTF_PTR_ENC(5), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #3",
+ .input = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag1 -> tag2 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 5), /* [6] */
+ BTF_PTR_ENC(6), /* [7] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+ .expect = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag1 -> tag2 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 5), /* [6] */
+ BTF_PTR_ENC(6), /* [7] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #4",
+ .input = {
+ .raw_types = {
+ /* ptr -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ /* ptr -> tag1 -> long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [4] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 4), /* [5] */
+ BTF_PTR_ENC(5), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1"),
+ },
+ .expect = {
+ .raw_types = {
+ /* ptr -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ /* ptr -> tag1 -> long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [4] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 4), /* [5] */
+ BTF_PTR_ENC(5), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #5, struct",
+ .input = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [3] */
+ BTF_MEMBER_ENC(NAME_NTH(3), 2, BTF_MEMBER_OFFSET(0, 0)),
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [4] */
+ BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [5] */
+ BTF_MEMBER_ENC(NAME_NTH(3), 4, BTF_MEMBER_OFFSET(0, 0)),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0t\0m"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [3] */
+ BTF_MEMBER_ENC(NAME_NTH(3), 2, BTF_MEMBER_OFFSET(0, 0)),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0t\0m"),
},
},
@@ -7257,6 +7390,7 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_FLOAT:
+ case BTF_KIND_TYPE_TAG:
return base_size;
case BTF_KIND_INT:
return base_size + sizeof(__u32);
@@ -7295,7 +7429,7 @@ static void dump_btf_strings(const char *strs, __u32 len)
static void do_test_dedup(unsigned int test_num)
{
- const struct btf_dedup_test *test = &dedup_tests[test_num - 1];
+ struct btf_dedup_test *test = &dedup_tests[test_num - 1];
__u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size;
const struct btf_header *test_hdr, *expect_hdr;
struct btf *test_btf = NULL, *expect_btf = NULL;
@@ -7339,7 +7473,8 @@ static void do_test_dedup(unsigned int test_num)
goto done;
}
- err = btf__dedup(test_btf, NULL, &test->opts);
+ test->opts.sz = sizeof(test->opts);
+ err = btf__dedup(test_btf, &test->opts);
if (CHECK(err, "btf_dedup failed errno:%d", err)) {
err = -1;
goto done;
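
The btf.c conversion converges on two new entry points: bpf_btf_load(), which takes an extensible opts struct in place of positional log arguments, and btf__dedup(), which now takes just the target BTF and an opts pointer. A combined sketch, with an illustrative log buffer size:

    #include <bpf/bpf.h>
    #include <bpf/btf.h>

    static char btf_log_buf[64 * 1024];

    static int load_then_dedup(const void *raw_btf, size_t raw_size,
                               struct btf *btf)
    {
        LIBBPF_OPTS(bpf_btf_load_opts, load_opts,
            .log_buf = btf_log_buf,
            .log_size = sizeof(btf_log_buf),
            .log_level = 1,
        );
        int btf_fd;

        btf_fd = bpf_btf_load(raw_btf, raw_size, &load_opts);
        if (btf_fd < 0)
            return btf_fd;

        /* NULL opts mean defaults; when opts are passed, .sz must be set,
         * which LIBBPF_OPTS() or an explicit assignment takes care of.
         */
        return btf__dedup(btf, NULL);
    }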
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
index 64554fd33547..90aac437576d 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
@@ -92,7 +92,7 @@ struct s2 {\n\
int *f3;\n\
};\n\n", "c_dump");
- err = btf__dedup(btf2, NULL, NULL);
+ err = btf__dedup(btf2, NULL);
if (!ASSERT_OK(err, "btf_dedup"))
goto cleanup;
@@ -186,7 +186,7 @@ static void test_split_fwd_resolve() {
"\t'f1' type_id=7 bits_offset=0\n"
"\t'f2' type_id=9 bits_offset=64");
- err = btf__dedup(btf2, NULL, NULL);
+ err = btf__dedup(btf2, NULL);
if (!ASSERT_OK(err, "btf_dedup"))
goto cleanup;
@@ -283,7 +283,7 @@ static void test_split_struct_duped() {
"[13] STRUCT 's3' size=8 vlen=1\n"
"\t'f1' type_id=12 bits_offset=0");
- err = btf__dedup(btf2, NULL, NULL);
+ err = btf__dedup(btf2, NULL);
if (!ASSERT_OK(err, "btf_dedup"))
goto cleanup;
@@ -314,6 +314,117 @@ cleanup:
btf__free(btf1);
}
+static void btf_add_dup_struct_in_cu(struct btf *btf, int start_id)
+{
+#define ID(n) (start_id + n)
+ btf__set_pointer_size(btf, 8); /* enforce 64-bit arch */
+
+ btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */
+
+ btf__add_struct(btf, "s", 8); /* [2] struct s { */
+ btf__add_field(btf, "a", ID(3), 0, 0); /* struct anon a; */
+ btf__add_field(btf, "b", ID(4), 0, 0); /* struct anon b; */
+ /* } */
+
+ btf__add_struct(btf, "(anon)", 8); /* [3] struct anon { */
+ btf__add_field(btf, "f1", ID(1), 0, 0); /* int f1; */
+ btf__add_field(btf, "f2", ID(1), 32, 0); /* int f2; */
+ /* } */
+
+ btf__add_struct(btf, "(anon)", 8); /* [4] struct anon { */
+ btf__add_field(btf, "f1", ID(1), 0, 0); /* int f1; */
+ btf__add_field(btf, "f2", ID(1), 32, 0); /* int f2; */
+ /* } */
+#undef ID
+}
+
+static void test_split_dup_struct_in_cu()
+{
+ struct btf *btf1, *btf2 = NULL;
+ int err;
+
+ /* generate the base data.. */
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+
+ btf_add_dup_struct_in_cu(btf1, 0);
+
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=3 bits_offset=0\n"
+ "\t'b' type_id=4 bits_offset=0",
+ "[3] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32",
+ "[4] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32");
+
+ /* ..dedup them... */
+ err = btf__dedup(btf1, NULL);
+ if (!ASSERT_OK(err, "btf_dedup"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=3 bits_offset=0\n"
+ "\t'b' type_id=3 bits_offset=0",
+ "[3] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32");
+
+ /* and add the same data on top of it */
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+
+ btf_add_dup_struct_in_cu(btf2, 3);
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=3 bits_offset=0\n"
+ "\t'b' type_id=3 bits_offset=0",
+ "[3] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32",
+ "[4] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[5] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=6 bits_offset=0\n"
+ "\t'b' type_id=7 bits_offset=0",
+ "[6] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=4 bits_offset=0\n"
+ "\t'f2' type_id=4 bits_offset=32",
+ "[7] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=4 bits_offset=0\n"
+ "\t'f2' type_id=4 bits_offset=32");
+
+ err = btf__dedup(btf2, NULL);
+ if (!ASSERT_OK(err, "btf_dedup"))
+ goto cleanup;
+
+ /* after dedup it should match the original data */
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=3 bits_offset=0\n"
+ "\t'b' type_id=3 bits_offset=0",
+ "[3] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32");
+
+cleanup:
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
void test_btf_dedup_split()
{
if (test__start_subtest("split_simple"))
@@ -322,4 +433,6 @@ void test_btf_dedup_split()
test_split_struct_duped();
if (test__start_subtest("split_fwd_resolve"))
test_split_fwd_resolve();
+ if (test__start_subtest("split_dup_struct_in_cu"))
+ test_split_dup_struct_in_cu();
}
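
For reference, a compact sketch of the base/split BTF pattern the new subtest exercises (types chosen for illustration, error handling elided):

    struct btf *base, *split;

    base = btf__new_empty();
    btf__add_int(base, "int", 4, BTF_INT_SIGNED);   /* base id [1] */

    /* split BTF continues numbering after the base's last type id */
    split = btf__new_empty_split(base);
    btf__add_ptr(split, 1);                         /* id [2] -> base "int" */

    /* dedup rewrites only the split types; the base BTF stays untouched */
    btf__dedup(split, NULL);

    btf__free(split);
    btf__free(base);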
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index aa76360d8f49..9e26903f9170 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -13,25 +13,23 @@ static struct btf_dump_test_case {
const char *name;
const char *file;
bool known_ptr_sz;
- struct btf_dump_opts opts;
} btf_dump_test_cases[] = {
- {"btf_dump: syntax", "btf_dump_test_case_syntax", true, {}},
- {"btf_dump: ordering", "btf_dump_test_case_ordering", false, {}},
- {"btf_dump: padding", "btf_dump_test_case_padding", true, {}},
- {"btf_dump: packing", "btf_dump_test_case_packing", true, {}},
- {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true, {}},
- {"btf_dump: multidim", "btf_dump_test_case_multidim", false, {}},
- {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false, {}},
+ {"btf_dump: syntax", "btf_dump_test_case_syntax", true},
+ {"btf_dump: ordering", "btf_dump_test_case_ordering", false},
+ {"btf_dump: padding", "btf_dump_test_case_padding", true},
+ {"btf_dump: packing", "btf_dump_test_case_packing", true},
+ {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true},
+ {"btf_dump: multidim", "btf_dump_test_case_multidim", false},
+ {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false},
};
-static int btf_dump_all_types(const struct btf *btf,
- const struct btf_dump_opts *opts)
+static int btf_dump_all_types(const struct btf *btf, void *ctx)
{
size_t type_cnt = btf__type_cnt(btf);
struct btf_dump *d;
int err = 0, id;
- d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
+ d = btf_dump__new(btf, btf_dump_printf, ctx, NULL);
err = libbpf_get_error(d);
if (err)
return err;
@@ -88,8 +86,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
goto done;
}
- t->opts.ctx = f;
- err = btf_dump_all_types(btf, &t->opts);
+ err = btf_dump_all_types(btf, f);
fclose(f);
close(fd);
if (CHECK(err, "btf_dump", "failure during C dumping: %d\n", err)) {
@@ -137,7 +134,6 @@ static void test_btf_dump_incremental(void)
{
struct btf *btf = NULL;
struct btf_dump *d = NULL;
- struct btf_dump_opts opts;
int id, err, i;
dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
@@ -146,8 +142,7 @@ static void test_btf_dump_incremental(void)
btf = btf__new_empty();
if (!ASSERT_OK_PTR(btf, "new_empty"))
goto err_out;
- opts.ctx = dump_buf_file;
- d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
+ d = btf_dump__new(btf, btf_dump_printf, dump_buf_file, NULL);
if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new"))
goto err_out;
@@ -328,7 +323,7 @@ static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d,
char *str)
{
#ifdef __SIZEOF_INT128__
- __int128 i = 0xffffffffffffffff;
+ unsigned __int128 i = 0xffffffffffffffff;
/* this dance is required because we cannot directly initialize
* a 128-bit value to anything larger than a 64-bit value.
@@ -761,7 +756,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
/* overflow bpf_sock_ops struct with final element nonzero/zero.
* Regardless of the value of the final field, we don't have all the
* data we need to display it, so we should trigger an overflow.
- * In other words oveflow checking should trump "is field zero?"
+ * In other words overflow checking should trump "is field zero?"
* checks because if we've overflowed, it shouldn't matter what the
* field is - we can't trust its value so shouldn't display it.
*/
@@ -814,26 +809,28 @@ static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
static void test_btf_dump_datasec_data(char *str)
{
- struct btf *btf = btf__parse("xdping_kern.o", NULL);
- struct btf_dump_opts opts = { .ctx = str };
+ struct btf *btf;
char license[4] = "GPL";
struct btf_dump *d;
+ btf = btf__parse("xdping_kern.o", NULL);
if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found"))
return;
- d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+ d = btf_dump__new(btf, btf_dump_snprintf, str, NULL);
if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
- return;
+ goto out;
test_btf_datasec(btf, d, str, "license",
"SEC(\"license\") char[4] _license = (char[4])['G','P','L',];",
license, sizeof(license));
+out:
+ btf_dump__free(d);
+ btf__free(btf);
}
void test_btf_dump() {
char str[STRSIZE];
- struct btf_dump_opts opts = { .ctx = str };
struct btf_dump *d;
struct btf *btf;
int i;
@@ -853,7 +850,7 @@ void test_btf_dump() {
if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
return;
- d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+ d = btf_dump__new(btf, btf_dump_snprintf, str, NULL);
if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
return;
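
btf_dump__new()'s signature changes here from (btf, btf_ext, opts, printf_fn) to (btf, printf_fn, ctx, opts), promoting the user context to a direct argument instead of a field in struct btf_dump_opts. A sketch of the new shape:

    #include <stdarg.h>
    #include <stdio.h>
    #include <bpf/btf.h>
    #include <bpf/libbpf.h>

    static void dump_printf(void *ctx, const char *fmt, va_list args)
    {
        vfprintf((FILE *)ctx, fmt, args);
    }

    static int dump_all_types(const struct btf *btf, FILE *out)
    {
        struct btf_dump *d;
        int id, err;

        /* ctx (here a FILE *) is handed back to dump_printf() verbatim */
        d = btf_dump__new(btf, dump_printf, out, NULL);
        err = libbpf_get_error(d);
        if (err)
            return err;

        for (id = 1; id < btf__type_cnt(btf); id++) {
            err = btf_dump__dump_type(d, id);
            if (err)
                break;
        }
        btf_dump__free(d);
        return err;
    }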
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
index 762f6a9da8b5..664ffc0364f4 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
@@ -90,7 +90,7 @@ static void print_err_line(void)
static void test_conn(void)
{
- int listen_fd = -1, cli_fd = -1, err;
+ int listen_fd = -1, cli_fd = -1, srv_fd = -1, err;
socklen_t addrlen = sizeof(srv_sa6);
int srv_port;
@@ -112,6 +112,10 @@ static void test_conn(void)
if (CHECK_FAIL(cli_fd == -1))
goto done;
+ srv_fd = accept(listen_fd, NULL, NULL);
+ if (CHECK_FAIL(srv_fd == -1))
+ goto done;
+
if (CHECK(skel->bss->listen_tp_sport != srv_port ||
skel->bss->req_sk_sport != srv_port,
"Unexpected sk src port",
@@ -134,11 +138,13 @@ done:
close(listen_fd);
if (cli_fd != -1)
close(cli_fd);
+ if (srv_fd != -1)
+ close(srv_fd);
}
static void test_syncookie(void)
{
- int listen_fd = -1, cli_fd = -1, err;
+ int listen_fd = -1, cli_fd = -1, srv_fd = -1, err;
socklen_t addrlen = sizeof(srv_sa6);
int srv_port;
@@ -161,6 +167,10 @@ static void test_syncookie(void)
if (CHECK_FAIL(cli_fd == -1))
goto done;
+ srv_fd = accept(listen_fd, NULL, NULL);
+ if (CHECK_FAIL(srv_fd == -1))
+ goto done;
+
if (CHECK(skel->bss->listen_tp_sport != srv_port,
"Unexpected tp src port",
"listen_tp_sport:%u expected:%u\n",
@@ -188,6 +198,8 @@ done:
close(listen_fd);
if (cli_fd != -1)
close(cli_fd);
+ if (srv_fd != -1)
+ close(srv_fd);
}
struct test {
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
index b1ffe61f2aa9..eef1158676ed 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -13,7 +13,6 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
}
void test_btf_split() {
- struct btf_dump_opts opts;
struct btf_dump *d = NULL;
const struct btf_type *t;
struct btf *btf1, *btf2;
@@ -68,8 +67,7 @@ void test_btf_split() {
dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
return;
- opts.ctx = dump_buf_file;
- d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf);
+ d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL);
if (!ASSERT_OK_PTR(d, "btf_dump__new"))
goto cleanup;
for (i = 1; i < btf__type_cnt(btf2); i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
index 91821f42714d..88d63e23e35f 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_tag.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
@@ -1,20 +1,50 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include <test_progs.h>
-#include "tag.skel.h"
+#include "btf_decl_tag.skel.h"
-void test_btf_tag(void)
+/* struct btf_type_tag_test is referenced in btf_type_tag.skel.h */
+struct btf_type_tag_test {
+ int **p;
+};
+#include "btf_type_tag.skel.h"
+
+static void test_btf_decl_tag(void)
+{
+ struct btf_decl_tag *skel;
+
+ skel = btf_decl_tag__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "btf_decl_tag"))
+ return;
+
+ if (skel->rodata->skip_tests) {
+ printf("%s:SKIP: btf_decl_tag attribute not supported", __func__);
+ test__skip();
+ }
+
+ btf_decl_tag__destroy(skel);
+}
+
+static void test_btf_type_tag(void)
{
- struct tag *skel;
+ struct btf_type_tag *skel;
- skel = tag__open_and_load();
- if (!ASSERT_OK_PTR(skel, "btf_tag"))
+ skel = btf_type_tag__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag"))
return;
if (skel->rodata->skip_tests) {
- printf("%s:SKIP: btf_tag attribute not supported", __func__);
+ printf("%s:SKIP: btf_type_tag attribute not supported", __func__);
test__skip();
}
- tag__destroy(skel);
+ btf_type_tag__destroy(skel);
+}
+
+void test_btf_tag(void)
+{
+ if (test__start_subtest("btf_decl_tag"))
+ test_btf_decl_tag();
+ if (test__start_subtest("btf_type_tag"))
+ test_btf_type_tag();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index b912eeb0b6b4..addf99c05896 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -297,6 +297,16 @@ static void gen_btf(struct btf *btf)
ASSERT_EQ(btf_decl_tag(t)->component_idx, 1, "tag_component_idx");
ASSERT_STREQ(btf_type_raw_dump(btf, 19),
"[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "raw_dump");
+
+ /* TYPE_TAG */
+ id = btf__add_type_tag(btf, "tag1", 1);
+ ASSERT_EQ(id, 20, "tag_id");
+ t = btf__type_by_id(btf, 20);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_TYPE_TAG, "tag_kind");
+ ASSERT_EQ(t->type, 1, "tag_type");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 20),
+ "[20] TYPE_TAG 'tag1' type_id=1", "raw_dump");
}
static void test_btf_add()
@@ -337,7 +347,8 @@ static void test_btf_add()
"[17] DATASEC 'datasec1' size=12 vlen=1\n"
"\ttype_id=1 offset=4 size=8",
"[18] DECL_TAG 'tag1' type_id=16 component_idx=-1",
- "[19] DECL_TAG 'tag2' type_id=14 component_idx=1");
+ "[19] DECL_TAG 'tag2' type_id=14 component_idx=1",
+ "[20] TYPE_TAG 'tag1' type_id=1");
btf__free(btf);
}
@@ -359,7 +370,7 @@ static void test_btf_add_btf()
gen_btf(btf2);
id = btf__add_btf(btf1, btf2);
- if (!ASSERT_EQ(id, 20, "id"))
+ if (!ASSERT_EQ(id, 21, "id"))
goto cleanup;
VALIDATE_RAW_BTF(
@@ -391,35 +402,37 @@ static void test_btf_add_btf()
"\ttype_id=1 offset=4 size=8",
"[18] DECL_TAG 'tag1' type_id=16 component_idx=-1",
"[19] DECL_TAG 'tag2' type_id=14 component_idx=1",
+ "[20] TYPE_TAG 'tag1' type_id=1",
/* types appended from the second BTF */
- "[20] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
- "[21] PTR '(anon)' type_id=20",
- "[22] CONST '(anon)' type_id=24",
- "[23] VOLATILE '(anon)' type_id=22",
- "[24] RESTRICT '(anon)' type_id=23",
- "[25] ARRAY '(anon)' type_id=21 index_type_id=20 nr_elems=10",
- "[26] STRUCT 's1' size=8 vlen=2\n"
- "\t'f1' type_id=20 bits_offset=0\n"
- "\t'f2' type_id=20 bits_offset=32 bitfield_size=16",
- "[27] UNION 'u1' size=8 vlen=1\n"
- "\t'f1' type_id=20 bits_offset=0 bitfield_size=16",
- "[28] ENUM 'e1' size=4 vlen=2\n"
+ "[21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[22] PTR '(anon)' type_id=21",
+ "[23] CONST '(anon)' type_id=25",
+ "[24] VOLATILE '(anon)' type_id=23",
+ "[25] RESTRICT '(anon)' type_id=24",
+ "[26] ARRAY '(anon)' type_id=22 index_type_id=21 nr_elems=10",
+ "[27] STRUCT 's1' size=8 vlen=2\n"
+ "\t'f1' type_id=21 bits_offset=0\n"
+ "\t'f2' type_id=21 bits_offset=32 bitfield_size=16",
+ "[28] UNION 'u1' size=8 vlen=1\n"
+ "\t'f1' type_id=21 bits_offset=0 bitfield_size=16",
+ "[29] ENUM 'e1' size=4 vlen=2\n"
"\t'v1' val=1\n"
"\t'v2' val=2",
- "[29] FWD 'struct_fwd' fwd_kind=struct",
- "[30] FWD 'union_fwd' fwd_kind=union",
- "[31] ENUM 'enum_fwd' size=4 vlen=0",
- "[32] TYPEDEF 'typedef1' type_id=20",
- "[33] FUNC 'func1' type_id=34 linkage=global",
- "[34] FUNC_PROTO '(anon)' ret_type_id=20 vlen=2\n"
- "\t'p1' type_id=20\n"
- "\t'p2' type_id=21",
- "[35] VAR 'var1' type_id=20, linkage=global-alloc",
- "[36] DATASEC 'datasec1' size=12 vlen=1\n"
- "\ttype_id=20 offset=4 size=8",
- "[37] DECL_TAG 'tag1' type_id=35 component_idx=-1",
- "[38] DECL_TAG 'tag2' type_id=33 component_idx=1");
+ "[30] FWD 'struct_fwd' fwd_kind=struct",
+ "[31] FWD 'union_fwd' fwd_kind=union",
+ "[32] ENUM 'enum_fwd' size=4 vlen=0",
+ "[33] TYPEDEF 'typedef1' type_id=21",
+ "[34] FUNC 'func1' type_id=35 linkage=global",
+ "[35] FUNC_PROTO '(anon)' ret_type_id=21 vlen=2\n"
+ "\t'p1' type_id=21\n"
+ "\t'p2' type_id=22",
+ "[36] VAR 'var1' type_id=21, linkage=global-alloc",
+ "[37] DATASEC 'datasec1' size=12 vlen=1\n"
+ "\ttype_id=21 offset=4 size=8",
+ "[38] DECL_TAG 'tag1' type_id=36 component_idx=-1",
+ "[39] DECL_TAG 'tag2' type_id=34 component_idx=1",
+ "[40] TYPE_TAG 'tag1' type_id=21");
cleanup:
btf__free(btf1);
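
For reference, btf__add_type_tag() mirrors btf__add_decl_tag() but emits a BTF_KIND_TYPE_TAG that annotates another type rather than a declaration. A hedged sketch of how the "[20] TYPE_TAG" entry checked above could be built programmatically (assumes a freshly created BTF object, so type ids are predictable; names are illustrative):

#include <bpf/btf.h>
#include <linux/btf.h>

int add_tagged_int(struct btf *btf)
{
	int int_id, tag_id;

	int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);	/* [1] INT */
	if (int_id < 0)
		return int_id;
	/* TYPE_TAG 'tag1' annotating the int type, as in "[20] TYPE_TAG" */
	tag_id = btf__add_type_tag(btf, "tag1", int_id);
	return tag_id < 0 ? tag_id : 0;
}
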
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
index 5de485c7370f..858916d11e2e 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
@@ -16,7 +16,7 @@ static int prog_load(void)
};
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index 731bea84d8ed..d3e8f729c623 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -15,22 +15,22 @@ static int prog_load_cnt(int verdict, int val)
int cgroup_storage_fd, percpu_cgroup_storage_fd;
if (map_fd < 0)
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
if (map_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
return -1;
}
- cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
- sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+ cgroup_storage_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL,
+ sizeof(struct bpf_cgroup_storage_key), 8, 0, NULL);
if (cgroup_storage_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
return -1;
}
- percpu_cgroup_storage_fd = bpf_create_map(
- BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
- sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+ percpu_cgroup_storage_fd = bpf_map_create(
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, NULL,
+ sizeof(struct bpf_cgroup_storage_key), 8, 0, NULL);
if (percpu_cgroup_storage_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
return -1;
@@ -66,7 +66,7 @@ static int prog_load_cnt(int verdict, int val)
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
int ret;
- ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ ret = bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
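
The map-creation changes above track the deprecation of bpf_create_map() in favor of bpf_map_create(), which takes an optional map name and an extensible opts struct instead of a raw flags argument. A sketch of the equivalent array-map creation (the map name here is illustrative; passing NULL name and NULL opts, as the test does, keeps the old behavior):

#include <bpf/bpf.h>

int create_counter_map(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = 0);

	/* key 4 bytes, value 8 bytes, 1 entry - as in the test above */
	return bpf_map_create(BPF_MAP_TYPE_ARRAY, "cnt_map", 4, 8, 1, &opts);
}
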
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
index 10d3c33821a7..356547e849e2 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
@@ -18,7 +18,7 @@ static int prog_load(int verdict)
};
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
index 9229db2f5ca5..9c4325f4aef2 100644
--- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -51,24 +51,25 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
bool v4 = family == AF_INET;
__u16 expected_local_port = v4 ? 22222 : 22223;
__u16 expected_peer_port = 60000;
- struct bpf_prog_load_attr attr = {
- .file = v4 ? "./connect_force_port4.o" :
- "./connect_force_port6.o",
- };
struct bpf_program *prog;
struct bpf_object *obj;
- int xlate_fd, fd, err;
+ const char *obj_file = v4 ? "connect_force_port4.o" : "connect_force_port6.o";
+ int fd, err;
__u32 duration = 0;
- err = bpf_prog_load_xattr(&attr, &obj, &xlate_fd);
- if (err) {
- log_err("Failed to load BPF object");
+ obj = bpf_object__open_file(obj_file, NULL);
+ if (!ASSERT_OK_PTR(obj, "bpf_obj_open"))
return -1;
+
+ err = bpf_object__load(obj);
+ if (!ASSERT_OK(err, "bpf_obj_load")) {
+ err = -EIO;
+ goto close_bpf_object;
}
- prog = bpf_object__find_program_by_title(obj, v4 ?
- "cgroup/connect4" :
- "cgroup/connect6");
+ prog = bpf_object__find_program_by_name(obj, v4 ?
+ "connect4" :
+ "connect6");
if (CHECK(!prog, "find_prog", "connect prog not found\n")) {
err = -EIO;
goto close_bpf_object;
@@ -82,9 +83,9 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
goto close_bpf_object;
}
- prog = bpf_object__find_program_by_title(obj, v4 ?
- "cgroup/getpeername4" :
- "cgroup/getpeername6");
+ prog = bpf_object__find_program_by_name(obj, v4 ?
+ "getpeername4" :
+ "getpeername6");
if (CHECK(!prog, "find_prog", "getpeername prog not found\n")) {
err = -EIO;
goto close_bpf_object;
@@ -98,9 +99,9 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
goto close_bpf_object;
}
- prog = bpf_object__find_program_by_title(obj, v4 ?
- "cgroup/getsockname4" :
- "cgroup/getsockname6");
+ prog = bpf_object__find_program_by_name(obj, v4 ?
+ "getsockname4" :
+ "getsockname6");
if (CHECK(!prog, "find_prog", "getsockname prog not found\n")) {
err = -EIO;
goto close_bpf_object;
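
The pattern replacing bpf_prog_load_xattr() throughout this file is open-then-load, with programs looked up by C function name rather than by SEC() title. A condensed sketch of that flow under the same assumptions (error conventions follow libbpf's NULL-plus-errno style; names are illustrative):

#include <errno.h>
#include <bpf/libbpf.h>

int load_and_find(const char *obj_file, const char *prog_name,
		  struct bpf_object **pobj, struct bpf_program **pprog)
{
	struct bpf_object *obj;
	int err;

	obj = bpf_object__open_file(obj_file, NULL);
	if (!obj)
		return -errno;
	err = bpf_object__load(obj);
	if (err) {
		bpf_object__close(obj);
		return err;
	}
	/* lookup by function name, e.g. "connect4", not "cgroup/connect4" */
	*pprog = bpf_object__find_program_by_name(obj, prog_name);
	if (!*pprog) {
		bpf_object__close(obj);
		return -ENOENT;
	}
	*pobj = obj;
	return 0;
}
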
diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern.c b/tools/testing/selftests/bpf/prog_tests/core_kern.c
new file mode 100644
index 000000000000..561c5185d886
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_kern.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "test_progs.h"
+#include "core_kern.lskel.h"
+
+void test_core_kern_lskel(void)
+{
+ struct core_kern_lskel *skel;
+
+ skel = core_kern_lskel__open_and_load();
+ ASSERT_OK_PTR(skel, "open_and_load");
+ core_kern_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 55ec85ba7375..b8bdd1c3efca 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -10,7 +10,7 @@ static int duration = 0;
#define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name)
-#define MODULES_CASE(name, sec_name, tp_name) { \
+#define MODULES_CASE(name, pg_name, tp_name) { \
.case_name = name, \
.bpf_obj_file = "test_core_reloc_module.o", \
.btf_src_file = NULL, /* find in kernel module BTFs */ \
@@ -28,7 +28,7 @@ static int duration = 0;
.comm_len = sizeof("test_progs"), \
}, \
.output_len = sizeof(struct core_reloc_module_output), \
- .prog_sec_name = sec_name, \
+ .prog_name = pg_name, \
.raw_tp_name = tp_name, \
.trigger = __trigger_module_test_read, \
.needs_testmod = true, \
@@ -43,7 +43,9 @@ static int duration = 0;
#define FLAVORS_CASE_COMMON(name) \
.case_name = #name, \
.bpf_obj_file = "test_core_reloc_flavors.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o" \
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_flavors" \
#define FLAVORS_CASE(name) { \
FLAVORS_CASE_COMMON(name), \
@@ -66,7 +68,9 @@ static int duration = 0;
#define NESTING_CASE_COMMON(name) \
.case_name = #name, \
.bpf_obj_file = "test_core_reloc_nesting.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_nesting" \
#define NESTING_CASE(name) { \
NESTING_CASE_COMMON(name), \
@@ -91,7 +95,9 @@ static int duration = 0;
#define ARRAYS_CASE_COMMON(name) \
.case_name = #name, \
.bpf_obj_file = "test_core_reloc_arrays.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_arrays" \
#define ARRAYS_CASE(name) { \
ARRAYS_CASE_COMMON(name), \
@@ -123,7 +129,9 @@ static int duration = 0;
#define PRIMITIVES_CASE_COMMON(name) \
.case_name = #name, \
.bpf_obj_file = "test_core_reloc_primitives.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_primitives" \
#define PRIMITIVES_CASE(name) { \
PRIMITIVES_CASE_COMMON(name), \
@@ -158,6 +166,8 @@ static int duration = 0;
.e = 5, .f = 6, .g = 7, .h = 8, \
}, \
.output_len = sizeof(struct core_reloc_mods_output), \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_mods", \
}
#define PTR_AS_ARR_CASE(name) { \
@@ -174,6 +184,8 @@ static int duration = 0;
.a = 3, \
}, \
.output_len = sizeof(struct core_reloc_ptr_as_arr), \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_ptr_as_arr", \
}
#define INTS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
@@ -190,7 +202,9 @@ static int duration = 0;
#define INTS_CASE_COMMON(name) \
.case_name = #name, \
.bpf_obj_file = "test_core_reloc_ints.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_ints"
#define INTS_CASE(name) { \
INTS_CASE_COMMON(name), \
@@ -208,7 +222,9 @@ static int duration = 0;
#define FIELD_EXISTS_CASE_COMMON(name) \
.case_name = #name, \
.bpf_obj_file = "test_core_reloc_existence.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o" \
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_existence"
#define BITFIELDS_CASE_COMMON(objfile, test_name_prefix, name) \
.case_name = test_name_prefix#name, \
@@ -223,6 +239,8 @@ static int duration = 0;
.output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \
__VA_ARGS__, \
.output_len = sizeof(struct core_reloc_bitfields_output), \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_bitfields", \
}, { \
BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \
"direct:", name), \
@@ -231,7 +249,7 @@ static int duration = 0;
.output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \
__VA_ARGS__, \
.output_len = sizeof(struct core_reloc_bitfields_output), \
- .prog_sec_name = "tp_btf/sys_enter", \
+ .prog_name = "test_core_bitfields_direct", \
}
@@ -239,17 +257,21 @@ static int duration = 0;
BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \
"probed:", name), \
.fails = true, \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_bitfields", \
}, { \
BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \
"direct:", name), \
- .prog_sec_name = "tp_btf/sys_enter", \
.fails = true, \
+ .prog_name = "test_core_bitfields_direct", \
}
#define SIZE_CASE_COMMON(name) \
.case_name = #name, \
.bpf_obj_file = "test_core_reloc_size.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_size"
#define SIZE_OUTPUT_DATA(type) \
STRUCT_TO_CHAR_PTR(core_reloc_size_output) { \
@@ -277,8 +299,10 @@ static int duration = 0;
#define TYPE_BASED_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_type_based.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o" \
+ .bpf_obj_file = "test_core_reloc_type_based.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_type_based"
#define TYPE_BASED_CASE(name, ...) { \
TYPE_BASED_CASE_COMMON(name), \
@@ -295,7 +319,9 @@ static int duration = 0;
#define TYPE_ID_CASE_COMMON(name) \
.case_name = #name, \
.bpf_obj_file = "test_core_reloc_type_id.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o" \
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_type_id"
#define TYPE_ID_CASE(name, setup_fn) { \
TYPE_ID_CASE_COMMON(name), \
@@ -312,7 +338,9 @@ static int duration = 0;
#define ENUMVAL_CASE_COMMON(name) \
.case_name = #name, \
.bpf_obj_file = "test_core_reloc_enumval.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o" \
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_enumval"
#define ENUMVAL_CASE(name, ...) { \
ENUMVAL_CASE_COMMON(name), \
@@ -342,7 +370,7 @@ struct core_reloc_test_case {
bool fails;
bool needs_testmod;
bool relaxed_core_relocs;
- const char *prog_sec_name;
+ const char *prog_name;
const char *raw_tp_name;
setup_test_fn setup;
trigger_test_fn trigger;
@@ -433,7 +461,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test)
static int setup_type_id_case_success(struct core_reloc_test_case *test) {
struct core_reloc_type_id_output *exp = (void *)test->output;
- struct btf *targ_btf = btf__parse(test->btf_src_file, NULL);
+ struct btf *targ_btf;
int err;
err = setup_type_id_case_local(test);
@@ -497,11 +525,13 @@ static struct core_reloc_test_case test_cases[] = {
.comm_len = sizeof("test_progs"),
},
.output_len = sizeof(struct core_reloc_kernel_output),
+ .raw_tp_name = "sys_enter",
+ .prog_name = "test_core_kernel",
},
/* validate we can find kernel module BTF types for relocs/attach */
- MODULES_CASE("module_probed", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"),
- MODULES_CASE("module_direct", "tp_btf/bpf_testmod_test_read", NULL),
+ MODULES_CASE("module_probed", "test_core_module_probed", "bpf_testmod_test_read"),
+ MODULES_CASE("module_direct", "test_core_module_direct", NULL),
/* validate BPF program can use multiple flavors to match against
* single target BTF type
@@ -580,6 +610,8 @@ static struct core_reloc_test_case test_cases[] = {
.c = 0, /* BUG in clang, should be 3 */
},
.output_len = sizeof(struct core_reloc_misc_output),
+ .raw_tp_name = "sys_enter",
+ .prog_name = "test_core_misc",
},
/* validate field existence checks */
@@ -848,14 +880,9 @@ void test_core_reloc(void)
if (!ASSERT_OK_PTR(obj, "obj_open"))
goto cleanup;
- probe_name = "raw_tracepoint/sys_enter";
- tp_name = "sys_enter";
- if (test_case->prog_sec_name) {
- probe_name = test_case->prog_sec_name;
- tp_name = test_case->raw_tp_name; /* NULL for tp_btf */
- }
-
- prog = bpf_object__find_program_by_title(obj, probe_name);
+ probe_name = test_case->prog_name;
+ tp_name = test_case->raw_tp_name; /* NULL for tp_btf */
+ prog = bpf_object__find_program_by_name(obj, probe_name);
if (CHECK(!prog, "find_probe",
"prog '%s' not found\n", probe_name))
goto cleanup;
@@ -881,7 +908,8 @@ void test_core_reloc(void)
data = mmap_data;
memset(mmap_data, 0, sizeof(*data));
- memcpy(data->in, test_case->input, test_case->input_len);
+ if (test_case->input_len)
+ memcpy(data->in, test_case->input, test_case->input_len);
data->my_pid_tgid = my_pid_tgid;
link = bpf_program__attach_raw_tracepoint(prog, tp_name);
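
One subtlety carried over from the old code path: tp_btf programs are attached with a NULL tracepoint name, since their target is resolved from the program's BTF attach info, while raw_tp programs need the name spelled out. A minimal sketch of that distinction:

#include <bpf/libbpf.h>

struct bpf_link *attach_tp(struct bpf_program *prog, const char *tp_name)
{
	/* tp_name may be NULL for SEC("tp_btf/...") programs; raw_tp
	 * programs must pass the tracepoint name explicitly.
	 */
	return bpf_program__attach_raw_tracepoint(prog, tp_name);
}
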
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
index 0a577a248d34..911345c526e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/d_path.c
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -9,6 +9,8 @@
#define MAX_FILES 7
#include "test_d_path.skel.h"
+#include "test_d_path_check_rdonly_mem.skel.h"
+#include "test_d_path_check_types.skel.h"
static int duration;
@@ -99,7 +101,7 @@ out_close:
return ret;
}
-void test_d_path(void)
+static void test_d_path_basic(void)
{
struct test_d_path__bss *bss;
struct test_d_path *skel;
@@ -155,3 +157,35 @@ void test_d_path(void)
cleanup:
test_d_path__destroy(skel);
}
+
+static void test_d_path_check_rdonly_mem(void)
+{
+ struct test_d_path_check_rdonly_mem *skel;
+
+ skel = test_d_path_check_rdonly_mem__open_and_load();
+ ASSERT_ERR_PTR(skel, "unexpected_load_overwriting_rdonly_mem");
+
+ test_d_path_check_rdonly_mem__destroy(skel);
+}
+
+static void test_d_path_check_types(void)
+{
+ struct test_d_path_check_types *skel;
+
+ skel = test_d_path_check_types__open_and_load();
+ ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type");
+
+ test_d_path_check_types__destroy(skel);
+}
+
+void test_d_path(void)
+{
+ if (test__start_subtest("basic"))
+ test_d_path_basic();
+
+ if (test__start_subtest("check_rdonly_mem"))
+ test_d_path_check_rdonly_mem();
+
+ if (test__start_subtest("check_alloc_mem"))
+ test_d_path_check_types();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/exhandler.c b/tools/testing/selftests/bpf/prog_tests/exhandler.c
new file mode 100644
index 000000000000..118bb182ee20
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/exhandler.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+
+/* Test that BPF exception handling works. fork() triggers the
+ * task_newtask tracepoint; the new task has a NULL task_works
+ * pointer, so task->task_works->func cannot be read directly.
+ * (A legitimately non-NULL task_works would have a non-NULL func.)
+ *
+ * So to verify exception handling we want to see NULL for both
+ * task_works and task_works->func; if we see this we can conclude
+ * that the exception handler ran when the program dereferenced
+ * task->task_works and zeroed the destination register.
+ */
+#include "exhandler_kern.skel.h"
+
+void test_exhandler(void)
+{
+ int err = 0, duration = 0, status;
+ struct exhandler_kern *skel;
+ pid_t cpid;
+
+ skel = exhandler_kern__open_and_load();
+ if (CHECK(!skel, "skel_load", "skeleton failed: %d\n", err))
+ goto cleanup;
+
+ skel->bss->test_pid = getpid();
+
+ err = exhandler_kern__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto cleanup;
+ cpid = fork();
+ if (!ASSERT_GT(cpid, -1, "fork failed"))
+ goto cleanup;
+ if (cpid == 0)
+ _exit(0);
+ waitpid(cpid, &status, 0);
+
+ ASSERT_NEQ(skel->bss->exception_triggered, 0, "verify exceptions occurred");
+cleanup:
+ exhandler_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 9cff14a23bb7..c52f99f6a909 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -65,7 +65,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
int err, tgt_fd, i;
struct btf *btf;
- err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
+ err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
&tgt_obj, &tgt_fd);
if (!ASSERT_OK(err, "tgt_prog_load"))
return;
@@ -101,6 +101,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
for (i = 0; i < prog_cnt; i++) {
struct bpf_link_info link_info;
+ struct bpf_program *pos;
+ const char *pos_sec_name;
char *tgt_name;
__s32 btf_id;
@@ -109,7 +111,14 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
goto close_prog;
btf_id = btf__find_by_name_kind(btf, tgt_name + 1, BTF_KIND_FUNC);
- prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]);
+ prog[i] = NULL;
+ bpf_object__for_each_program(pos, obj) {
+ pos_sec_name = bpf_program__section_name(pos);
+ if (pos_sec_name && !strcmp(pos_sec_name, prog_name[i])) {
+ prog[i] = pos;
+ break;
+ }
+ }
if (!ASSERT_OK_PTR(prog[i], prog_name[i]))
goto close_prog;
@@ -211,8 +220,8 @@ static void test_func_replace_verify(void)
static int test_second_attach(struct bpf_object *obj)
{
- const char *prog_name = "freplace/get_constant";
- const char *tgt_name = prog_name + 9; /* cut off freplace/ */
+ const char *prog_name = "security_new_get_constant";
+ const char *tgt_name = "get_constant";
const char *tgt_obj_file = "./test_pkt_access.o";
struct bpf_program *prog = NULL;
struct bpf_object *tgt_obj;
@@ -220,11 +229,11 @@ static int test_second_attach(struct bpf_object *obj)
struct bpf_link *link;
int err = 0, tgt_fd;
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name))
return -ENOENT;
- err = bpf_prog_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC,
+ err = bpf_prog_test_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC,
&tgt_obj, &tgt_fd);
if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n",
tgt_obj_file, err, errno))
@@ -274,7 +283,7 @@ static void test_fmod_ret_freplace(void)
__u32 duration = 0;
int err, pkt_fd, attach_prog_fd;
- err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC,
+ err = bpf_prog_test_load(tgt_name, BPF_PROG_TYPE_UNSPEC,
&pkt_obj, &pkt_fd);
/* the target prog should load fine */
if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
@@ -341,7 +350,7 @@ static void test_obj_load_failure_common(const char *obj_file,
int err, pkt_fd;
__u32 duration = 0;
- err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
+ err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
&pkt_obj, &pkt_fd);
/* the target prog should load fine */
if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index 7c9b62e971f1..e4cede6b4b2d 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -20,34 +20,33 @@ void test_fexit_stress(void)
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr load_attr = {
- .prog_type = BPF_PROG_TYPE_TRACING,
- .license = "GPL",
- .insns = trace_program,
- .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn),
+ LIBBPF_OPTS(bpf_prog_load_opts, trace_opts,
.expected_attach_type = BPF_TRACE_FEXIT,
- };
+ .log_buf = error,
+ .log_size = sizeof(error),
+ );
const struct bpf_insn skb_program[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr skb_load_attr = {
- .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .license = "GPL",
- .insns = skb_program,
- .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn),
- };
+ LIBBPF_OPTS(bpf_prog_load_opts, skb_opts,
+ .log_buf = error,
+ .log_size = sizeof(error),
+ );
err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1",
- load_attr.expected_attach_type);
+ trace_opts.expected_attach_type);
if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err))
goto out;
- load_attr.attach_btf_id = err;
+ trace_opts.attach_btf_id = err;
for (i = 0; i < CNT; i++) {
- fexit_fd[i] = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+ fexit_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL",
+ trace_program,
+ sizeof(trace_program) / sizeof(struct bpf_insn),
+ &trace_opts);
if (CHECK(fexit_fd[i] < 0, "fexit loaded",
"failed: %d errno %d\n", fexit_fd[i], errno))
goto out;
@@ -57,7 +56,9 @@ void test_fexit_stress(void)
goto out;
}
- filter_fd = bpf_load_program_xattr(&skb_load_attr, error, sizeof(error));
+ filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+ skb_program, sizeof(skb_program) / sizeof(struct bpf_insn),
+ &skb_opts);
if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
filter_fd, errno))
goto out;
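
The fexit_stress.c conversion above shows the shape of the new low-level loader: bpf_load_program_xattr()'s attr struct is replaced by bpf_prog_load(type, name, license, insns, insn_cnt, opts) plus LIBBPF_OPTS. A standalone sketch (the insn macros are the selftests' helpers from tools/include/linux/filter.h, not part of libbpf proper):

#include <linux/filter.h>
#include <bpf/bpf.h>

int load_trivial_filter(char *log, size_t log_sz)
{
	const struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.log_buf = log,
		.log_size = log_sz,
	);

	/* prog name is optional (NULL); returns a prog fd or negative error */
	return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
			     insns, sizeof(insns) / sizeof(insns[0]), &opts);
}
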
diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
new file mode 100644
index 000000000000..b74b3c0c555a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include "find_vma.skel.h"
+#include "find_vma_fail1.skel.h"
+#include "find_vma_fail2.skel.h"
+
+static void test_and_reset_skel(struct find_vma *skel, int expected_find_zero_ret)
+{
+ ASSERT_EQ(skel->bss->found_vm_exec, 1, "found_vm_exec");
+ ASSERT_EQ(skel->data->find_addr_ret, 0, "find_addr_ret");
+ ASSERT_EQ(skel->data->find_zero_ret, expected_find_zero_ret, "find_zero_ret");
+ ASSERT_OK_PTR(strstr(skel->bss->d_iname, "test_progs"), "find_test_progs");
+
+ skel->bss->found_vm_exec = 0;
+ skel->data->find_addr_ret = -1;
+ skel->data->find_zero_ret = -1;
+ skel->bss->d_iname[0] = 0;
+}
+
+static int open_pe(void)
+{
+ struct perf_event_attr attr = {0};
+ int pfd;
+
+ /* create perf event */
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
+
+ return pfd >= 0 ? pfd : -errno;
+}
+
+static void test_find_vma_pe(struct find_vma *skel)
+{
+ struct bpf_link *link = NULL;
+ volatile int j = 0;
+ int pfd, i;
+
+ pfd = open_pe();
+ if (pfd < 0) {
+ if (pfd == -ENOENT || pfd == -EOPNOTSUPP) {
+ printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_GE(pfd, 0, "perf_event_open"))
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_perf_event(skel->progs.handle_pe, pfd);
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
+ goto cleanup;
+
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+
+ test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */);
+cleanup:
+ bpf_link__destroy(link);
+ close(pfd);
+}
+
+static void test_find_vma_kprobe(struct find_vma *skel)
+{
+ int err;
+
+ err = find_vma__attach(skel);
+ if (!ASSERT_OK(err, "get_branch_snapshot__attach"))
+ return;
+
+ getpgid(skel->bss->target_pid);
+ test_and_reset_skel(skel, -ENOENT /* could not find vma for ptr 0 */);
+}
+
+static void test_illegal_write_vma(void)
+{
+ struct find_vma_fail1 *skel;
+
+ skel = find_vma_fail1__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "find_vma_fail1__open_and_load"))
+ find_vma_fail1__destroy(skel);
+}
+
+static void test_illegal_write_task(void)
+{
+ struct find_vma_fail2 *skel;
+
+ skel = find_vma_fail2__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "find_vma_fail2__open_and_load"))
+ find_vma_fail2__destroy(skel);
+}
+
+void serial_test_find_vma(void)
+{
+ struct find_vma *skel;
+
+ skel = find_vma__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "find_vma__open_and_load"))
+ return;
+
+ skel->bss->target_pid = getpid();
+ skel->bss->addr = (__u64)(uintptr_t)test_find_vma_pe;
+
+ test_find_vma_pe(skel);
+ usleep(100000); /* allow the irq_work to finish */
+ test_find_vma_kprobe(skel);
+
+ find_vma__destroy(skel);
+ test_illegal_write_vma();
+ test_illegal_write_task();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
index 6093728497c7..93ac3f28226c 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -30,7 +30,7 @@ void serial_test_flow_dissector_load_bytes(void)
/* make sure bpf_skb_load_bytes is not allowed from skb-less context
*/
- fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
+ fd = bpf_test_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
CHECK(fd < 0,
"flow_dissector-bpf_skb_load_bytes-load",
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index f0c6c226aba8..7c79462d2702 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -47,9 +47,9 @@ static int load_prog(enum bpf_prog_type type)
};
int fd;
- fd = bpf_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ fd = bpf_test_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
if (CHECK_FAIL(fd < 0))
- perror("bpf_load_program");
+ perror("bpf_test_load_program");
return fd;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
new file mode 100644
index 000000000000..85c427119fe9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "get_func_args_test.skel.h"
+
+void test_get_func_args_test(void)
+{
+ struct get_func_args_test *skel = NULL;
+ __u32 duration = 0, retval;
+ int err, prog_fd;
+
+ skel = get_func_args_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "get_func_args_test__open_and_load"))
+ return;
+
+ err = get_func_args_test__attach(skel);
+ if (!ASSERT_OK(err, "get_func_args_test__attach"))
+ goto cleanup;
+
+ /* This runs bpf_fentry_test* functions and triggers
+ * fentry/fexit programs.
+ */
+ prog_fd = bpf_program__fd(skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+ NULL, NULL, &retval, &duration);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(retval, 0, "test_run");
+
+ /* This runs bpf_modify_return_test function and triggers
+ * fmod_ret_test and fexit_test programs.
+ */
+ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+ NULL, NULL, &retval, &duration);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(retval, 1234, "test_run");
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
+ ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
+ ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
+ ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
+
+cleanup:
+ get_func_args_test__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index 522237aa4470..e834a01de16a 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -24,13 +24,19 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
{
bool good_kern_stack = false, good_user_stack = false;
const char *nonjit_func = "___bpf_prog_run";
- struct get_stack_trace_t *e = data;
+ /* perfbuf-submitted data is 4-byte aligned, but we need 8-byte
+ * alignment, so copy data into a local variable, for simplicity
+ */
+ struct get_stack_trace_t e;
int i, num_stack;
static __u64 cnt;
struct ksym *ks;
cnt++;
+ memset(&e, 0, sizeof(e));
+ memcpy(&e, data, size <= sizeof(e) ? size : sizeof(e));
+
if (size < sizeof(struct get_stack_trace_t)) {
__u64 *raw_data = data;
bool found = false;
@@ -57,19 +63,19 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
good_user_stack = true;
}
} else {
- num_stack = e->kern_stack_size / sizeof(__u64);
+ num_stack = e.kern_stack_size / sizeof(__u64);
if (env.jit_enabled) {
good_kern_stack = num_stack > 0;
} else {
for (i = 0; i < num_stack; i++) {
- ks = ksym_search(e->kern_stack[i]);
+ ks = ksym_search(e.kern_stack[i]);
if (ks && (strcmp(ks->name, nonjit_func) == 0)) {
good_kern_stack = true;
break;
}
}
}
- if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
+ if (e.user_stack_size > 0 && e.user_stack_buildid_size > 0)
good_user_stack = true;
}
@@ -83,9 +89,8 @@ void test_get_stack_raw_tp(void)
{
const char *file = "./test_get_stack_rawtp.o";
const char *file_err = "./test_get_stack_rawtp_err.o";
- const char *prog_name = "raw_tracepoint/sys_enter";
+ const char *prog_name = "bpf_prog1";
int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP;
- struct perf_buffer_opts pb_opts = {};
struct perf_buffer *pb = NULL;
struct bpf_link *link = NULL;
struct timespec tv = {0, 10};
@@ -94,15 +99,15 @@ void test_get_stack_raw_tp(void)
struct bpf_map *map;
cpu_set_t cpu_set;
- err = bpf_prog_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err >= 0, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
goto close_prog;
@@ -124,8 +129,8 @@ void test_get_stack_raw_tp(void)
if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
- pb_opts.sample_cb = get_stack_print_output;
- pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts);
+ pb = perf_buffer__new(bpf_map__fd(map), 8, get_stack_print_output,
+ NULL, NULL, NULL);
if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto close_prog;
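
perf_buffer__new() also changed signature in this libbpf rework: the sample and lost callbacks plus their shared ctx moved from struct perf_buffer_opts into explicit parameters, with the opts struct (now reserved for future extension) last. A sketch under those assumptions:

#include <bpf/libbpf.h>

static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
	(*(int *)ctx)++;	/* count events delivered */
}

struct perf_buffer *open_pb(int map_fd, int *event_cnt)
{
	/* 8 pages per CPU; no lost-sample callback; NULL opts */
	return perf_buffer__new(map_fd, 8, on_sample, NULL, event_cnt, NULL);
}
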
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index afd8639f9a94..9da131b32e13 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -136,7 +136,7 @@ void test_global_data(void)
struct bpf_object *obj;
int err, prog_fd;
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK(err, "load program", "error %d loading %s\n", err, file))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_args.c b/tools/testing/selftests/bpf/prog_tests/global_func_args.c
index 8bcc2869102f..93a2439237b0 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_func_args.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c
@@ -44,7 +44,7 @@ void test_global_func_args(void)
struct bpf_object *obj;
int err, prog_fd;
- err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
if (CHECK(err, "load program", "error %d loading %s\n", err, file))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index 01e51d16c8b8..ce10d2fc3a6c 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
+#include "kfree_skb.skel.h"
struct meta {
int ifindex;
@@ -58,63 +59,43 @@ void serial_test_kfree_skb(void)
.ctx_in = &skb,
.ctx_size_in = sizeof(skb),
};
- struct bpf_prog_load_attr attr = {
- .file = "./kfree_skb.o",
- };
-
- struct bpf_link *link = NULL, *link_fentry = NULL, *link_fexit = NULL;
- struct bpf_map *perf_buf_map, *global_data;
- struct bpf_program *prog, *fentry, *fexit;
- struct bpf_object *obj, *obj2 = NULL;
- struct perf_buffer_opts pb_opts = {};
+ struct kfree_skb *skel = NULL;
+ struct bpf_link *link;
+ struct bpf_object *obj;
struct perf_buffer *pb = NULL;
- int err, kfree_skb_fd;
+ int err;
bool passed = false;
__u32 duration = 0;
const int zero = 0;
bool test_ok[2];
- err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
+ err = bpf_prog_test_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
&obj, &tattr.prog_fd);
if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
return;
- err = bpf_prog_load_xattr(&attr, &obj2, &kfree_skb_fd);
- if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
- goto close_prog;
-
- prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb");
- if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n"))
- goto close_prog;
- fentry = bpf_object__find_program_by_title(obj2, "fentry/eth_type_trans");
- if (CHECK(!fentry, "find_prog", "prog eth_type_trans not found\n"))
- goto close_prog;
- fexit = bpf_object__find_program_by_title(obj2, "fexit/eth_type_trans");
- if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n"))
- goto close_prog;
-
- global_data = bpf_object__find_map_by_name(obj2, ".bss");
- if (CHECK(!global_data, "find global data", "not found\n"))
+ skel = kfree_skb__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kfree_skb_skel"))
goto close_prog;
- link = bpf_program__attach_raw_tracepoint(prog, NULL);
+ link = bpf_program__attach_raw_tracepoint(skel->progs.trace_kfree_skb, NULL);
if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
- link_fentry = bpf_program__attach_trace(fentry);
- if (!ASSERT_OK_PTR(link_fentry, "attach fentry"))
- goto close_prog;
- link_fexit = bpf_program__attach_trace(fexit);
- if (!ASSERT_OK_PTR(link_fexit, "attach fexit"))
+ skel->links.trace_kfree_skb = link;
+
+ link = bpf_program__attach_trace(skel->progs.fentry_eth_type_trans);
+ if (!ASSERT_OK_PTR(link, "attach fentry"))
goto close_prog;
+ skel->links.fentry_eth_type_trans = link;
- perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
- if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
+ link = bpf_program__attach_trace(skel->progs.fexit_eth_type_trans);
+ if (!ASSERT_OK_PTR(link, "attach fexit"))
goto close_prog;
+ skel->links.fexit_eth_type_trans = link;
/* set up perf buffer */
- pb_opts.sample_cb = on_sample;
- pb_opts.ctx = &passed;
- pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
+ pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1,
+ on_sample, NULL, &passed, NULL);
if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto close_prog;
@@ -135,7 +116,7 @@ void serial_test_kfree_skb(void)
*/
ASSERT_TRUE(passed, "passed");
- err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok);
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.bss), &zero, test_ok);
if (CHECK(err, "get_result",
"failed to get output data: %d\n", err))
goto close_prog;
@@ -143,9 +124,6 @@ void serial_test_kfree_skb(void)
CHECK_FAIL(!test_ok[0] || !test_ok[1]);
close_prog:
perf_buffer__free(pb);
- bpf_link__destroy(link);
- bpf_link__destroy(link_fentry);
- bpf_link__destroy(link_fexit);
bpf_object__close(obj);
- bpf_object__close(obj2);
+ kfree_skb__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 5c9c0176991b..7d7445ccc141 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -4,6 +4,7 @@
#include <network_helpers.h>
#include "kfunc_call_test.lskel.h"
#include "kfunc_call_test_subprog.skel.h"
+#include "kfunc_call_test_subprog.lskel.h"
static void test_main(void)
{
@@ -49,6 +50,26 @@ static void test_subprog(void)
kfunc_call_test_subprog__destroy(skel);
}
+static void test_subprog_lskel(void)
+{
+ struct kfunc_call_test_subprog_lskel *skel;
+ int prog_fd, retval, err;
+
+ skel = kfunc_call_test_subprog_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog_fd = skel->progs.kfunc_call_test1.prog_fd;
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, (__u32 *)&retval, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run(test1)");
+ ASSERT_EQ(retval, 10, "test1-retval");
+ ASSERT_NEQ(skel->data->active_res, -1, "active_res");
+ ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
+
+ kfunc_call_test_subprog_lskel__destroy(skel);
+}
+
void test_kfunc_call(void)
{
if (test__start_subtest("main"))
@@ -56,4 +77,7 @@ void test_kfunc_call(void)
if (test__start_subtest("subprog"))
test_subprog();
+
+ if (test__start_subtest("subprog_lskel"))
+ test_subprog_lskel();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index 79f6bd1e50d6..f6933b06daf8 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -8,6 +8,7 @@
#include "test_ksyms_btf_null_check.skel.h"
#include "test_ksyms_weak.skel.h"
#include "test_ksyms_weak.lskel.h"
+#include "test_ksyms_btf_write_check.skel.h"
static int duration;
@@ -137,6 +138,16 @@ cleanup:
test_ksyms_weak_lskel__destroy(skel);
}
+static void test_write_check(void)
+{
+ struct test_ksyms_btf_write_check *skel;
+
+ skel = test_ksyms_btf_write_check__open_and_load();
+ ASSERT_ERR_PTR(skel, "unexpected load of a prog writing to ksym memory\n");
+
+ test_ksyms_btf_write_check__destroy(skel);
+}
+
void test_ksyms_btf(void)
{
int percpu_datasec;
@@ -167,4 +178,7 @@ void test_ksyms_btf(void)
if (test__start_subtest("weak_ksyms_lskel"))
test_weak_syms_lskel();
+
+ if (test__start_subtest("write_check"))
+ test_write_check();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index 8073105548ff..540ef28fabff 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -30,7 +30,7 @@ static void test_l4lb(const char *file)
char buf[128];
u32 *magic = (u32 *)buf;
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/legacy_printk.c b/tools/testing/selftests/bpf/prog_tests/legacy_printk.c
new file mode 100644
index 000000000000..ec6e45f2a644
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/legacy_printk.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "test_legacy_printk.skel.h"
+
+static int execute_one_variant(bool legacy)
+{
+ struct test_legacy_printk *skel;
+ int err, zero = 0, my_pid = getpid(), res, map_fd;
+
+ skel = test_legacy_printk__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return -errno;
+
+ bpf_program__set_autoload(skel->progs.handle_legacy, legacy);
+ bpf_program__set_autoload(skel->progs.handle_modern, !legacy);
+
+ err = test_legacy_printk__load(skel);
+	/* no ASSERT_OK: one of the two variants is expected to fail here */
+ if (err)
+ goto err_out;
+
+ if (legacy) {
+ map_fd = bpf_map__fd(skel->maps.my_pid_map);
+ err = bpf_map_update_elem(map_fd, &zero, &my_pid, BPF_ANY);
+ if (!ASSERT_OK(err, "my_pid_map_update"))
+ goto err_out;
+ err = bpf_map_lookup_elem(map_fd, &zero, &res);
+ } else {
+ skel->bss->my_pid_var = my_pid;
+ }
+
+ err = test_legacy_printk__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto err_out;
+
+ usleep(1); /* trigger */
+
+ if (legacy) {
+ map_fd = bpf_map__fd(skel->maps.res_map);
+ err = bpf_map_lookup_elem(map_fd, &zero, &res);
+ if (!ASSERT_OK(err, "res_map_lookup"))
+ goto err_out;
+ } else {
+ res = skel->bss->res_var;
+ }
+
+ if (!ASSERT_GT(res, 0, "res")) {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+err_out:
+ test_legacy_printk__destroy(skel);
+ return err;
+}
+
+void test_legacy_printk(void)
+{
+ /* legacy variant should work everywhere */
+ ASSERT_OK(execute_one_variant(true /* legacy */), "legacy_case");
+
+	/* execute the modern variant, which can fail to load on old kernels */
+ execute_one_variant(false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
new file mode 100644
index 000000000000..9f766ddd946a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+void test_libbpf_probe_prog_types(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_prog_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_prog_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ if (!ASSERT_OK_PTR(t, "bpf_prog_type_enum"))
+ goto cleanup;
+
+ for (e = btf_enum(t), i = 0, n = btf_vlen(t); i < n; e++, i++) {
+ const char *prog_type_name = btf__str_by_offset(btf, e->name_off);
+ enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val;
+ int res;
+
+ if (prog_type == BPF_PROG_TYPE_UNSPEC)
+ continue;
+
+ if (!test__start_subtest(prog_type_name))
+ continue;
+
+ res = libbpf_probe_bpf_prog_type(prog_type, NULL);
+ ASSERT_EQ(res, 1, prog_type_name);
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+void test_libbpf_probe_map_types(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_map_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_map_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ if (!ASSERT_OK_PTR(t, "bpf_map_type_enum"))
+ goto cleanup;
+
+ for (e = btf_enum(t), i = 0, n = btf_vlen(t); i < n; e++, i++) {
+ const char *map_type_name = btf__str_by_offset(btf, e->name_off);
+ enum bpf_map_type map_type = (enum bpf_map_type)e->val;
+ int res;
+
+ if (map_type == BPF_MAP_TYPE_UNSPEC)
+ continue;
+
+ if (!test__start_subtest(map_type_name))
+ continue;
+
+ res = libbpf_probe_bpf_map_type(map_type, NULL);
+ ASSERT_EQ(res, 1, map_type_name);
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+void test_libbpf_probe_helpers(void)
+{
+#define CASE(prog, helper, supp) { \
+ .prog_type_name = "BPF_PROG_TYPE_" # prog, \
+ .helper_name = "bpf_" # helper, \
+ .prog_type = BPF_PROG_TYPE_ ## prog, \
+ .helper_id = BPF_FUNC_ ## helper, \
+ .supported = supp, \
+}
+ const struct case_def {
+ const char *prog_type_name;
+ const char *helper_name;
+ enum bpf_prog_type prog_type;
+ enum bpf_func_id helper_id;
+ bool supported;
+ } cases[] = {
+ CASE(KPROBE, unspec, false),
+ CASE(KPROBE, map_lookup_elem, true),
+ CASE(KPROBE, loop, true),
+
+ CASE(KPROBE, ktime_get_coarse_ns, false),
+ CASE(SOCKET_FILTER, ktime_get_coarse_ns, true),
+
+ CASE(KPROBE, sys_bpf, false),
+ CASE(SYSCALL, sys_bpf, true),
+ };
+ size_t case_cnt = ARRAY_SIZE(cases), i;
+ char buf[128];
+
+ for (i = 0; i < case_cnt; i++) {
+ const struct case_def *d = &cases[i];
+ int res;
+
+ snprintf(buf, sizeof(buf), "%s+%s", d->prog_type_name, d->helper_name);
+
+ if (!test__start_subtest(buf))
+ continue;
+
+ res = libbpf_probe_bpf_helper(d->prog_type, d->helper_id, NULL);
+ ASSERT_EQ(res, d->supported, buf);
+ }
+}
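
The probe APIs exercised above return 1 when the running kernel supports the given program type, map type, or helper, 0 when it does not, and a negative error if probing itself failed, which is why the tests assert equality with 1 (or the expected bool). A brief usage sketch:

#include <stdbool.h>
#include <bpf/libbpf.h>

bool can_use_syscall_progs(void)
{
	/* 1 = supported, 0 = not supported, <0 = probe error */
	return libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_SYSCALL, NULL) == 1;
}
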
diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
index 5a2a689dbb68..4e0b2ec057aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
+++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
@@ -27,7 +27,7 @@ void test_load_bytes_relative(void)
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
- err = bpf_prog_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB,
+ err = bpf_prog_test_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB,
&obj, &prog_fd);
if (CHECK_FAIL(err))
goto close_server_fd;
diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
new file mode 100644
index 000000000000..e469b023962b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_log_buf.skel.h"
+
+static size_t libbpf_log_pos;
+static char libbpf_log_buf[1024 * 1024];
+static bool libbpf_log_error;
+
+static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt, va_list args)
+{
+ int emitted_cnt;
+ size_t left_cnt;
+
+ left_cnt = sizeof(libbpf_log_buf) - libbpf_log_pos;
+ emitted_cnt = vsnprintf(libbpf_log_buf + libbpf_log_pos, left_cnt, fmt, args);
+
+ if (emitted_cnt < 0 || emitted_cnt + 1 > left_cnt) {
+ libbpf_log_error = true;
+ return 0;
+ }
+
+ libbpf_log_pos += emitted_cnt;
+ return 0;
+}
+
+static void obj_load_log_buf(void)
+{
+ libbpf_print_fn_t old_print_cb = libbpf_set_print(libbpf_print_cb);
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ const size_t log_buf_sz = 1024 * 1024;
+ struct test_log_buf* skel;
+ char *obj_log_buf, *good_log_buf, *bad_log_buf;
+ int err;
+
+ obj_log_buf = malloc(3 * log_buf_sz);
+ if (!ASSERT_OK_PTR(obj_log_buf, "obj_log_buf"))
+ return;
+
+ good_log_buf = obj_log_buf + log_buf_sz;
+ bad_log_buf = obj_log_buf + 2 * log_buf_sz;
+ obj_log_buf[0] = good_log_buf[0] = bad_log_buf[0] = '\0';
+
+ opts.kernel_log_buf = obj_log_buf;
+ opts.kernel_log_size = log_buf_sz;
+ opts.kernel_log_level = 4; /* for BTF this will turn into 1 */
+
+ /* In the first round every prog has its own log_buf, so libbpf logs
+ * don't have program failure logs
+ */
+ skel = test_log_buf__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ /* set very verbose level for good_prog so we always get detailed logs */
+ bpf_program__set_log_buf(skel->progs.good_prog, good_log_buf, log_buf_sz);
+ bpf_program__set_log_level(skel->progs.good_prog, 2);
+
+ bpf_program__set_log_buf(skel->progs.bad_prog, bad_log_buf, log_buf_sz);
+ /* log_level 0 with custom log_buf means that verbose logs are not
+ * requested if program load is successful, but libbpf should retry
+ * with log_level 1 on error and put program's verbose load log into
+ * custom log_buf
+ */
+ bpf_program__set_log_level(skel->progs.bad_prog, 0);
+
+ err = test_log_buf__load(skel);
+ if (!ASSERT_ERR(err, "unexpected_load_success"))
+ goto cleanup;
+
+ ASSERT_FALSE(libbpf_log_error, "libbpf_log_error");
+
+ /* there should be no prog loading log because we specified per-prog log buf */
+ ASSERT_NULL(strstr(libbpf_log_buf, "-- BEGIN PROG LOAD LOG --"), "unexp_libbpf_log");
+ ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"),
+ "libbpf_log_not_empty");
+ ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty");
+ ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(id=0,off=0,imm=0) R10=fp0"),
+ "good_log_verbose");
+ ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"),
+ "bad_log_not_empty");
+
+ if (env.verbosity > VERBOSE_NONE) {
+ printf("LIBBPF LOG: \n=================\n%s=================\n", libbpf_log_buf);
+ printf("OBJ LOG: \n=================\n%s=================\n", obj_log_buf);
+ printf("GOOD_PROG LOG:\n=================\n%s=================\n", good_log_buf);
+ printf("BAD_PROG LOG:\n=================\n%s=================\n", bad_log_buf);
+ }
+
+ /* reset everything */
+ test_log_buf__destroy(skel);
+ obj_log_buf[0] = good_log_buf[0] = bad_log_buf[0] = '\0';
+ libbpf_log_buf[0] = '\0';
+ libbpf_log_pos = 0;
+ libbpf_log_error = false;
+
+ /* In the second round we let bad_prog's failure be logged through print callback */
+ opts.kernel_log_buf = NULL; /* let everything through into print callback */
+ opts.kernel_log_size = 0;
+ opts.kernel_log_level = 1;
+
+ skel = test_log_buf__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ /* set normal verbose level for good_prog to check log_level is taken into account */
+ bpf_program__set_log_buf(skel->progs.good_prog, good_log_buf, log_buf_sz);
+ bpf_program__set_log_level(skel->progs.good_prog, 1);
+
+ err = test_log_buf__load(skel);
+ if (!ASSERT_ERR(err, "unexpected_load_success"))
+ goto cleanup;
+
+ ASSERT_FALSE(libbpf_log_error, "libbpf_log_error");
+
+ /* this time prog loading error should be logged through print callback */
+ ASSERT_OK_PTR(strstr(libbpf_log_buf, "libbpf: prog 'bad_prog': -- BEGIN PROG LOAD LOG --"),
+ "libbpf_log_correct");
+ ASSERT_STREQ(obj_log_buf, "", "obj_log__empty");
+ ASSERT_STREQ(good_log_buf, "processed 4 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n",
+ "good_log_ok");
+ ASSERT_STREQ(bad_log_buf, "", "bad_log_empty");
+
+ if (env.verbosity > VERBOSE_NONE) {
+ printf("LIBBPF LOG: \n=================\n%s=================\n", libbpf_log_buf);
+ printf("OBJ LOG: \n=================\n%s=================\n", obj_log_buf);
+ printf("GOOD_PROG LOG:\n=================\n%s=================\n", good_log_buf);
+ printf("BAD_PROG LOG:\n=================\n%s=================\n", bad_log_buf);
+ }
+
+cleanup:
+ free(obj_log_buf);
+ test_log_buf__destroy(skel);
+ libbpf_set_print(old_print_cb);
+}
+
+static void bpf_prog_load_log_buf(void)
+{
+ const struct bpf_insn good_prog_insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ const size_t good_prog_insn_cnt = sizeof(good_prog_insns) / sizeof(struct bpf_insn);
+ const struct bpf_insn bad_prog_insns[] = {
+ BPF_EXIT_INSN(),
+ };
+ size_t bad_prog_insn_cnt = sizeof(bad_prog_insns) / sizeof(struct bpf_insn);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ const size_t log_buf_sz = 1024 * 1024;
+ char *log_buf;
+ int fd = -1;
+
+ log_buf = malloc(log_buf_sz);
+ if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc"))
+ return;
+ opts.log_buf = log_buf;
+ opts.log_size = log_buf_sz;
+
+ /* with log_level == 0, log_buf should stay empty for a good prog */
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL",
+ good_prog_insns, good_prog_insn_cnt, &opts);
+ ASSERT_STREQ(log_buf, "", "good_log_0");
+ ASSERT_GE(fd, 0, "good_fd1");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+ /* log_level == 2 should always fill log_buf, even for good prog */
+ log_buf[0] = '\0';
+ opts.log_level = 2;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL",
+ good_prog_insns, good_prog_insn_cnt, &opts);
+ ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(id=0,off=0,imm=0) R10=fp0"), "good_log_2");
+ ASSERT_GE(fd, 0, "good_fd2");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+ /* log_level == 0 should fill log_buf for bad prog */
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "bad_prog", "GPL",
+ bad_prog_insns, bad_prog_insn_cnt, &opts);
+ ASSERT_OK_PTR(strstr(log_buf, "R0 !read_ok"), "bad_log_0");
+ ASSERT_LT(fd, 0, "bad_fd");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+ free(log_buf);
+}
+
+static void bpf_btf_load_log_buf(void)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, opts);
+ const size_t log_buf_sz = 1024 * 1024;
+ const void *raw_btf_data;
+ __u32 raw_btf_size;
+ struct btf *btf;
+ char *log_buf;
+ int fd = -1;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ return;
+
+ ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good"))
+ goto cleanup;
+
+ log_buf = malloc(log_buf_sz);
+ if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc"))
+ goto cleanup;
+ opts.log_buf = log_buf;
+ opts.log_size = log_buf_sz;
+
+ /* with log_level == 0, log_buf should stay empty for good BTF */
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts);
+ ASSERT_STREQ(log_buf, "", "good_log_0");
+ ASSERT_GE(fd, 0, "good_fd1");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+ /* log_level == 2 should always fill log_buf, even for good BTF */
+ log_buf[0] = '\0';
+ opts.log_level = 2;
+ fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts);
+ printf("LOG_BUF: %s\n", log_buf);
+ ASSERT_OK_PTR(strstr(log_buf, "magic: 0xeb9f"), "good_log_2");
+ ASSERT_GE(fd, 0, "good_fd2");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+ /* make BTF bad, add pointer pointing to non-existing type */
+ ASSERT_GT(btf__add_ptr(btf, 100), 0, "bad_ptr_type");
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_bad"))
+ goto cleanup;
+
+ /* log_level == 0 should fill log_buf for bad BTF */
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts);
+ printf("LOG_BUF: %s\n", log_buf);
+ ASSERT_OK_PTR(strstr(log_buf, "[2] PTR (anon) type_id=100 Invalid type_id"), "bad_log_0");
+ ASSERT_LT(fd, 0, "bad_fd");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+cleanup:
+ free(log_buf);
+ btf__free(btf);
+}
+
+void test_log_buf(void)
+{
+ if (test__start_subtest("obj_load_log_buf"))
+ obj_load_log_buf();
+ if (test__start_subtest("bpf_prog_load_log_buf"))
+ bpf_prog_load_log_buf();
+ if (test__start_subtest("bpf_btf_load_log_buf"))
+ bpf_btf_load_log_buf();
+}
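
The test above exercises two separate log destinations: libbpf's process-wide print callback and the kernel_log_* open options introduced alongside this series. A minimal standalone sketch of the same pattern, not part of the patch, follows; the object path my_prog.o and the buffer sizes are illustrative only.

#include <stdio.h>
#include <stdarg.h>
#include <bpf/libbpf.h>

static char libbpf_log[64 * 1024];   /* libbpf's own messages */
static char kernel_log[64 * 1024];   /* verifier/BTF log from the kernel */
static size_t log_pos;

static int print_cb(enum libbpf_print_level level, const char *fmt, va_list args)
{
	size_t left = sizeof(libbpf_log) - log_pos;
	int n = vsnprintf(libbpf_log + log_pos, left, fmt, args);

	if (n > 0 && (size_t)n < left)
		log_pos += n;   /* silently drop output that does not fit */
	return 0;
}

int main(void)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts,
		.kernel_log_buf = kernel_log,
		.kernel_log_size = sizeof(kernel_log),
		.kernel_log_level = 1,
	);
	libbpf_print_fn_t old_cb = libbpf_set_print(print_cb);
	struct bpf_object *obj;

	obj = bpf_object__open_file("my_prog.o", &opts);   /* hypothetical object */
	if (!libbpf_get_error(obj)) {
		bpf_object__load(obj);
		bpf_object__close(obj);
	}

	libbpf_set_print(old_cb);   /* restore the previous printer */
	printf("libbpf said:\n%s\nkernel said:\n%s\n", libbpf_log, kernel_log);
	return 0;
}
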
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index ce17b1ed8709..23d19e9cf26a 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -53,9 +53,9 @@ void test_map_lock(void)
int err = 0, key = 0, i;
void *ret;
- err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
if (CHECK_FAIL(err)) {
- printf("test_map_lock:bpf_prog_load errno %d\n", errno);
+ printf("test_map_lock:bpf_prog_test_load errno %d\n", errno);
goto close_prog;
}
map_fd[0] = bpf_find_map(__func__, obj, "hash_map");
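
bpf_prog_test_load() is the selftests-local wrapper that these conversions switch to. Under the assumption that it mirrors the helper in testing_helpers.c, it roughly does the following sketch: open the object, force BPF_F_TEST_RND_HI32 on the first program so the verifier exercises 32-bit bound tracking, then load and hand back the object and program fd.

#include <linux/bpf.h>
#include <bpf/libbpf.h>

static int prog_test_load_sketch(const char *file, enum bpf_prog_type type,
				 struct bpf_object **pobj, int *prog_fd)
{
	struct bpf_object *obj;
	struct bpf_program *prog;

	obj = bpf_object__open_file(file, NULL);
	if (libbpf_get_error(obj))
		return -1;

	prog = bpf_object__next_program(obj, NULL);
	if (!prog)
		goto err_out;

	if (type != BPF_PROG_TYPE_UNSPEC)
		bpf_program__set_type(prog, type);
	/* extra flag is the whole point of the wrapper */
	bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32);

	if (bpf_object__load(obj))
		goto err_out;

	*pobj = obj;
	*prog_fd = bpf_program__fd(prog);
	return 0;

err_out:
	bpf_object__close(obj);
	return -1;
}
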
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
index 4972f92205c7..273725504f11 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -4,31 +4,29 @@
#include <test_progs.h>
#include <network_helpers.h>
-#include "map_ptr_kern.skel.h"
+#include "map_ptr_kern.lskel.h"
void test_map_ptr(void)
{
- struct map_ptr_kern *skel;
+ struct map_ptr_kern_lskel *skel;
__u32 duration = 0, retval;
char buf[128];
int err;
int page_size = getpagesize();
- skel = map_ptr_kern__open();
+ skel = map_ptr_kern_lskel__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
- err = bpf_map__set_max_entries(skel->maps.m_ringbuf, page_size);
- if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
- goto cleanup;
+ skel->maps.m_ringbuf.max_entries = page_size;
- err = map_ptr_kern__load(skel);
+ err = map_ptr_kern_lskel__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto cleanup;
skel->bss->page_size = page_size;
- err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4,
+ err = bpf_prog_test_run(skel->progs.cg_skb.prog_fd, 1, &pkt_v4,
sizeof(pkt_v4), buf, NULL, &retval, NULL);
if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno))
@@ -39,5 +37,5 @@ void test_map_ptr(void)
goto cleanup;
cleanup:
- map_ptr_kern__destroy(skel);
+ map_ptr_kern_lskel__destroy(skel);
}
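
Light skeletons (the .lskel.h header, generated with bpftool gen skeleton -L) expose maps and programs as plain struct fields rather than through accessor functions, which is why the bpf_map__set_max_entries() and bpf_program__fd() calls disappear above. A condensed sketch of the resulting usage, reusing the identifiers from this test:

#include <unistd.h>
#include "map_ptr_kern.lskel.h"

static int run_lskel_sketch(void)
{
	struct map_ptr_kern_lskel *skel;
	int err;

	skel = map_ptr_kern_lskel__open();
	if (!skel)
		return -1;

	/* plain field, no bpf_map__set_max_entries() needed */
	skel->maps.m_ringbuf.max_entries = getpagesize();

	err = map_ptr_kern_lskel__load(skel);
	if (!err)
		err = skel->progs.cg_skb.prog_fd;   /* fd exposed directly */

	map_ptr_kern_lskel__destroy(skel);
	return err;
}
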
diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
index 7589c03fd26b..eb2feaac81fe 100644
--- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
@@ -204,8 +204,8 @@ static int pass_ack(struct migrate_reuseport_test_case *test_case)
{
int err;
- err = bpf_link__detach(test_case->link);
- if (!ASSERT_OK(err, "bpf_link__detach"))
+ err = bpf_link__destroy(test_case->link);
+ if (!ASSERT_OK(err, "bpf_link__destroy"))
return -1;
test_case->link = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index 4e32f3586a75..5fc2b3a0711e 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -47,7 +47,6 @@ void serial_test_perf_buffer(void)
{
int err, on_len, nr_on_cpus = 0, nr_cpus, i, j;
int zero = 0, my_pid = getpid();
- struct perf_buffer_opts pb_opts = {};
struct test_perf_buffer *skel;
cpu_set_t cpu_seen;
struct perf_buffer *pb;
@@ -82,9 +81,8 @@ void serial_test_perf_buffer(void)
goto out_close;
/* set up perf buffer */
- pb_opts.sample_cb = on_sample;
- pb_opts.ctx = &cpu_seen;
- pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts);
+ pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1,
+ on_sample, NULL, &cpu_seen, NULL);
if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out_close;
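
The conversion above moves from the deprecated opts-struct form of perf_buffer__new() to the libbpf 1.0 signature, where the sample callback, the lost-samples callback, and the context pointer are direct arguments and the trailing opts pointer is reserved for future extensions. A minimal sketch, assuming map_fd refers to a BPF_MAP_TYPE_PERF_EVENT_ARRAY map:

#include <bpf/libbpf.h>

static void on_sample_cb(void *ctx, int cpu, void *data, __u32 size)
{
	/* handle one event */
}

static void on_lost_cb(void *ctx, int cpu, __u64 cnt)
{
	/* account for dropped events */
}

static struct perf_buffer *open_pb(int map_fd, void *ctx)
{
	/* 8 pages of ring buffer per CPU; NULL opts for defaults */
	return perf_buffer__new(map_fd, 8, on_sample_cb, on_lost_cb, ctx, NULL);
}
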
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
index d4b953ae3407..31c09ba577eb 100644
--- a/tools/testing/selftests/bpf/prog_tests/pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -241,8 +241,8 @@ void test_pinning(void)
goto out;
}
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32),
- sizeof(__u64), 1, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(__u32),
+ sizeof(__u64), 1, NULL);
if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd))
goto out;
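
bpf_create_map() and bpf_create_map_xattr() are replaced throughout this series by bpf_map_create(), which takes the common attributes positionally and everything else via an extensible opts struct (NULL when defaults suffice). A minimal sketch; the map name is illustrative:

#include <bpf/bpf.h>

static int create_pin_map_sketch(void)
{
	/* name, key size, value size, max_entries are positional;
	 * flags, BTF ids, inner_map_fd etc. would go into a
	 * bpf_map_create_opts if needed
	 */
	return bpf_map_create(BPF_MAP_TYPE_ARRAY, "pinmap",
			      sizeof(__u32), sizeof(__u64), 1, NULL);
}
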
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index 44b514fabccd..6628710ec3c6 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -9,7 +9,7 @@ void test_pkt_access(void)
__u32 duration, retval;
int err, prog_fd;
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index 939015cd6dba..c9d2d6a1bfcc 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -9,7 +9,7 @@ void test_pkt_md_access(void)
__u32 duration, retval;
int err, prog_fd;
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_array_init.c b/tools/testing/selftests/bpf/prog_tests/prog_array_init.c
new file mode 100644
index 000000000000..fc4657619739
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prog_array_init.c
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include "test_prog_array_init.skel.h"
+
+void test_prog_array_init(void)
+{
+ struct test_prog_array_init *skel;
+ int err;
+
+ skel = test_prog_array_init__open();
+ if (!ASSERT_OK_PTR(skel, "could not open BPF object"))
+ return;
+
+ skel->rodata->my_pid = getpid();
+
+ err = test_prog_array_init__load(skel);
+ if (!ASSERT_OK(err, "could not load BPF object"))
+ goto cleanup;
+
+ skel->links.entry = bpf_program__attach_raw_tracepoint(skel->progs.entry, "sys_enter");
+ if (!ASSERT_OK_PTR(skel->links.entry, "could not attach BPF program"))
+ goto cleanup;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->value, 42, "unexpected value");
+
+cleanup:
+ test_prog_array_init__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index f47e7b1cb32c..b9822f914eeb 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -14,7 +14,7 @@ static void test_queue_stack_map_by_type(int type)
int i, err, prog_fd, map_in_fd, map_out_fd;
char file[32], buf[128];
struct bpf_object *obj;
- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+ struct iphdr iph;
/* Fill test values to be used */
for (i = 0; i < MAP_SIZE; i++)
@@ -27,7 +27,7 @@ static void test_queue_stack_map_by_type(int type)
else
return;
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -60,15 +60,17 @@ static void test_queue_stack_map_by_type(int type)
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
- if (err || retval || size != sizeof(pkt_v4) ||
- iph->daddr != val)
+ if (err || retval || size != sizeof(pkt_v4))
+ break;
+ memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
+ if (iph.daddr != val)
break;
}
- CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val,
+ CHECK(err || retval || size != sizeof(pkt_v4) || iph.daddr != val,
"bpf_map_pop_elem",
"err %d errno %d retval %d size %d iph->daddr %u\n",
- err, errno, retval, size, iph->daddr);
+ err, errno, retval, size, iph.daddr);
/* Queue is empty, program should return TC_ACT_SHOT */
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
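
The iphdr fix above replaces a struct pointer aliased into the output buffer with a memcpy into a properly typed local, which sidesteps unaligned access and strict-aliasing pitfalls and forces the value to be re-read after each test run. The same pattern in isolation:

#include <string.h>
#include <linux/if_ether.h>
#include <linux/ip.h>

static __u32 pkt_daddr(const char *buf)
{
	struct iphdr iph;

	/* copy the header out instead of casting buf + offset to
	 * struct iphdr *, which may be misaligned
	 */
	memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
	return iph.daddr;
}
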
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
index 9807336a3016..e2f1445b0e10 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
@@ -18,15 +18,15 @@ void test_raw_tp_writable_reject_nbd_invalid(void)
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr load_attr = {
- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
- .license = "GPL v2",
- .insns = program,
- .insns_cnt = sizeof(program) / sizeof(struct bpf_insn),
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
.log_level = 2,
- };
+ .log_buf = error,
+ .log_size = sizeof(error),
+ );
- bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+ bpf_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, NULL, "GPL v2",
+ program, sizeof(program) / sizeof(struct bpf_insn),
+ &opts);
if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load",
"failed: %d errno %d\n", bpf_fd, errno))
return;
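
bpf_load_program_xattr() gives way here to the newer bpf_prog_load(), where type, name, and license are explicit arguments, the count is in instructions (not bytes), and the log settings move into bpf_prog_load_opts. A minimal sketch, assuming the BPF_* instruction macros from the selftests' bpf_insn.h:

#include <bpf/bpf.h>
#include "bpf_insn.h"   /* BPF_MOV64_IMM(), BPF_EXIT_INSN() */

static char prog_log[4096];

static int load_ret0_sketch(void)
{
	const struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.log_buf = prog_log,
		.log_size = sizeof(prog_log),
		.log_level = 1,
	);

	/* prog name may be NULL; insn count is in instructions */
	return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "ret0", "GPL",
			     insns, sizeof(insns) / sizeof(insns[0]), &opts);
}
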
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
index ddefa1192e5d..239baccabccb 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
@@ -17,15 +17,15 @@ void serial_test_raw_tp_writable_test_run(void)
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr load_attr = {
- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
- .license = "GPL v2",
- .insns = trace_program,
- .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn),
+ LIBBPF_OPTS(bpf_prog_load_opts, trace_opts,
.log_level = 2,
- };
+ .log_buf = error,
+ .log_size = sizeof(error),
+ );
- int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+ int bpf_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, NULL, "GPL v2",
+ trace_program, sizeof(trace_program) / sizeof(struct bpf_insn),
+ &trace_opts);
if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded",
"failed: %d errno %d\n", bpf_fd, errno))
return;
@@ -35,15 +35,14 @@ void serial_test_raw_tp_writable_test_run(void)
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr skb_load_attr = {
- .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .license = "GPL v2",
- .insns = skb_program,
- .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn),
- };
+ LIBBPF_OPTS(bpf_prog_load_opts, skb_opts,
+ .log_buf = error,
+ .log_size = sizeof(error),
+ );
- int filter_fd =
- bpf_load_program_xattr(&skb_load_attr, error, sizeof(error));
+ int filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL v2",
+ skb_program, sizeof(skb_program) / sizeof(struct bpf_insn),
+ &skb_opts);
if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
filter_fd, errno))
goto out_bpffd;
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index 167cd8a2edfd..e945195b24c9 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -62,8 +62,8 @@ void test_ringbuf_multi(void)
if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
goto cleanup;
- proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0);
- if (CHECK(proto_fd < 0, "bpf_create_map", "bpf_create_map failed\n"))
+ proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL);
+ if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n"))
goto cleanup;
err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 3cfc910ab3c1..1cbd8cd64044 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -18,7 +18,6 @@
#include <netinet/in.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "test_progs.h"
@@ -66,29 +65,20 @@ static union sa46 {
static int create_maps(enum bpf_map_type inner_type)
{
- struct bpf_create_map_attr attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
inner_map_type = inner_type;
/* Creating reuseport_array */
- attr.name = "reuseport_array";
- attr.map_type = inner_type;
- attr.key_size = sizeof(__u32);
- attr.value_size = sizeof(__u32);
- attr.max_entries = REUSEPORT_ARRAY_SIZE;
-
- reuseport_array = bpf_create_map_xattr(&attr);
+ reuseport_array = bpf_map_create(inner_type, "reuseport_array",
+ sizeof(__u32), sizeof(__u32), REUSEPORT_ARRAY_SIZE, NULL);
RET_ERR(reuseport_array < 0, "creating reuseport_array",
"reuseport_array:%d errno:%d\n", reuseport_array, errno);
/* Creating outer_map */
- attr.name = "outer_map";
- attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
- attr.key_size = sizeof(__u32);
- attr.value_size = sizeof(__u32);
- attr.max_entries = 1;
- attr.inner_map_fd = reuseport_array;
- outer_map = bpf_create_map_xattr(&attr);
+ opts.inner_map_fd = reuseport_array;
+ outer_map = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer_map",
+ sizeof(__u32), sizeof(__u32), 1, &opts);
RET_ERR(outer_map < 0, "creating outer_map",
"outer_map:%d errno:%d\n", outer_map, errno);
diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
index fdfdcff6cbef..aecfe662c070 100644
--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c
+++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
@@ -22,7 +22,7 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type)
prog[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
prog[ARRAY_SIZE(prog) - 1] = BPF_EXIT_INSN();
- prog_fd = bpf_load_program(prog_type, prog, ARRAY_SIZE(prog),
+ prog_fd = bpf_test_load_program(prog_type, prog, ARRAY_SIZE(prog),
"GPL", 0, NULL, 0);
CHECK(prog_fd < 0, "test-run", "errno %d\n", errno);
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 6db07401bc49..597d0467a926 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -30,7 +30,6 @@
#include <bpf/bpf.h>
#include "test_progs.h"
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "network_helpers.h"
@@ -937,6 +936,37 @@ static void test_drop_on_lookup(struct test_sk_lookup *skel)
.connect_to = { EXT_IP6, EXT_PORT },
.listen_at = { EXT_IP6, INT_PORT },
},
+ /* The program drops the packet when the lookup succeeds,
+ * i.e. when the ifindex is 1.
+ */
+ {
+ .desc = "TCP IPv4 drop on valid ifindex",
+ .lookup_prog = skel->progs.check_ifindex,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on valid ifindex",
+ .lookup_prog = skel->progs.check_ifindex,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 drop on valid ifindex",
+ .lookup_prog = skel->progs.check_ifindex,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 drop on valid ifindex",
+ .lookup_prog = skel->progs.check_ifindex,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, EXT_PORT },
+ },
};
const struct test *t;
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index c437e6ba8fe2..b5319ba2ee27 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -32,7 +32,7 @@ void test_skb_ctx(void)
int err;
int i;
- err = bpf_prog_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
&tattr.prog_fd);
if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
return;
@@ -111,4 +111,6 @@ void test_skb_ctx(void)
"ctx_out_mark",
"skb->mark == %u, expected %d\n",
skb.mark, 10);
+
+ bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
index f302ad84a298..6f802a1c0800 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
@@ -20,7 +20,7 @@ void test_skb_helpers(void)
struct bpf_object *obj;
int err;
- err = bpf_prog_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ err = bpf_prog_test_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
&tattr.prog_fd);
if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index fae40db4d81f..9fc040eaa482 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -15,7 +15,6 @@
#include "network_helpers.h"
#include "cgroup_helpers.h"
#include "test_progs.h"
-#include "bpf_rlimit.h"
#include "test_sock_fields.skel.h"
enum bpf_linum_array_idx {
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 1352ec104149..85db0f4cdd95 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -91,9 +91,9 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
if (CHECK_FAIL(s < 0))
return;
- map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
+ map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
if (CHECK_FAIL(map < 0)) {
- perror("bpf_create_map");
+ perror("bpf_cmap_create");
goto out;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 7a0d64fdc192..af293ea1542c 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -97,7 +97,7 @@ static void run_tests(int family, enum bpf_map_type map_type)
char test_name[MAX_TEST_NAME];
int map;
- map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
+ map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
if (CHECK_FAIL(map < 0)) {
perror("bpf_map_create");
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 2a9cb951bfd6..7e21bfab6358 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -502,8 +502,8 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
if (s < 0)
return;
- mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key),
- sizeof(value32), 1, 0);
+ mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key),
+ sizeof(value32), 1, NULL);
if (mapfd < 0) {
FAIL_ERRNO("map_create");
goto close;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c
index 3e8517a8395a..cd09f4c7dd92 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c
@@ -852,22 +852,21 @@ static struct sockopt_test {
static int load_prog(const struct bpf_insn *insns,
enum bpf_attach_type expected_attach_type)
{
- struct bpf_load_program_attr attr = {
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
.expected_attach_type = expected_attach_type,
- .insns = insns,
- .license = "GPL",
.log_level = 2,
- };
- int fd;
+ .log_buf = bpf_log_buf,
+ .log_size = sizeof(bpf_log_buf),
+ );
+ int fd, insns_cnt = 0;
for (;
- insns[attr.insns_cnt].code != (BPF_JMP | BPF_EXIT);
- attr.insns_cnt++) {
+ insns[insns_cnt].code != (BPF_JMP | BPF_EXIT);
+ insns_cnt++) {
}
- attr.insns_cnt++;
+ insns_cnt++;
- fd = bpf_load_program_xattr(&attr, bpf_log_buf, sizeof(bpf_log_buf));
+ fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCKOPT, NULL, "GPL", insns, insns_cnt, &opts);
if (verbose && fd < 0)
fprintf(stderr, "%s\n", bpf_log_buf);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index 86f97681ad89..8ed78a9383ba 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -136,7 +136,8 @@ static int start_server(void)
return fd;
}
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title,
+ const char *prog_name)
{
enum bpf_attach_type attach_type;
enum bpf_prog_type prog_type;
@@ -145,20 +146,20 @@ static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
if (err) {
- log_err("Failed to deduct types for %s BPF program", title);
+ log_err("Failed to deduct types for %s BPF program", prog_name);
return -1;
}
- prog = bpf_object__find_program_by_title(obj, title);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (!prog) {
- log_err("Failed to find %s BPF program", title);
+ log_err("Failed to find %s BPF program", prog_name);
return -1;
}
err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
attach_type, 0);
if (err) {
- log_err("Failed to attach %s BPF program", title);
+ log_err("Failed to attach %s BPF program", prog_name);
return -1;
}
@@ -167,25 +168,25 @@ static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
static void run_test(int cgroup_fd)
{
- struct bpf_prog_load_attr attr = {
- .file = "./sockopt_inherit.o",
- };
int server_fd = -1, client_fd;
struct bpf_object *obj;
void *server_err;
pthread_t tid;
- int ignored;
int err;
- err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (CHECK_FAIL(err))
+ obj = bpf_object__open_file("sockopt_inherit.o", NULL);
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
return;
- err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
+ err = bpf_object__load(obj);
+ if (!ASSERT_OK(err, "obj_load"))
+ goto close_bpf_object;
+
+ err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt", "_getsockopt");
if (CHECK_FAIL(err))
goto close_bpf_object;
- err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
+ err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt", "_setsockopt");
if (CHECK_FAIL(err))
goto close_bpf_object;
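
bpf_object__find_program_by_title() (lookup by SEC() string) is deprecated in favor of bpf_object__find_program_by_name(), which takes the program's C function name; that is why each conversion in this series threads an extra prog_name argument through. A sketch of the lookup-and-attach step, reusing the "_getsockopt" name from this test:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int attach_by_name_sketch(struct bpf_object *obj, int cgroup_fd)
{
	struct bpf_program *prog;

	/* "_getsockopt" is the C function name, not the
	 * "cgroup/getsockopt" section string
	 */
	prog = bpf_object__find_program_by_name(obj, "_getsockopt");
	if (!prog)
		return -1;

	return bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
			       BPF_CGROUP_GETSOCKOPT, 0);
}
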
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
index bc34f7773444..abce12ddcc37 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
@@ -297,14 +297,10 @@ detach:
void test_sockopt_multi(void)
{
- struct bpf_prog_load_attr attr = {
- .file = "./sockopt_multi.o",
- };
int cg_parent = -1, cg_child = -1;
struct bpf_object *obj = NULL;
int sock_fd = -1;
int err = -1;
- int ignored;
cg_parent = test__join_cgroup("/parent");
if (CHECK_FAIL(cg_parent < 0))
@@ -314,8 +310,12 @@ void test_sockopt_multi(void)
if (CHECK_FAIL(cg_child < 0))
goto out;
- err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (CHECK_FAIL(err))
+ obj = bpf_object__open_file("sockopt_multi.o", NULL);
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
+ goto out;
+
+ err = bpf_object__load(obj);
+ if (!ASSERT_OK(err, "obj_load"))
goto out;
sock_fd = socket(AF_INET, SOCK_STREAM, 0);
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 7577a77a4c4c..6307f5d2b417 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -24,9 +24,9 @@ void test_spinlock(void)
int err = 0, i;
void *ret;
- err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
if (CHECK_FAIL(err)) {
- printf("test_spin_lock:bpf_prog_load errno %d\n", errno);
+ printf("test_spin_lock:bpf_prog_test_load errno %d\n", errno);
goto close_prog;
}
for (i = 0; i < 4; i++)
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 04b476bd62b9..313f0a66232e 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -4,7 +4,7 @@
void test_stacktrace_map(void)
{
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
- const char *prog_name = "tracepoint/sched/sched_switch";
+ const char *prog_name = "oncpu";
int err, prog_fd, stack_trace_len;
const char *file = "./test_stacktrace_map.o";
__u32 key, val, duration = 0;
@@ -12,11 +12,11 @@ void test_stacktrace_map(void)
struct bpf_object *obj;
struct bpf_link *link;
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index 4fd30bb651ad..1cb8dd36bd8f 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -3,7 +3,7 @@
void test_stacktrace_map_raw_tp(void)
{
- const char *prog_name = "tracepoint/sched/sched_switch";
+ const char *prog_name = "oncpu";
int control_map_fd, stackid_hmap_fd, stackmap_fd;
const char *file = "./test_stacktrace_map.o";
__u32 key, val, duration = 0;
@@ -12,11 +12,11 @@ void test_stacktrace_map_raw_tp(void)
struct bpf_object *obj;
struct bpf_link *link = NULL;
- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 9825f1f7bfcc..5dc0f425bd11 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -16,7 +16,7 @@ static void test_tailcall_1(void)
char prog_name[32];
char buff[128] = {};
- err = bpf_prog_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ err = bpf_prog_test_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
&prog_fd);
if (CHECK_FAIL(err))
return;
@@ -154,7 +154,7 @@ static void test_tailcall_2(void)
char prog_name[32];
char buff[128] = {};
- err = bpf_prog_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ err = bpf_prog_test_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
&prog_fd);
if (CHECK_FAIL(err))
return;
@@ -228,7 +228,7 @@ static void test_tailcall_count(const char *which)
__u32 retval, duration;
char buff[128] = {};
- err = bpf_prog_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj,
+ err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj,
&prog_fd);
if (CHECK_FAIL(err))
return;
@@ -324,7 +324,7 @@ static void test_tailcall_4(void)
char buff[128] = {};
char prog_name[32];
- err = bpf_prog_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ err = bpf_prog_test_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
&prog_fd);
if (CHECK_FAIL(err))
return;
@@ -412,7 +412,7 @@ static void test_tailcall_5(void)
char buff[128] = {};
char prog_name[32];
- err = bpf_prog_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ err = bpf_prog_test_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
&prog_fd);
if (CHECK_FAIL(err))
return;
@@ -498,7 +498,7 @@ static void test_tailcall_bpf2bpf_1(void)
__u32 retval, duration;
char prog_name[32];
- err = bpf_prog_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS,
+ err = bpf_prog_test_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS,
&obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -582,7 +582,7 @@ static void test_tailcall_bpf2bpf_2(void)
__u32 retval, duration;
char buff[128] = {};
- err = bpf_prog_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS,
+ err = bpf_prog_test_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS,
&obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -660,7 +660,7 @@ static void test_tailcall_bpf2bpf_3(void)
__u32 retval, duration;
char prog_name[32];
- err = bpf_prog_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS,
+ err = bpf_prog_test_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS,
&obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -757,7 +757,7 @@ static void test_tailcall_bpf2bpf_4(bool noise)
__u32 retval, duration;
char prog_name[32];
- err = bpf_prog_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS,
+ err = bpf_prog_test_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS,
&obj, &prog_fd);
if (CHECK_FAIL(err))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
index 1bdc1d86a50c..17947c9e1d66 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
@@ -11,7 +11,7 @@ void test_task_fd_query_rawtp(void)
__u32 duration = 0;
char buf[256];
- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
index 3f131b8fe328..c2a98a7a8dfc 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
@@ -13,8 +13,8 @@ static void test_task_fd_query_tp_core(const char *probe_name,
__u32 duration = 0;
char buf[256];
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno))
goto close_prog;
snprintf(buf, sizeof(buf),
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 4b18b73df10b..c2426df58e17 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -105,6 +105,13 @@ static int setns_by_fd(int nsfd)
if (!ASSERT_OK(err, "unshare"))
return err;
+ /* Make our /sys mount private, so the following umount won't
+ * propagate to the rest of the system in case /sys is a
+ * shared mount.
+ */
+ err = mount("none", "/sys", NULL, MS_PRIVATE, NULL);
+ if (!ASSERT_OK(err, "remount private /sys"))
+ return err;
+
err = umount2("/sys", MNT_DETACH);
if (!ASSERT_OK(err, "umount2 /sys"))
return err;
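
The MS_PRIVATE remount matters because, with a shared /sys mount, the MNT_DETACH umount would propagate beyond the freshly unshared namespace. The sequence in isolation:

#define _GNU_SOURCE
#include <sched.h>
#include <sys/mount.h>

static int detach_sysfs_sketch(void)
{
	if (unshare(CLONE_NEWNS))           /* private mount namespace */
		return -1;
	/* stop umount events from propagating to peer mounts */
	if (mount("none", "/sys", NULL, MS_PRIVATE, NULL))
		return -1;
	return umount2("/sys", MNT_DETACH); /* lazily detach our /sys */
}
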
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
index 594307dffd13..11bf755be4c9 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
@@ -8,7 +8,7 @@ void test_tcp_estats(void)
struct bpf_object *obj;
__u32 duration = 0;
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
CHECK(err, "", "err %d errno %d\n", err, errno);
if (err)
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index 265b4fe33ec3..96ff2c20af81 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -2,6 +2,7 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
#include "network_helpers.h"
+#include "tcp_rtt.skel.h"
struct tcp_rtt_storage {
__u32 invoked;
@@ -91,26 +92,18 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
static int run_test(int cgroup_fd, int server_fd)
{
- struct bpf_prog_load_attr attr = {
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
- .file = "./tcp_rtt.o",
- .expected_attach_type = BPF_CGROUP_SOCK_OPS,
- };
- struct bpf_object *obj;
- struct bpf_map *map;
+ struct tcp_rtt *skel;
int client_fd;
int prog_fd;
int map_fd;
int err;
- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
- if (err) {
- log_err("Failed to load BPF object");
+ skel = tcp_rtt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_load"))
return -1;
- }
- map = bpf_object__next_map(obj, NULL);
- map_fd = bpf_map__fd(map);
+ map_fd = bpf_map__fd(skel->maps.socket_storage_map);
+ prog_fd = bpf_program__fd(skel->progs._sockops);
err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
if (err) {
@@ -149,7 +142,7 @@ close_client_fd:
close(client_fd);
close_bpf_object:
- bpf_object__close(obj);
+ tcp_rtt__destroy(skel);
return err;
}
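
The tcp_rtt conversion is the standard skeleton workflow: tcp_rtt__open_and_load() replaces the open-file/load/next-map dance, and maps and programs become typed members. Condensed, reusing the members referenced above:

#include <bpf/bpf.h>
#include "tcp_rtt.skel.h"

static int run_skel_sketch(int cgroup_fd)
{
	struct tcp_rtt *skel;
	int err;

	skel = tcp_rtt__open_and_load();
	if (!skel)
		return -1;

	/* typed members: no bpf_object__next_map() guessing */
	err = bpf_prog_attach(bpf_program__fd(skel->progs._sockops),
			      cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);

	tcp_rtt__destroy(skel);   /* detaches and frees everything */
	return err;
}
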
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
index d29ebfeef9c5..214d9f4a94a5 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
@@ -19,11 +19,13 @@ static int read_iter(char *file)
fd = open(file, 0);
if (fd < 0)
return -1;
- while ((len = read(fd, buf, sizeof(buf))) > 0)
+ while ((len = read(fd, buf, sizeof(buf))) > 0) {
+ buf[sizeof(buf) - 1] = '\0';
if (strstr(buf, "iter")) {
close(fd);
return 0;
}
+ }
close(fd);
return -1;
}
@@ -80,7 +82,7 @@ static int fn(void)
if (!ASSERT_OK(err, "creating " TDIR "/fs1/b"))
goto out;
- map = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 4, 1, 0);
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
if (!ASSERT_GT(map, 0, "create_map(ARRAY)"))
goto out;
err = bpf_obj_pin(map, TDIR "/fs1/c");
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 7e13129f593a..509e21d5cb9d 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -30,17 +30,29 @@ extern int extra_prog_load_log_flags;
static int check_load(const char *file)
{
- struct bpf_prog_load_attr attr;
struct bpf_object *obj = NULL;
- int err, prog_fd;
+ struct bpf_program *prog;
+ int err;
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = file;
- attr.prog_type = BPF_PROG_TYPE_UNSPEC;
- attr.log_level = extra_prog_load_log_flags;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
found = false;
- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+
+ obj = bpf_object__open_file(file, NULL);
+ err = libbpf_get_error(obj);
+ if (err)
+ return err;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (!prog) {
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32);
+ bpf_program__set_log_level(prog, extra_prog_load_log_flags);
+
+ err = bpf_object__load(obj);
+
+err_out:
bpf_object__close(obj);
return err;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index d2c16eaae367..26ac26a88026 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -28,10 +28,6 @@ static unsigned int duration;
struct storage {
void *inode;
unsigned int value;
- /* Lock ensures that spin locked versions of local stoage operations
- * also work, most operations in this tests are still single threaded
- */
- struct bpf_spin_lock lock;
};
/* Fork and exec the provided rm binary and return the exit code of the
@@ -66,27 +62,24 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path)
static bool check_syscall_operations(int map_fd, int obj_fd)
{
- struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } },
- lookup_val = { .value = 0, .lock = { 0 } };
+ struct storage val = { .value = TEST_STORAGE_VALUE },
+ lookup_val = { .value = 0 };
int err;
/* Looking up an existing element should fail initially */
- err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
- BPF_F_LOCK);
+ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
"err:%d errno:%d\n", err, errno))
return false;
/* Create a new element */
- err = bpf_map_update_elem(map_fd, &obj_fd, &val,
- BPF_NOEXIST | BPF_F_LOCK);
+ err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST);
if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err,
errno))
return false;
/* Lookup the newly created element */
- err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
- BPF_F_LOCK);
+ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err,
errno))
return false;
@@ -102,8 +95,7 @@ static bool check_syscall_operations(int map_fd, int obj_fd)
return false;
/* The lookup should fail, now that the element has been deleted */
- err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
- BPF_F_LOCK);
+ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
"err:%d errno:%d\n", err, errno))
return false;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 123c68c1917d..05acb376f74d 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -56,11 +56,11 @@ static void setaffinity(void)
void test_test_overhead(void)
{
- const char *kprobe_name = "kprobe/__set_task_comm";
- const char *kretprobe_name = "kretprobe/__set_task_comm";
- const char *raw_tp_name = "raw_tp/task_rename";
- const char *fentry_name = "fentry/__set_task_comm";
- const char *fexit_name = "fexit/__set_task_comm";
+ const char *kprobe_name = "prog1";
+ const char *kretprobe_name = "prog2";
+ const char *raw_tp_name = "prog3";
+ const char *fentry_name = "prog4";
+ const char *fexit_name = "prog5";
const char *kprobe_func = "__set_task_comm";
struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog;
struct bpf_program *fentry_prog, *fexit_prog;
@@ -76,23 +76,23 @@ void test_test_overhead(void)
if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
- kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
+ kprobe_prog = bpf_object__find_program_by_name(obj, kprobe_name);
if (CHECK(!kprobe_prog, "find_probe",
"prog '%s' not found\n", kprobe_name))
goto cleanup;
- kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name);
+ kretprobe_prog = bpf_object__find_program_by_name(obj, kretprobe_name);
if (CHECK(!kretprobe_prog, "find_probe",
"prog '%s' not found\n", kretprobe_name))
goto cleanup;
- raw_tp_prog = bpf_object__find_program_by_title(obj, raw_tp_name);
+ raw_tp_prog = bpf_object__find_program_by_name(obj, raw_tp_name);
if (CHECK(!raw_tp_prog, "find_probe",
"prog '%s' not found\n", raw_tp_name))
goto cleanup;
- fentry_prog = bpf_object__find_program_by_title(obj, fentry_name);
+ fentry_prog = bpf_object__find_program_by_name(obj, fentry_name);
if (CHECK(!fentry_prog, "find_probe",
"prog '%s' not found\n", fentry_name))
goto cleanup;
- fexit_prog = bpf_object__find_program_by_title(obj, fexit_name);
+ fexit_prog = bpf_object__find_program_by_name(obj, fexit_name);
if (CHECK(!fexit_prog, "find_probe",
"prog '%s' not found\n", fexit_name))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
new file mode 100644
index 000000000000..b57a3009465f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+#include "strncmp_test.skel.h"
+
+static int trigger_strncmp(const struct strncmp_test *skel)
+{
+ int cmp;
+
+ usleep(1);
+
+ cmp = skel->bss->cmp_ret;
+ if (cmp > 0)
+ return 1;
+ if (cmp < 0)
+ return -1;
+ return 0;
+}
+
+/*
+ * Compare str and target after making str[i] != target[i]:
+ * when exp is -1, delta is -1 so str[i] < target[i];
+ * when exp is 1, delta is 1 so str[i] > target[i].
+ */
+static void strncmp_full_str_cmp(struct strncmp_test *skel, const char *name,
+ int exp)
+{
+ size_t nr = sizeof(skel->bss->str);
+ char *str = skel->bss->str;
+ int delta = exp;
+ int got;
+ size_t i;
+
+ memcpy(str, skel->rodata->target, nr);
+ for (i = 0; i < nr - 1; i++) {
+ str[i] += delta;
+
+ got = trigger_strncmp(skel);
+ ASSERT_EQ(got, exp, name);
+
+ str[i] -= delta;
+ }
+}
+
+static void test_strncmp_ret(void)
+{
+ struct strncmp_test *skel;
+ struct bpf_program *prog;
+ int err, got;
+
+ skel = strncmp_test__open();
+ if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj)
+ bpf_program__set_autoload(prog, false);
+
+ bpf_program__set_autoload(skel->progs.do_strncmp, true);
+
+ err = strncmp_test__load(skel);
+ if (!ASSERT_EQ(err, 0, "strncmp_test load"))
+ goto out;
+
+ err = strncmp_test__attach(skel);
+ if (!ASSERT_EQ(err, 0, "strncmp_test attach"))
+ goto out;
+
+ skel->bss->target_pid = getpid();
+
+ /* Empty str */
+ skel->bss->str[0] = '\0';
+ got = trigger_strncmp(skel);
+ ASSERT_EQ(got, -1, "strncmp: empty str");
+
+ /* Same string */
+ memcpy(skel->bss->str, skel->rodata->target, sizeof(skel->bss->str));
+ got = trigger_strncmp(skel);
+ ASSERT_EQ(got, 0, "strncmp: same str");
+
+ /* Not-null-terminated string */
+ memcpy(skel->bss->str, skel->rodata->target, sizeof(skel->bss->str));
+ skel->bss->str[sizeof(skel->bss->str) - 1] = 'A';
+ got = trigger_strncmp(skel);
+ ASSERT_EQ(got, 1, "strncmp: not-null-term str");
+
+ strncmp_full_str_cmp(skel, "strncmp: less than", -1);
+ strncmp_full_str_cmp(skel, "strncmp: greater than", 1);
+out:
+ strncmp_test__destroy(skel);
+}
+
+static void test_strncmp_bad_not_const_str_size(void)
+{
+ struct strncmp_test *skel;
+ struct bpf_program *prog;
+ int err;
+
+ skel = strncmp_test__open();
+ if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj)
+ bpf_program__set_autoload(prog, false);
+
+ bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size,
+ true);
+
+ err = strncmp_test__load(skel);
+ ASSERT_ERR(err, "strncmp_test load bad_not_const_str_size");
+
+ strncmp_test__destroy(skel);
+}
+
+static void test_strncmp_bad_writable_target(void)
+{
+ struct strncmp_test *skel;
+ struct bpf_program *prog;
+ int err;
+
+ skel = strncmp_test__open();
+ if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj)
+ bpf_program__set_autoload(prog, false);
+
+ bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target,
+ true);
+
+ err = strncmp_test__load(skel);
+ ASSERT_ERR(err, "strncmp_test load bad_writable_target");
+
+ strncmp_test__destroy(skel);
+}
+
+static void test_strncmp_bad_not_null_term_target(void)
+{
+ struct strncmp_test *skel;
+ struct bpf_program *prog;
+ int err;
+
+ skel = strncmp_test__open();
+ if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj)
+ bpf_program__set_autoload(prog, false);
+
+ bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target,
+ true);
+
+ err = strncmp_test__load(skel);
+ ASSERT_ERR(err, "strncmp_test load bad_not_null_term_target");
+
+ strncmp_test__destroy(skel);
+}
+
+void test_test_strncmp(void)
+{
+ if (test__start_subtest("strncmp_ret"))
+ test_strncmp_ret();
+ if (test__start_subtest("strncmp_bad_not_const_str_size"))
+ test_strncmp_bad_not_const_str_size();
+ if (test__start_subtest("strncmp_bad_writable_target"))
+ test_strncmp_bad_writable_target();
+ if (test__start_subtest("strncmp_bad_not_null_term_target"))
+ test_strncmp_bad_not_null_term_target();
+}
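
The test's open/load split leans on bpf_program__set_autoload() so that exactly one program out of the multi-program object reaches the verifier per subtest; the negative subtests then expect the load to fail. The pattern in isolation:

#include <bpf/libbpf.h>
#include "strncmp_test.skel.h"

static int load_single_prog_sketch(void)
{
	struct strncmp_test *skel;
	struct bpf_program *prog;
	int err;

	skel = strncmp_test__open();
	if (!skel)
		return -1;

	/* opt every program out, then opt the one under test back in */
	bpf_object__for_each_program(prog, skel->obj)
		bpf_program__set_autoload(prog, false);
	bpf_program__set_autoload(skel->progs.do_strncmp, true);

	err = strncmp_test__load(skel);
	strncmp_test__destroy(skel);
	return err;
}
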
diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
index 8652d0a46c87..39e79291c82b 100644
--- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
+++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
@@ -35,7 +35,7 @@ void serial_test_tp_attach_query(void)
query = malloc(sizeof(*query) + sizeof(__u32) * num_progs);
for (i = 0; i < num_progs; i++) {
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
&prog_fd[i]);
if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
goto cleanup1;
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index fc146671b20a..9c795ee52b7b 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -35,7 +35,7 @@ static struct bpf_link *load(struct bpf_object *obj, const char *name)
struct bpf_program *prog;
int duration = 0;
- prog = bpf_object__find_program_by_title(obj, name);
+ prog = bpf_object__find_program_by_name(obj, name);
if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name))
return ERR_PTR(-EINVAL);
return bpf_program__attach_trace(prog);
@@ -44,8 +44,8 @@ static struct bpf_link *load(struct bpf_object *obj, const char *name)
/* TODO: use different target function to run in concurrent mode */
void serial_test_trampoline_count(void)
{
- const char *fentry_name = "fentry/__set_task_comm";
- const char *fexit_name = "fexit/__set_task_comm";
+ const char *fentry_name = "prog1";
+ const char *fexit_name = "prog2";
const char *object = "test_trampoline_count.o";
struct inst inst[MAX_TRAMP_PROGS] = {};
int err, i = 0, duration = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c
index 48921ff74850..ac65456b7ab8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -11,12 +11,12 @@ void test_xdp(void)
const char *file = "./test_xdp.o";
struct bpf_object *obj;
char buf[128];
- struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr);
- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+ struct ipv6hdr iph6;
+ struct iphdr iph;
__u32 duration, retval, size;
int err, prog_fd, map_fd;
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -28,16 +28,17 @@ void test_xdp(void)
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
-
+ memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
CHECK(err || retval != XDP_TX || size != 74 ||
- iph->protocol != IPPROTO_IPIP, "ipv4",
+ iph.protocol != IPPROTO_IPIP, "ipv4",
"err %d errno %d retval %d size %d\n",
err, errno, retval, size);
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
+ memcpy(&iph6, buf + sizeof(struct ethhdr), sizeof(iph6));
CHECK(err || retval != XDP_TX || size != 114 ||
- iph6->nexthdr != IPPROTO_IPV6, "ipv6",
+ iph6.nexthdr != IPPROTO_IPV6, "ipv6",
"err %d errno %d retval %d size %d\n",
err, errno, retval, size);
out:
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index f529e3c923ae..3f5a17c38be5 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -10,7 +10,7 @@ static void test_xdp_adjust_tail_shrink(void)
int err, prog_fd;
char buf[128];
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -38,7 +38,7 @@ static void test_xdp_adjust_tail_grow(void)
__u32 duration, retval, size, expect_sz;
int err, prog_fd;
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -75,7 +75,7 @@ static void test_xdp_adjust_tail_grow2(void)
.data_size_out = 0, /* Per test */
};
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index 4c4057262cd8..c6fa390e3aa1 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -16,7 +16,7 @@ void serial_test_xdp_attach(void)
len = sizeof(info);
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1);
if (CHECK_FAIL(err))
return;
err = bpf_obj_get_info_by_fd(fd1, &info, &len);
@@ -24,7 +24,7 @@ void serial_test_xdp_attach(void)
goto out_1;
id1 = info.id;
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2);
if (CHECK_FAIL(err))
goto out_1;
@@ -34,7 +34,7 @@ void serial_test_xdp_attach(void)
goto out_2;
id2 = info.id;
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3);
if (CHECK_FAIL(err))
goto out_2;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index faa22b84f2ee..5e3a26b15ec6 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -218,9 +218,9 @@ static int send_udp_packets(int vary_dst_ip)
.h_dest = BOND2_MAC,
.h_proto = htons(ETH_P_IP),
};
- uint8_t buf[128] = {};
- struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh));
- struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph));
+ struct iphdr iph = {};
+ struct udphdr uh = {};
+ uint8_t buf[128];
int i, s = -1;
int ifindex;
@@ -232,17 +232,16 @@ static int send_udp_packets(int vary_dst_ip)
if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex"))
goto err;
- memcpy(buf, &eh, sizeof(eh));
- iph->ihl = 5;
- iph->version = 4;
- iph->tos = 16;
- iph->id = 1;
- iph->ttl = 64;
- iph->protocol = IPPROTO_UDP;
- iph->saddr = 1;
- iph->daddr = 2;
- iph->tot_len = htons(sizeof(buf) - ETH_HLEN);
- iph->check = 0;
+ iph.ihl = 5;
+ iph.version = 4;
+ iph.tos = 16;
+ iph.id = 1;
+ iph.ttl = 64;
+ iph.protocol = IPPROTO_UDP;
+ iph.saddr = 1;
+ iph.daddr = 2;
+ iph.tot_len = htons(sizeof(buf) - ETH_HLEN);
+ iph.check = 0;
for (i = 1; i <= NPACKETS; i++) {
int n;
@@ -253,10 +252,15 @@ static int send_udp_packets(int vary_dst_ip)
};
/* vary the UDP destination port for even distribution with roundrobin/xor modes */
- uh->dest++;
+ uh.dest++;
if (vary_dst_ip)
- iph->daddr++;
+ iph.daddr++;
+
+ /* construct a packet */
+ memcpy(buf, &eh, sizeof(eh));
+ memcpy(buf + sizeof(eh), &iph, sizeof(iph));
+ memcpy(buf + sizeof(eh) + sizeof(iph), &uh, sizeof(uh));
n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll));
if (!ASSERT_EQ(n, sizeof(buf), "sendto"))
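
The hunk above, like the xdp_bpf2bpf change further below, builds the packet by filling local header structs and serializing them into the byte buffer with memcpy(), instead of writing through header pointers cast into the buffer. A minimal sketch of that pattern, with illustrative names and includes that are not taken from the patch:

	#include <string.h>
	#include <linux/if_ether.h>
	#include <netinet/ip.h>
	#include <netinet/udp.h>

	static void serialize_headers(unsigned char *buf, const struct ethhdr *eh,
				      const struct iphdr *iph, const struct udphdr *uh)
	{
		/* memcpy() is valid for any alignment, unlike dereferencing
		 * (struct iphdr *)(buf + sizeof(*eh)), which may be misaligned
		 */
		memcpy(buf, eh, sizeof(*eh));
		memcpy(buf + sizeof(*eh), iph, sizeof(*iph));
		memcpy(buf + sizeof(*eh) + sizeof(*iph), uh, sizeof(*uh));
	}
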
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 3bd5904b4db5..c98a897ad692 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -42,14 +42,13 @@ void test_xdp_bpf2bpf(void)
char buf[128];
int err, pkt_fd, map_fd;
bool passed = false;
- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+ struct iphdr iph;
struct iptnl_info value4 = {.family = AF_INET};
struct test_xdp *pkt_skel = NULL;
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
struct vip key4 = {.protocol = 6, .family = AF_INET};
struct bpf_program *prog;
struct perf_buffer *pb = NULL;
- struct perf_buffer_opts pb_opts = {};
/* Load XDP program to introspect */
pkt_skel = test_xdp__open_and_load();
@@ -86,19 +85,17 @@ void test_xdp_bpf2bpf(void)
goto out;
/* Set up perf buffer */
- pb_opts.sample_cb = on_sample;
- pb_opts.ctx = &passed;
- pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
- 1, &pb_opts);
+ pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1,
+ on_sample, NULL, &passed, NULL);
if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out;
/* Run test program */
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
-
+ memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
if (CHECK(err || retval != XDP_TX || size != 74 ||
- iph->protocol != IPPROTO_IPIP, "ipv4",
+ iph.protocol != IPPROTO_IPIP, "ipv4",
"err %d errno %d retval %d size %d\n",
err, errno, retval, size))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
index 4e2a4fd56f67..abe48e82e1dc 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
@@ -29,7 +29,7 @@ void serial_test_xdp_info(void)
/* Setup prog */
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 983ab0b47d30..b2b357f8c74c 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -8,46 +8,47 @@
void serial_test_xdp_link(void)
{
- __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err;
DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
+ __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2;
struct bpf_link_info link_info;
struct bpf_prog_info prog_info;
struct bpf_link *link;
+ int err;
__u32 link_info_len = sizeof(link_info);
__u32 prog_info_len = sizeof(prog_info);
skel1 = test_xdp_link__open_and_load();
- if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n"))
+ if (!ASSERT_OK_PTR(skel1, "skel_load"))
goto cleanup;
prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler);
skel2 = test_xdp_link__open_and_load();
- if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n"))
+ if (!ASSERT_OK_PTR(skel2, "skel_load"))
goto cleanup;
prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler);
memset(&prog_info, 0, sizeof(prog_info));
err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
- if (CHECK(err, "fd_info1", "failed %d\n", -errno))
+ if (!ASSERT_OK(err, "fd_info1"))
goto cleanup;
id1 = prog_info.id;
memset(&prog_info, 0, sizeof(prog_info));
err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
- if (CHECK(err, "fd_info2", "failed %d\n", -errno))
+ if (!ASSERT_OK(err, "fd_info2"))
goto cleanup;
id2 = prog_info.id;
/* set initial prog attachment */
err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
- if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "fd_attach"))
goto cleanup;
/* validate prog ID */
err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
- CHECK(err || id0 != id1, "id1_check",
- "loaded prog id %u != id1 %u, err %d", id0, id1, err);
+ if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
+ goto cleanup;
/* BPF link is not allowed to replace prog attachment */
link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
@@ -62,7 +63,7 @@ void serial_test_xdp_link(void)
/* detach BPF program */
opts.old_fd = prog_fd1;
err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
- if (CHECK(err, "prog_detach", "failed %d\n", err))
+ if (!ASSERT_OK(err, "prog_detach"))
goto cleanup;
/* now BPF link should attach successfully */
@@ -73,24 +74,23 @@ void serial_test_xdp_link(void)
/* validate prog ID */
err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
- if (CHECK(err || id0 != id1, "id1_check",
- "loaded prog id %u != id1 %u, err %d", id0, id1, err))
+ if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
goto cleanup;
/* BPF prog attach is not allowed to replace BPF link */
opts.old_fd = prog_fd1;
err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
- if (CHECK(!err, "prog_attach_fail", "unexpected success\n"))
+ if (!ASSERT_ERR(err, "prog_attach_fail"))
goto cleanup;
/* Can't force-update when BPF link is active */
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
- if (CHECK(!err, "prog_update_fail", "unexpected success\n"))
+ if (!ASSERT_ERR(err, "prog_update_fail"))
goto cleanup;
/* Can't force-detach when BPF link is active */
err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
- if (CHECK(!err, "prog_detach_fail", "unexpected success\n"))
+ if (!ASSERT_ERR(err, "prog_detach_fail"))
goto cleanup;
/* BPF link is not allowed to replace another BPF link */
@@ -110,40 +110,39 @@ void serial_test_xdp_link(void)
skel2->links.xdp_handler = link;
err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
- if (CHECK(err || id0 != id2, "id2_check",
- "loaded prog id %u != id2 %u, err %d", id0, id1, err))
+ if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val"))
goto cleanup;
/* updating program under active BPF link works as expected */
err = bpf_link__update_program(link, skel1->progs.xdp_handler);
- if (CHECK(err, "link_upd", "failed: %d\n", err))
+ if (!ASSERT_OK(err, "link_upd"))
goto cleanup;
memset(&link_info, 0, sizeof(link_info));
err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
- if (CHECK(err, "link_info", "failed: %d\n", err))
+ if (!ASSERT_OK(err, "link_info"))
goto cleanup;
- CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type",
- "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP);
- CHECK(link_info.prog_id != id1, "link_prog_id",
- "got %u != exp %u\n", link_info.prog_id, id1);
- CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex",
- "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO);
+ ASSERT_EQ(link_info.type, BPF_LINK_TYPE_XDP, "link_type");
+ ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
+ ASSERT_EQ(link_info.xdp.ifindex, IFINDEX_LO, "link_ifindex");
+
+ /* updating program under active BPF link with different type fails */
+ err = bpf_link__update_program(link, skel1->progs.tc_handler);
+ if (!ASSERT_ERR(err, "link_upd_invalid"))
+ goto cleanup;
err = bpf_link__detach(link);
- if (CHECK(err, "link_detach", "failed %d\n", err))
+ if (!ASSERT_OK(err, "link_detach"))
goto cleanup;
memset(&link_info, 0, sizeof(link_info));
err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
- if (CHECK(err, "link_info", "failed: %d\n", err))
- goto cleanup;
- CHECK(link_info.prog_id != id1, "link_prog_id",
- "got %u != exp %u\n", link_info.prog_id, id1);
+
+ ASSERT_OK(err, "link_info");
+ ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
/* ifindex should be zeroed out */
- CHECK(link_info.xdp.ifindex != 0, "link_ifindex",
- "got %u != exp %u\n", link_info.xdp.ifindex, 0);
+ ASSERT_EQ(link_info.xdp.ifindex, 0, "link_ifindex");
cleanup:
test_xdp_link__destroy(skel1);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
index 7185bee16fe4..15a3900e4370 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
@@ -9,7 +9,7 @@ void test_xdp_perf(void)
char in[128], out[128];
int err, prog_fd;
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
index 94423902685d..c21e3f545371 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -49,7 +49,7 @@ int dump_unix(struct bpf_iter__unix *ctx)
sock_i_ino(sk));
if (unix_sk->addr) {
- if (!UNIX_ABSTRACT(unix_sk)) {
+ if (unix_sk->addr->name->sun_path[0]) {
BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
} else {
/* The name of the abstract UNIX domain socket starts
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c
new file mode 100644
index 000000000000..12349e4601e8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_loop.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+ int output;
+};
+
+/* These should be set by the user program */
+u32 nested_callback_nr_loops;
+u32 stop_index = -1;
+u32 nr_loops;
+int pid;
+
+/* These are global variables so that the userspace program
+ * can verify the output through the skeleton.
+ */
+int nr_loops_returned;
+int g_output;
+int err;
+
+static int callback(__u32 index, void *data)
+{
+ struct callback_ctx *ctx = data;
+
+ if (index >= stop_index)
+ return 1;
+
+ ctx->output += index;
+
+ return 0;
+}
+
+static int empty_callback(__u32 index, void *data)
+{
+ return 0;
+}
+
+static int nested_callback2(__u32 index, void *data)
+{
+ nr_loops_returned += bpf_loop(nested_callback_nr_loops, callback, data, 0);
+
+ return 0;
+}
+
+static int nested_callback1(__u32 index, void *data)
+{
+ bpf_loop(nested_callback_nr_loops, nested_callback2, data, 0);
+ return 0;
+}
+
+SEC("fentry/__x64_sys_nanosleep")
+int test_prog(void *ctx)
+{
+ struct callback_ctx data = {};
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ nr_loops_returned = bpf_loop(nr_loops, callback, &data, 0);
+
+ if (nr_loops_returned < 0)
+ err = nr_loops_returned;
+ else
+ g_output = data.output;
+
+ return 0;
+}
+
+SEC("fentry/__x64_sys_nanosleep")
+int prog_null_ctx(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ nr_loops_returned = bpf_loop(nr_loops, empty_callback, NULL, 0);
+
+ return 0;
+}
+
+SEC("fentry/__x64_sys_nanosleep")
+int prog_invalid_flags(void *ctx)
+{
+ struct callback_ctx data = {};
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ err = bpf_loop(nr_loops, callback, &data, 1);
+
+ return 0;
+}
+
+SEC("fentry/__x64_sys_nanosleep")
+int prog_nested_calls(void *ctx)
+{
+ struct callback_ctx data = {};
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ nr_loops_returned = 0;
+ bpf_loop(nr_loops, nested_callback1, &data, 0);
+
+ g_output = data.output;
+
+ return 0;
+}
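
A hypothetical userspace driver for the test_prog program above, assuming the bpftool-generated "bpf_loop.skel.h" skeleton; the skeleton type and function names follow libbpf conventions but are not part of this patch:

	#include <unistd.h>
	#include "bpf_loop.skel.h"

	static void drive_test_prog(void)
	{
		struct bpf_loop *skel = bpf_loop__open_and_load();

		if (!skel)
			return;
		skel->bss->pid = getpid();
		skel->bss->nr_loops = 10;
		if (!bpf_loop__attach(skel)) {
			usleep(1); /* the nanosleep fentry fires test_prog() */
			/* callback() sums indices 0..9, so g_output should be 45
			 * and nr_loops_returned should be 10
			 */
		}
		bpf_loop__destroy(skel);
	}
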
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
new file mode 100644
index 000000000000..9dafdc244462
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+u32 nr_loops;
+long hits;
+
+static int empty_callback(__u32 index, void *data)
+{
+ return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int benchmark(void *ctx)
+{
+ for (int i = 0; i < 1000; i++) {
+ bpf_loop(nr_loops, empty_callback, NULL, 0);
+
+ __sync_add_and_fetch(&hits, nr_loops);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index eef5646ddb19..e0f42601be9b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -6,8 +6,6 @@
#define AF_INET6 10
#define __SO_ACCEPTCON (1 << 16)
-#define UNIX_HASH_SIZE 256
-#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
#define SOL_TCP 6
#define TCP_CONGESTION 13
diff --git a/tools/testing/selftests/bpf/progs/tag.c b/tools/testing/selftests/bpf/progs/btf_decl_tag.c
index 1792f4eda095..c88ccc53529a 100644
--- a/tools/testing/selftests/bpf/progs/tag.c
+++ b/tools/testing/selftests/bpf/progs/btf_decl_tag.c
@@ -4,10 +4,6 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#ifndef __has_attribute
-#define __has_attribute(x) 0
-#endif
-
#if __has_attribute(btf_decl_tag)
#define __tag1 __attribute__((btf_decl_tag("tag1")))
#define __tag2 __attribute__((btf_decl_tag("tag2")))
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag.c b/tools/testing/selftests/bpf/progs/btf_type_tag.c
new file mode 100644
index 000000000000..1d488da7e920
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#if __has_attribute(btf_type_tag)
+#define __tag1 __attribute__((btf_type_tag("tag1")))
+#define __tag2 __attribute__((btf_type_tag("tag2")))
+volatile const bool skip_tests = false;
+#else
+#define __tag1
+#define __tag2
+volatile const bool skip_tests = true;
+#endif
+
+struct btf_type_tag_test {
+ int __tag1 * __tag1 __tag2 *p;
+} g;
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(sub, int x)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/core_kern.c b/tools/testing/selftests/bpf/progs/core_kern.c
new file mode 100644
index 000000000000..13499cc15c7d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/core_kern.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define ATTR __always_inline
+#include "test_jhash.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 256);
+} array1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 256);
+} array2 SEC(".maps");
+
+static __noinline int randmap(int v, const struct net_device *dev)
+{
+ struct bpf_map *map = (struct bpf_map *)&array1;
+ int key = bpf_get_prandom_u32() & 0xff;
+ int *val;
+
+ if (bpf_get_prandom_u32() & 1)
+ map = (struct bpf_map *)&array2;
+
+ val = bpf_map_lookup_elem(map, &key);
+ if (val)
+ *val = bpf_get_prandom_u32() + v + dev->mtu;
+
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device
+ *from_dev, const struct net_device *to_dev, int sent, int drops,
+ int err)
+{
+ return randmap(from_dev->ifindex, from_dev);
+}
+
+SEC("fentry/eth_type_trans")
+int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb,
+ struct net_device *dev, unsigned short protocol)
+{
+ return randmap(dev->ifindex + skb->len, dev);
+}
+
+SEC("fexit/eth_type_trans")
+int BPF_PROG(fexit_eth_type_trans, struct sk_buff *skb,
+ struct net_device *dev, unsigned short protocol)
+{
+ return randmap(dev->ifindex + skb->len, dev);
+}
+
+volatile const int never;
+
+struct __sk_bUfF /* it will not exist in vmlinux */ {
+ int len;
+} __attribute__((preserve_access_index));
+
+struct bpf_testmod_test_read_ctx /* it exists in bpf_testmod */ {
+ size_t len;
+} __attribute__((preserve_access_index));
+
+SEC("tc")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ void *ptr;
+ int ret = 0, nh_off, i = 0;
+
+ nh_off = 14;
+
+ /* pragma unroll doesn't work on large loops */
+#define C do { \
+ ptr = data + i; \
+ if (ptr + nh_off > data_end) \
+ break; \
+ ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
+ if (never) { \
+ /* below is dead code with an unresolvable CO-RE relo */ \
+ i += ((struct __sk_bUfF *)ctx)->len; \
+ /* this CO-RE relo may or may not resolve
+ * depending on whether bpf_testmod is loaded.
+ */ \
+ i += ((struct bpf_testmod_test_read_ctx *)ctx)->len; \
+ } \
+ } while (0);
+#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
+ C30;C30;C30; /* 90 calls */
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c
new file mode 100644
index 000000000000..f5ca142abf8f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned int exception_triggered;
+int test_pid;
+
+/* TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+SEC("tp_btf/task_newtask")
+int BPF_PROG(trace_task_newtask, struct task_struct *task, u64 clone_flags)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ struct callback_head *work;
+ void *func;
+
+ if (test_pid != pid)
+ return 0;
+
+ /* To verify we hit an exception we dereference task->task_works->func.
+ * If task work has been added,
+ * - task->task_works is non-NULL; and
+ * - task->task_works->func is also non-NULL (the callback function
+ * must be specified for the task work).
+ *
+ * However, for a newly-created task, task->task_works is NULLed,
+ * so we know the exception handler triggered if task_works is
+ * NULL and func is NULL.
+ */
+ work = task->task_works;
+ func = work->func;
+ if (!work && !func)
+ exception_triggered++;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
index 49a84a3a2306..48cd14b43741 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -73,7 +73,7 @@ int test_subprog2(struct args_subprog2 *ctx)
__builtin_preserve_access_index(&skb->len));
ret = ctx->ret;
- /* bpf_prog_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32
+ /* bpf_prog_test_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32
* which randomizes upper 32 bits after BPF_ALU32 insns.
* Hence after 'w0 <<= 1' upper bits of $rax are random.
* That is expected and correct. Trim them.
diff --git a/tools/testing/selftests/bpf/progs/find_vma.c b/tools/testing/selftests/bpf/progs/find_vma.c
new file mode 100644
index 000000000000..38034fb82530
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/find_vma.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+ int dummy;
+};
+
+#define VM_EXEC 0x00000004
+#define DNAME_INLINE_LEN 32
+
+pid_t target_pid = 0;
+char d_iname[DNAME_INLINE_LEN] = {0};
+__u32 found_vm_exec = 0;
+__u64 addr = 0;
+int find_zero_ret = -1;
+int find_addr_ret = -1;
+
+static long check_vma(struct task_struct *task, struct vm_area_struct *vma,
+ struct callback_ctx *data)
+{
+ if (vma->vm_file)
+ bpf_probe_read_kernel_str(d_iname, DNAME_INLINE_LEN - 1,
+ vma->vm_file->f_path.dentry->d_iname);
+
+ /* check for VM_EXEC */
+ if (vma->vm_flags & VM_EXEC)
+ found_vm_exec = 1;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_getpid(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct callback_ctx data = {};
+
+ if (task->pid != target_pid)
+ return 0;
+
+ find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0);
+
+ /* this should return -ENOENT */
+ find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0);
+ return 0;
+}
+
+SEC("perf_event")
+int handle_pe(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct callback_ctx data = {};
+
+ if (task->pid != target_pid)
+ return 0;
+
+ find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0);
+
+ /* In NMI, this should return -EBUSY, as the previous call is using
+ * the irq_work.
+ */
+ find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0);
+ return 0;
+}
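
A hypothetical driver sketch for the programs above, assuming the generated "find_vma.skel.h" skeleton; using the driver function's own address as the probe address is illustrative:

	#include <unistd.h>
	#include <sys/syscall.h>
	#include "find_vma.skel.h"

	static void drive_find_vma(void)
	{
		struct find_vma *skel = find_vma__open_and_load();

		if (!skel)
			return;
		skel->bss->target_pid = getpid();
		skel->bss->addr = (unsigned long)drive_find_vma; /* inside a VM_EXEC mapping */
		if (!find_vma__attach(skel)) {
			syscall(SYS_getpgid); /* any syscall fires raw_tp/sys_enter */
			/* expect find_addr_ret == 0, found_vm_exec == 1, and
			 * find_zero_ret == -ENOENT (address 0 is not mapped)
			 */
		}
		find_vma__destroy(skel);
	}
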
diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
new file mode 100644
index 000000000000..b3b326b8e2d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+ int dummy;
+};
+
+static long write_vma(struct task_struct *task, struct vm_area_struct *vma,
+ struct callback_ctx *data)
+{
+ /* writing to vma, which is illegal */
+ vma->vm_flags |= 0x55;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_getpid(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct callback_ctx data = {};
+
+ bpf_find_vma(task, 0, write_vma, &data, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail2.c b/tools/testing/selftests/bpf/progs/find_vma_fail2.c
new file mode 100644
index 000000000000..9bcf3203e26b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/find_vma_fail2.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+ int dummy;
+};
+
+static long write_task(struct task_struct *task, struct vm_area_struct *vma,
+ struct callback_ctx *data)
+{
+ /* writing to task, which is illegal */
+ task->mm = NULL;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_getpid(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct callback_ctx data = {};
+
+ bpf_find_vma(task, 0, write_task, &data, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_func_args_test.c b/tools/testing/selftests/bpf/progs/get_func_args_test.c
new file mode 100644
index 000000000000..e0f34a55e697
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_func_args_test.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test1_result = cnt == 1;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+
+ /* We need to cast accesses to traced function argument values to the
+ * proper type, because the trampoline saves each argument with a
+ * type-specific instruction; e.g. 'int a' is saved with a 32-bit mov:
+ *
+ * mov %edi,-0x8(%rbp)
+ *
+ * so the upper 4 bytes are not zeroed.
+ */
+ test1_result &= err == 0 && ((int) a == 1);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 1, &z);
+ test1_result &= err == -EINVAL;
+
+ /* return value fails in fentry */
+ err = bpf_get_func_ret(ctx, &ret);
+ test1_result &= err == -EOPNOTSUPP;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_fentry_test2")
+int BPF_PROG(test2)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, b = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test2_result = cnt == 2;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test2_result &= err == 0 && (int) a == 2;
+
+ err = bpf_get_func_arg(ctx, 1, &b);
+ test2_result &= err == 0 && b == 3;
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 2, &z);
+ test2_result &= err == -EINVAL;
+
+ /* return value */
+ err = bpf_get_func_ret(ctx, &ret);
+ test2_result &= err == 0 && ret == 5;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int _a, int *_b, int _ret)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, b = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test3_result = cnt == 2;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test3_result &= err == 0 && ((int) a == 1);
+
+ err = bpf_get_func_arg(ctx, 1, &b);
+ test3_result &= err == 0 && ((int *) b == _b);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 2, &z);
+ test3_result &= err == -EINVAL;
+
+ /* return value */
+ err = bpf_get_func_ret(ctx, &ret);
+ test3_result &= err == 0 && ret == 0;
+
+ /* change return value, it's checked in fexit_test program */
+ return 1234;
+}
+
+__u64 test4_result = 0;
+SEC("fexit/bpf_modify_return_test")
+int BPF_PROG(fexit_test, int _a, int *_b, int _ret)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, b = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test4_result = cnt == 2;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test4_result &= err == 0 && ((int) a == 1);
+
+ err = bpf_get_func_arg(ctx, 1, &b);
+ test4_result &= err == 0 && ((int *) b == _b);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 2, &z);
+ test4_result &= err == -EINVAL;
+
+ /* return value */
+ err = bpf_get_func_ret(ctx, &ret);
+ test4_result &= err == 0 && ret == 1234;
+ return 0;
+}
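
A hypothetical result check for the four programs above, assuming the generated "get_func_args_test.skel.h" skeleton and the ASSERT_EQ() macro from the selftests' test_progs.h; triggering the bpf_fentry_test*/bpf_modify_return_test targets is left out of this sketch:

	#include <test_progs.h>
	#include "get_func_args_test.skel.h"

	static void check_results(struct get_func_args_test *skel)
	{
		/* each testN_result chains '&=' checks, so 1 means every
		 * bpf_get_func_arg()/bpf_get_func_ret() expectation held
		 */
		ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
		ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
		ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
		ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
	}
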
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 95868bc7ada9..9b1f9b75d5c2 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -20,7 +20,6 @@ int sk_storage_result = -1;
struct local_storage {
struct inode *exec_inode;
__u32 value;
- struct bpf_spin_lock lock;
};
struct {
@@ -58,9 +57,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
bpf_get_current_task_btf(), 0, 0);
if (storage) {
/* Don't let an executable delete itself */
- bpf_spin_lock(&storage->lock);
is_self_unlink = storage->exec_inode == victim->d_inode;
- bpf_spin_unlock(&storage->lock);
if (is_self_unlink)
return -EPERM;
}
@@ -68,7 +65,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
return 0;
}
-SEC("lsm/inode_rename")
+SEC("lsm.s/inode_rename")
int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
@@ -89,10 +86,8 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
if (!storage)
return 0;
- bpf_spin_lock(&storage->lock);
if (storage->value != DUMMY_STORAGE_VALUE)
inode_storage_result = -1;
- bpf_spin_unlock(&storage->lock);
err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode);
if (!err)
@@ -101,7 +96,7 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
return 0;
}
-SEC("lsm/socket_bind")
+SEC("lsm.s/socket_bind")
int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
int addrlen)
{
@@ -117,10 +112,8 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
if (!storage)
return 0;
- bpf_spin_lock(&storage->lock);
if (storage->value != DUMMY_STORAGE_VALUE)
sk_storage_result = -1;
- bpf_spin_unlock(&storage->lock);
err = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
if (!err)
@@ -129,7 +122,7 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
return 0;
}
-SEC("lsm/socket_post_create")
+SEC("lsm.s/socket_post_create")
int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
int protocol, int kern)
{
@@ -144,9 +137,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
if (!storage)
return 0;
- bpf_spin_lock(&storage->lock);
storage->value = DUMMY_STORAGE_VALUE;
- bpf_spin_unlock(&storage->lock);
return 0;
}
@@ -154,7 +145,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
/* This uses the local storage to remember the inode of the binary that a
* process was originally executing.
*/
-SEC("lsm/bprm_committed_creds")
+SEC("lsm.s/bprm_committed_creds")
void BPF_PROG(exec, struct linux_binprm *bprm)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
@@ -166,18 +157,13 @@ void BPF_PROG(exec, struct linux_binprm *bprm)
storage = bpf_task_storage_get(&task_storage_map,
bpf_get_current_task_btf(), 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
- if (storage) {
- bpf_spin_lock(&storage->lock);
+ if (storage)
storage->exec_inode = bprm->file->f_inode;
- bpf_spin_unlock(&storage->lock);
- }
storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode,
0, BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!storage)
return;
- bpf_spin_lock(&storage->lock);
storage->value = DUMMY_STORAGE_VALUE;
- bpf_spin_unlock(&storage->lock);
}
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
index 76e93b31c14b..717dab14322b 100644
--- a/tools/testing/selftests/bpf/progs/loop3.c
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -12,9 +12,9 @@
char _license[] SEC("license") = "GPL";
SEC("raw_tracepoint/consume_skb")
-int while_true(volatile struct pt_regs* ctx)
+int while_true(struct pt_regs *ctx)
{
- __u64 i = 0, sum = 0;
+ volatile __u64 i = 0, sum = 0;
do {
i++;
sum += PT_REGS_RC(ctx);
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index b1b711d9b214..b64df94ec476 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -334,9 +334,11 @@ static inline int check_lpm_trie(void)
return 1;
}
+#define INNER_MAX_ENTRIES 1234
+
struct inner_map {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
+ __uint(max_entries, INNER_MAX_ENTRIES);
__type(key, __u32);
__type(value, __u32);
} inner_map SEC(".maps");
@@ -348,7 +350,7 @@ struct {
__type(value, __u32);
__array(values, struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
+ __uint(max_entries, INNER_MAX_ENTRIES);
__type(key, __u32);
__type(value, __u32);
});
@@ -360,8 +362,13 @@ static inline int check_array_of_maps(void)
{
struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps;
struct bpf_map *map = (struct bpf_map *)&m_array_of_maps;
+ struct bpf_array *inner_map;
+ int key = 0;
VERIFY(check_default(&array_of_maps->map, map));
+ inner_map = bpf_map_lookup_elem(array_of_maps, &key);
+ VERIFY(inner_map != 0);
+ VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES);
return 1;
}
@@ -382,8 +389,13 @@ static inline int check_hash_of_maps(void)
{
struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps;
struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps;
+ struct bpf_htab *inner_map;
+ int key = 2;
VERIFY(check_default(&hash_of_maps->map, map));
+ inner_map = bpf_map_lookup_elem(hash_of_maps, &key);
+ VERIFY(inner_map != 0);
+ VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES);
return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 2fb7adafb6b6..1ed28882daf3 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -159,6 +159,59 @@ struct {
__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");
+#ifdef USE_BPF_LOOP
+struct process_frame_ctx {
+ int cur_cpu;
+ int32_t *symbol_counter;
+ void *frame_ptr;
+ FrameData *frame;
+ PidData *pidData;
+ Symbol *sym;
+ Event *event;
+ bool done;
+};
+
+#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
+
+static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
+{
+ int zero = 0;
+ void *frame_ptr = ctx->frame_ptr;
+ PidData *pidData = ctx->pidData;
+ FrameData *frame = ctx->frame;
+ int32_t *symbol_counter = ctx->symbol_counter;
+ int cur_cpu = ctx->cur_cpu;
+ Event *event = ctx->event;
+ Symbol *sym = ctx->sym;
+
+ if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
+ int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
+ int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
+
+ if (!symbol_id) {
+ bpf_map_update_elem(&symbolmap, sym, &zero, 0);
+ symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
+ if (!symbol_id) {
+ ctx->done = true;
+ return 1;
+ }
+ }
+ if (*symbol_id == new_symbol_id)
+ (*symbol_counter)++;
+
+ barrier_var(i);
+ if (i >= STACK_MAX_LEN)
+ return 1;
+
+ event->stack[i] = *symbol_id;
+
+ event->stack_len = i + 1;
+ frame_ptr = frame->f_back;
+ }
+ return 0;
+}
+#endif /* USE_BPF_LOOP */
+
#ifdef GLOBAL_FUNC
__noinline
#elif defined(SUBPROGS)
@@ -228,11 +281,26 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
if (symbol_counter == NULL)
return 0;
+#ifdef USE_BPF_LOOP
+ struct process_frame_ctx ctx = {
+ .cur_cpu = cur_cpu,
+ .symbol_counter = symbol_counter,
+ .frame_ptr = frame_ptr,
+ .frame = &frame,
+ .pidData = pidData,
+ .sym = &sym,
+ .event = event,
+ };
+
+ bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
+ if (ctx.done)
+ return 0;
+#else
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma clang loop unroll(full)
-#endif
+#endif /* NO_UNROLL */
/* Unwind python stack */
for (int i = 0; i < STACK_MAX_LEN; ++i) {
if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
@@ -251,6 +319,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
frame_ptr = frame.f_back;
}
}
+#endif /* USE_BPF_LOOP */
event->stack_complete = frame_ptr == NULL;
} else {
event->stack_complete = 1;
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c
new file mode 100644
index 000000000000..5c2059dc01af
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#define STACK_MAX_LEN 600
+#define USE_BPF_LOOP
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c
new file mode 100644
index 000000000000..18373a7df76e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define STRNCMP_STR_SZ 4096
+
+/* Will be updated by benchmark before program loading */
+const volatile unsigned int cmp_str_len = 1;
+const char target[STRNCMP_STR_SZ];
+
+long hits = 0;
+char str[STRNCMP_STR_SZ];
+
+char _license[] SEC("license") = "GPL";
+
+static __always_inline int local_strncmp(const char *s1, unsigned int sz,
+ const char *s2)
+{
+ int ret = 0;
+ unsigned int i;
+
+ for (i = 0; i < sz; i++) {
+ /* E.g. 0xff > 0x31 */
+ ret = (unsigned char)s1[i] - (unsigned char)s2[i];
+ if (ret || !s1[i])
+ break;
+ }
+
+ return ret;
+}
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int strncmp_no_helper(void *ctx)
+{
+ if (local_strncmp(str, cmp_str_len + 1, target) < 0)
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int strncmp_helper(void *ctx)
+{
+ if (bpf_strncmp(str, cmp_str_len + 1, target) < 0)
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/strncmp_test.c b/tools/testing/selftests/bpf/progs/strncmp_test.c
new file mode 100644
index 000000000000..900d930d48a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strncmp_test.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define STRNCMP_STR_SZ 8
+
+const char target[STRNCMP_STR_SZ] = "EEEEEEE";
+char str[STRNCMP_STR_SZ];
+int cmp_ret = 0;
+int target_pid = 0;
+
+const char no_str_target[STRNCMP_STR_SZ] = "12345678";
+char writable_target[STRNCMP_STR_SZ];
+unsigned int no_const_str_size = STRNCMP_STR_SZ;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int do_strncmp(void *ctx)
+{
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, target);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int strncmp_bad_not_const_str_size(void *ctx)
+{
+ /* The string size is not a constant, so the verifier rejects the program */
+ cmp_ret = bpf_strncmp(str, no_const_str_size, target);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int strncmp_bad_writable_target(void *ctx)
+{
+ /* The compared target is not read-only, so the verifier rejects the program */
+ cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, writable_target);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int strncmp_bad_not_null_term_target(void *ctx)
+{
+ /* The compared target is not null-terminated, so the verifier rejects the program */
+ cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, no_str_target);
+ return 0;
+}
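
A hypothetical driver sketch for do_strncmp() above, assuming a generated "strncmp_test.skel.h" skeleton that is already loaded and attached by the caller; bpf_strncmp() follows strncmp() sign semantics, so cmp_ret is negative, zero, or positive as str compares against "EEEEEEE":

	#include <string.h>
	#include <unistd.h>
	#include "strncmp_test.skel.h"

	/* probe must supply STRNCMP_STR_SZ (8) bytes */
	static void drive_do_strncmp(struct strncmp_test *skel, const char *probe)
	{
		memcpy(skel->bss->str, probe, sizeof(skel->bss->str));
		skel->bss->target_pid = getpid();
		usleep(1); /* the nanosleep tracepoint fires do_strncmp() */
		/* e.g. probe "EEEEEED" -> cmp_ret < 0, "EEEEEEE" -> 0,
		 * "EEEEEEF" -> cmp_ret > 0
		 */
	}
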
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 60c93aee2f4a..753718595c26 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -445,6 +445,48 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
return payload;
}
+#ifdef USE_BPF_LOOP
+enum read_type {
+ READ_INT_VAR,
+ READ_MAP_VAR,
+ READ_STR_VAR,
+};
+
+struct read_var_ctx {
+ struct strobemeta_payload *data;
+ void *tls_base;
+ struct strobemeta_cfg *cfg;
+ void *payload;
+ /* value gets mutated */
+ struct strobe_value_generic *value;
+ enum read_type type;
+};
+
+static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
+{
+ switch (ctx->type) {
+ case READ_INT_VAR:
+ if (index >= STROBE_MAX_INTS)
+ return 1;
+ read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data);
+ break;
+ case READ_MAP_VAR:
+ if (index >= STROBE_MAX_MAPS)
+ return 1;
+ ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
+ ctx->value, ctx->data, ctx->payload);
+ break;
+ case READ_STR_VAR:
+ if (index >= STROBE_MAX_STRS)
+ return 1;
+ ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
+ ctx->value, ctx->data, ctx->payload);
+ break;
+ }
+ return 0;
+}
+#endif /* USE_BPF_LOOP */
+
/*
* read_strobe_meta returns NULL if no metadata was read; otherwise returns
* a pointer to *right after* the payload ends
@@ -475,11 +517,36 @@ static void *read_strobe_meta(struct task_struct *task,
*/
tls_base = (void *)task;
+#ifdef USE_BPF_LOOP
+ struct read_var_ctx ctx = {
+ .cfg = cfg,
+ .tls_base = tls_base,
+ .value = &value,
+ .data = data,
+ .payload = payload,
+ };
+ int err;
+
+ ctx.type = READ_INT_VAR;
+ err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_INTS)
+ return NULL;
+
+ ctx.type = READ_STR_VAR;
+ err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_STRS)
+ return NULL;
+
+ ctx.type = READ_MAP_VAR;
+ err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_MAPS)
+ return NULL;
+#else
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma unroll
-#endif
+#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_INTS; ++i) {
read_int_var(cfg, i, tls_base, &value, data);
}
@@ -487,7 +554,7 @@ static void *read_strobe_meta(struct task_struct *task,
#pragma clang loop unroll(disable)
#else
#pragma unroll
-#endif
+#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_STRS; ++i) {
payload += read_str_var(cfg, i, tls_base, &value, data, payload);
}
@@ -495,10 +562,12 @@ static void *read_strobe_meta(struct task_struct *task,
#pragma clang loop unroll(disable)
#else
#pragma unroll
-#endif
+#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
payload = read_map_var(cfg, i, tls_base, &value, data, payload);
}
+#endif /* USE_BPF_LOOP */
+
/*
* return pointer right after end of payload, so it's possible to
* calculate exact amount of useful data that needs to be sent
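
The err != STROBE_MAX_* checks above rely on bpf_loop() returning the number of callback invocations performed, with a nonzero callback return value stopping the loop early. An illustrative snippet, not from the patch:

	static int cb(__u32 index, void *unused)
	{
		return index == 2; /* ask bpf_loop() to stop once index reaches 2 */
	}

	/* bpf_loop(10, cb, NULL, 0) invokes cb for index 0, 1 and 2, then
	 * returns 3 -- fewer than the requested 10, signalling an early stop.
	 */
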
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c
new file mode 100644
index 000000000000..d18b992f0165
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 100
+#define STROBE_MAX_MAP_ENTRIES 20
+#define USE_BPF_LOOP
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c
new file mode 100644
index 000000000000..27c27cff6a3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern const int bpf_prog_active __ksym;
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ void *active;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* FAIL here! 'active' points to read-only memory. BPF helpers
+ * that update their arguments cannot write into it.
+ */
+ bpf_d_path(path, active, sizeof(int));
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
new file mode 100644
index 000000000000..7e02b7361307
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern const int bpf_prog_active __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ void *active;
+ u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* FAIL here! 'active' points to 'regular' memory. It
+ * cannot be submitted to the ring buffer.
+ */
+ bpf_ringbuf_submit(active, 0);
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
new file mode 100644
index 000000000000..2180c41cd890
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ int *active;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* Kernel memory obtained from bpf_{per,this}_cpu_ptr
+ * is read-only; the write below should _not_ pass verification.
+ */
+ /* WRITE_ONCE */
+ *(volatile int *)active = -1;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
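
For contrast, a hypothetical counterpart that is not in the patch, assuming the same includes and __ksym declaration as the file above: bpf_per_cpu_ptr() on a const ksym yields read-only memory, so a pure read of the same variable should pass verification:

	SEC("raw_tp/sys_enter")
	int reader(const void *ctx)
	{
		const int *active;
		__u32 cpu = bpf_get_smp_processor_id();

		active = (const int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
		if (active && *active > 0)
			return 1; /* reading is fine; only the write above is rejected */
		return 0;
	}
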
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
index 8eadbd4caf7a..5f8379aadb29 100644
--- a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
@@ -38,7 +38,7 @@ int pass_handler(const void *ctx)
/* tests existing symbols. */
rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
if (rq)
- out__existing_typed = 0;
+ out__existing_typed = rq->cpu;
out__existing_typeless = (__u64)&bpf_prog_active;
/* tests non-existent symbols. */
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c
index 04fee08863cb..c26057ec46dc 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb.c
@@ -448,7 +448,7 @@ static __always_inline int process_packet(void *data, __u64 off, void *data_end,
return bpf_redirect(ifindex, 0);
}
-SEC("l4lb-demo")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index b9e2753f4f91..19e4d2071c60 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -447,7 +447,7 @@ static __noinline int process_packet(void *data, __u64 off, void *data_end,
return bpf_redirect(ifindex, 0);
}
-SEC("l4lb-demo")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_legacy_printk.c b/tools/testing/selftests/bpf/progs/test_legacy_printk.c
new file mode 100644
index 000000000000..64c2d9ced529
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_legacy_printk.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#define BPF_NO_GLOBAL_DATA
+#include <bpf/bpf_helpers.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+} my_pid_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+} res_map SEC(".maps");
+
+volatile int my_pid_var = 0;
+volatile int res_var = 0;
+
+SEC("tp/raw_syscalls/sys_enter")
+int handle_legacy(void *ctx)
+{
+ int zero = 0, *my_pid, cur_pid, *my_res;
+
+ my_pid = bpf_map_lookup_elem(&my_pid_map, &zero);
+ if (!my_pid)
+ return 1;
+
+ cur_pid = bpf_get_current_pid_tgid() >> 32;
+ if (cur_pid != *my_pid)
+ return 1;
+
+ my_res = bpf_map_lookup_elem(&res_map, &zero);
+ if (!my_res)
+ return 1;
+
+ if (*my_res == 0)
+ /* use bpf_printk() in combination with BPF_NO_GLOBAL_DATA to
+ * force .rodata.str1.1 section that previously caused
+ * problems on old kernels due to libbpf always tried to
+ * create a global data map for it
+ */
+ bpf_printk("Legacy-case bpf_printk test, pid %d\n", cur_pid);
+ *my_res = 1;
+
+ return *my_res;
+}
+
+SEC("tp/raw_syscalls/sys_enter")
+int handle_modern(void *ctx)
+{
+ int zero = 0, cur_pid;
+
+ cur_pid = bpf_get_current_pid_tgid() >> 32;
+ if (cur_pid != my_pid_var)
+ return 1;
+
+ if (res_var == 0)
+ /* we need bpf_printk() to validate libbpf logic around unused
+ * global maps and legacy kernels; see comment in handle_legacy()
+ */
+ bpf_printk("Modern-case bpf_printk test, pid %d\n", cur_pid);
+ res_var = 1;
+
+ return res_var;
+}
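
A hypothetical loader sketch for picking one of the two programs above, assuming the generated "test_legacy_printk.skel.h" skeleton; bpf_program__set_autoload() is the stock libbpf API for skipping a program at load time:

	#include <stdbool.h>
	#include "test_legacy_printk.skel.h"

	static struct test_legacy_printk *load_one(bool has_global_data)
	{
		struct test_legacy_printk *skel = test_legacy_printk__open();

		if (!skel)
			return NULL;
		/* load only the variant matching the kernel's global-data support */
		bpf_program__set_autoload(has_global_data ?
					  skel->progs.handle_legacy :
					  skel->progs.handle_modern, false);
		if (test_legacy_printk__load(skel)) {
			test_legacy_printk__destroy(skel);
			return NULL;
		}
		return skel;
	}
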
diff --git a/tools/testing/selftests/bpf/progs/test_log_buf.c b/tools/testing/selftests/bpf/progs/test_log_buf.c
new file mode 100644
index 000000000000..199f459bd5ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_log_buf.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int a[4];
+const volatile int off = 4000;
+
+SEC("raw_tp/sys_enter")
+int good_prog(const void *ctx)
+{
+ a[0] = (int)(long)ctx;
+ return a[1];
+}
+
+SEC("raw_tp/sys_enter")
+int bad_prog(const void *ctx)
+{
+ /* out of bounds access */
+ return a[off];
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c
index b5c07ae7b68f..acf073db9e8b 100644
--- a/tools/testing/selftests/bpf/progs/test_map_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_map_lock.c
@@ -30,7 +30,7 @@ struct {
__type(value, struct array_elem);
} array_map SEC(".maps");
-SEC("map_lock_demo")
+SEC("cgroup/skb")
int bpf_map_lock_test(struct __sk_buff *skb)
{
struct hmap_elem zero = {}, *val;
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index b36857093f71..50ce16d02da7 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -87,6 +87,18 @@ int BPF_PROG(handle_fexit,
return 0;
}
+SEC("fexit/bpf_testmod_return_ptr")
+int BPF_PROG(handle_fexit_ret, int arg, struct file *ret)
+{
+ long buf = 0;
+
+ bpf_probe_read_kernel(&buf, 8, ret);
+ bpf_probe_read_kernel(&buf, 8, (char *)ret + 256);
+ *(volatile long long *)ret;
+ *(volatile int *)&ret->f_mode;
+ return 0;
+}
+
__u32 fmod_ret_read_sz = 0;
SEC("fmod_ret/bpf_testmod_test_read")
diff --git a/tools/testing/selftests/bpf/progs/test_prog_array_init.c b/tools/testing/selftests/bpf/progs/test_prog_array_init.c
new file mode 100644
index 000000000000..2cd138356126
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_prog_array_init.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile pid_t my_pid = 0;
+int value = 0;
+
+SEC("raw_tp/sys_enter")
+int tailcall_1(void *ctx)
+{
+ value = 42;
+ return 0;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __array(values, int (void *));
+} prog_array_init SEC(".maps") = {
+ .values = {
+ [1] = (void *)&tailcall_1,
+ },
+};
+
+SEC("raw_tp/sys_enter")
+int entry(void *ctx)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ bpf_tail_call(ctx, &prog_array_init, 1);
+ return 0;
+}
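
The .values initializer above lets libbpf populate slot 1 of the prog array at load time, with no explicit bpf_map_update_elem() call. A hypothetical driver sketch, assuming the generated "test_prog_array_init.skel.h" skeleton:

	#include <unistd.h>
	#include "test_prog_array_init.skel.h"

	static void drive_entry(void)
	{
		struct test_prog_array_init *skel = test_prog_array_init__open();

		if (!skel)
			return;
		skel->rodata->my_pid = getpid(); /* const volatile -> set before load */
		if (!test_prog_array_init__load(skel) &&
		    !test_prog_array_init__attach(skel)) {
			usleep(1); /* any syscall fires raw_tp/sys_enter */
			/* the tail call into slot 1 should set skel->bss->value to 42 */
		}
		test_prog_array_init__destroy(skel);
	}
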
diff --git a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
index 0fcd3ff0e38a..648e8cab7a23 100644
--- a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
+++ b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
@@ -24,7 +24,7 @@ struct {
__uint(value_size, sizeof(__u32));
} map_out SEC(".maps");
-SEC("test")
+SEC("tc")
int _test(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index 19d2465d9442..83b0aaa52ef7 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -84,6 +84,14 @@ int lookup_drop(struct bpf_sk_lookup *ctx)
return SK_DROP;
}
+SEC("sk_lookup")
+int check_ifindex(struct bpf_sk_lookup *ctx)
+{
+ if (ctx->ingress_ifindex == 1)
+ return SK_DROP;
+ return SK_PASS;
+}
+
SEC("sk_reuseport")
int reuseport_pass(struct sk_reuseport_md *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
index 8e94e5c080aa..6dc1f28fc4b6 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
@@ -68,7 +68,7 @@ static void set_task_info(struct sock *sk)
}
SEC("fentry/inet_csk_listen_start")
-int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk, int backlog)
+int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk)
{
set_task_info(sk);
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index 1d61b36e6067..c482110cfc95 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -5,7 +5,7 @@
char _license[] SEC("license") = "GPL";
-SEC("skb_ctx")
+SEC("tc")
int process(struct __sk_buff *skb)
{
#pragma clang loop unroll(full)
diff --git a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c
index a408ec95cba4..eacda9fe07eb 100644
--- a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c
+++ b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c
@@ -23,7 +23,7 @@ int BPF_PROG(unix_listen, struct socket *sock, int backlog)
if (!unix_sk)
return 0;
- if (!UNIX_ABSTRACT(unix_sk))
+ if (unix_sk->addr->name->sun_path[0])
return 0;
len = unix_sk->addr->len - sizeof(short);
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index 0d31a3b3505f..7e88309d3229 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -45,7 +45,7 @@ struct {
#define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)
-SEC("spin_lock_demo")
+SEC("tc")
int bpf_sping_lock_test(struct __sk_buff *skb)
{
volatile int credit = 0, max_credit = 100, pkt_len = 64;
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index a8233e7f173b..728dbd39eff0 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#ifndef PERF_MAX_STACK_DEPTH
@@ -41,11 +41,11 @@ struct {
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
index 2c5c602c6011..e2ae049c2f85 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
@@ -244,7 +244,7 @@ static __always_inline void send_basic_event(struct sock *sk,
bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY);
}
-SEC("dummy_tracepoint")
+SEC("tp/dummy/tracepoint")
int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
{
if (!arg->sock)
diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c
index ce6974016f53..43bd7a20cc50 100644
--- a/tools/testing/selftests/bpf/progs/test_tracepoint.c
+++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c
@@ -1,17 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2017 Facebook
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
index f024154c7be7..f90ffcafd1e8 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#define ATTR __always_inline
#include "test_jhash.h"
-SEC("scale90_inline")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
index ee7d6ac0f615..64ff32eaae92 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_link.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
@@ -10,3 +10,9 @@ int xdp_handler(struct xdp_md *xdp)
{
return 0;
}
+
+SEC("tc")
+int tc_handler(struct __sk_buff *skb)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 9a4d09590b3d..2098f3f27f18 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -52,3 +52,10 @@ int bench_trigger_fmodret(void *ctx)
__sync_add_and_fetch(&hits, 1);
return -22;
}
+
+SEC("uprobe/self/uprobe_target")
+int bench_trigger_uprobe(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
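
SEC("uprobe/...") programs are not attached automatically from the section name alone, so the user-space half of the benchmark has to attach this program explicitly. A hedged sketch of what that attachment can look like; the binary path and zero offset are illustrative assumptions, not taken from this diff:

#include <bpf/libbpf.h>

/* Sketch: attach a SEC("uprobe/...") program to a function in our own
 * binary. func_offset would normally be resolved from the ELF symbol
 * table; 0 is a placeholder here.
 */
static int attach_uprobe_bench(struct bpf_program *prog)
{
        struct bpf_link *link;

        link = bpf_program__attach_uprobe(prog, false /* retprobe */,
                                          0 /* pid: this process */,
                                          "/proc/self/exe",
                                          0 /* func_offset: assumption */);
        return libbpf_get_error(link);
}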
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index be54b7335a76..6bf21e47882a 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -9,7 +9,15 @@ import os, sys
LINUX_ROOT = os.path.abspath(os.path.join(__file__,
os.pardir, os.pardir, os.pardir, os.pardir, os.pardir))
-BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool')
+BPFTOOL_DIR = os.getenv('BPFTOOL_DIR',
+ os.path.join(LINUX_ROOT, 'tools/bpf/bpftool'))
+BPFTOOL_BASHCOMP_DIR = os.getenv('BPFTOOL_BASHCOMP_DIR',
+ os.path.join(BPFTOOL_DIR, 'bash-completion'))
+BPFTOOL_DOC_DIR = os.getenv('BPFTOOL_DOC_DIR',
+ os.path.join(BPFTOOL_DIR, 'Documentation'))
+INCLUDE_DIR = os.getenv('INCLUDE_DIR',
+ os.path.join(LINUX_ROOT, 'tools/include'))
+
retval = 0
class BlockParser(object):
@@ -242,12 +250,6 @@ class FileExtractor(object):
end_marker = re.compile('}\\\\n')
return self.__get_description_list(start_marker, pattern, end_marker)
- def default_options(self):
- """
- Return the default options contained in HELP_SPEC_OPTIONS
- """
- return { '-j', '--json', '-p', '--pretty', '-d', '--debug' }
-
def get_bashcomp_list(self, block_name):
"""
Search for and parse a list of type names from a variable in bash
@@ -274,7 +276,56 @@ class SourceFileExtractor(FileExtractor):
defined in children classes.
"""
def get_options(self):
- return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS'))
+ return self.get_help_list_macro('HELP_SPEC_OPTIONS')
+
+class MainHeaderFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's main.h
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'main.h')
+
+ def get_common_options(self):
+ """
+ Parse the list of common options in main.h (options that apply to all
+ commands). The list looks like the option lists in other source files,
+ but it uses different start and end markers:
+
+ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}"
+
+ Return a set containing all options, such as:
+
+ {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'}
+ """
+ start_marker = re.compile('"OPTIONS :=')
+ pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])')
+ end_marker = re.compile('#define')
+
+ parser = InlineListParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+class ManSubstitutionsExtractor(SourceFileExtractor):
+ """
+ An extractor for substitutions.rst
+ """
+ filename = os.path.join(BPFTOOL_DOC_DIR, 'substitutions.rst')
+
+ def get_common_options(self):
+ """
+ Parse the list of common options in substitutions.rst (options that
+ apply to all commands).
+
+ Return a set containing all options, such as:
+
+ {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'}
+ """
+ start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {')
+ pattern = re.compile('\*\*([\w/-]+)\*\*')
+ end_marker = re.compile('}$')
+
+ parser = InlineListParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
class ProgFileExtractor(SourceFileExtractor):
"""
@@ -350,7 +401,7 @@ class BpfHeaderExtractor(FileExtractor):
"""
An extractor for the UAPI BPF header.
"""
- filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h')
+ filename = os.path.join(INCLUDE_DIR, 'uapi/linux/bpf.h')
def get_prog_types(self):
return self.get_enum('bpf_prog_type')
@@ -374,7 +425,7 @@ class ManProgExtractor(ManPageExtractor):
"""
An extractor for bpftool-prog.rst.
"""
- filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst')
+ filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-prog.rst')
def get_attach_types(self):
return self.get_rst_list('ATTACH_TYPE')
@@ -383,7 +434,7 @@ class ManMapExtractor(ManPageExtractor):
"""
An extractor for bpftool-map.rst.
"""
- filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst')
+ filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-map.rst')
def get_map_types(self):
return self.get_rst_list('TYPE')
@@ -392,7 +443,7 @@ class ManCgroupExtractor(ManPageExtractor):
"""
An extractor for bpftool-cgroup.rst.
"""
- filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst')
+ filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-cgroup.rst')
def get_attach_types(self):
return self.get_rst_list('ATTACH_TYPE')
@@ -411,7 +462,7 @@ class BashcompExtractor(FileExtractor):
"""
An extractor for bpftool's bash completion file.
"""
- filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool')
+ filename = os.path.join(BPFTOOL_BASHCOMP_DIR, 'bpftool')
def get_prog_attach_types(self):
return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
@@ -562,7 +613,7 @@ def main():
help_cmd_options = source_info.get_options()
source_info.close()
- man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst'))
+ man_cmd_info = ManGenericExtractor(os.path.join(BPFTOOL_DOC_DIR, 'bpftool-' + cmd + '.rst'))
man_cmd_options = man_cmd_info.get_options()
man_cmd_info.close()
@@ -573,13 +624,26 @@ def main():
help_main_options = source_main_info.get_options()
source_main_info.close()
- man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst'))
+ man_main_info = ManGenericExtractor(os.path.join(BPFTOOL_DOC_DIR, 'bpftool.rst'))
man_main_options = man_main_info.get_options()
man_main_info.close()
verify(help_main_options, man_main_options,
f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):')
+ # Compare common options (options that apply to all commands)
+
+ main_hdr_info = MainHeaderFileExtractor()
+ source_common_options = main_hdr_info.get_common_options()
+ main_hdr_info.close()
+
+ man_substitutions = ManSubstitutionsExtractor()
+ man_common_options = man_substitutions.get_common_options()
+ man_substitutions.close()
+
+ verify(source_common_options, man_common_options,
+ f'Comparing common options from {main_hdr_info.filename} (HELP_SPEC_OPTIONS) and {man_substitutions.filename}:')
+
sys.exit(retval)
if __name__ == "__main__":
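
The new environment variables let the script validate an out-of-tree bpftool checkout instead of hard-wiring tools/bpf/bpftool; each one falls back to the in-tree default when unset. A hypothetical invocation (all paths are placeholders):

BPFTOOL_DIR=~/bpftool/src \
BPFTOOL_DOC_DIR=~/bpftool/docs \
BPFTOOL_BASHCOMP_DIR=~/bpftool/bash-completion \
INCLUDE_DIR=~/bpftool/include \
./test_bpftool_synctypes.py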
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index 32c7a57867da..128989bed8b7 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -72,4 +72,7 @@
#define BTF_DECL_TAG_ENC(value, type, component_idx) \
BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx)
+#define BTF_TYPE_TAG_ENC(value, type) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type)
+
#endif /* _TEST_BTF_H */
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index 0cda61da5d39..5b8314cd77fd 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -8,6 +8,7 @@
#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
@@ -50,15 +51,15 @@ int main(int argc, char **argv)
goto err;
}
- map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
- sizeof(value), 0, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, sizeof(key),
+ sizeof(value), 0, NULL);
if (map_fd < 0) {
printf("Failed to create map: %s\n", strerror(errno));
goto out;
}
- percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
- sizeof(key), sizeof(value), 0, 0);
+ percpu_map_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, NULL,
+ sizeof(key), sizeof(value), 0, NULL);
if (percpu_map_fd < 0) {
printf("Failed to create map: %s\n", strerror(errno));
goto out;
@@ -66,7 +67,7 @@ int main(int argc, char **argv)
prog[0].imm = percpu_map_fd;
prog[7].imm = map_fd;
- prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog_fd = bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
if (prog_fd < 0) {
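
Every bpf_create_map() conversion in this series targets the same replacement. For reference, the bpf_map_create() signature from libbpf's bpf.h; the new map_name argument and the opts pointer are what absorb the old trailing flags parameter:

LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
                              const char *map_name,
                              __u32 key_size,
                              __u32 value_size,
                              __u32 max_entries,
                              const struct bpf_map_create_opts *opts);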
diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
index a8d2e9a87fbf..e00201de2890 100644
--- a/tools/testing/selftests/bpf/test_cpp.cpp
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -7,9 +7,15 @@
/* do nothing, just make sure we can link successfully */
+static void dump_printf(void *ctx, const char *fmt, va_list args)
+{
+}
+
int main(int argc, char *argv[])
{
+ struct btf_dump_opts opts = { };
struct test_core_extern *skel;
+ struct btf *btf;
/* libbpf.h */
libbpf_set_print(NULL);
@@ -18,7 +24,8 @@ int main(int argc, char *argv[])
bpf_prog_get_fd_by_id(0);
/* btf.h */
- btf__new(NULL, 0);
+ btf = btf__new(NULL, 0);
+ btf_dump__new(btf, dump_printf, nullptr, &opts);
/* BPF skeleton */
skel = test_core_extern__open_and_load();
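
The added btf_dump__new() call pins down the reworked signature, in which the printf callback and its context precede the opts pointer. Getting this to compile and link from C++ (note the nullptr ctx) is exactly the kind of check test_cpp exists for:

struct btf_dump *btf_dump__new(const struct btf *btf,
                               btf_dump_printf_fn_t printf_fn,
                               void *ctx,
                               const struct btf_dump_opts *opts);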
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index 804dddd97d4c..c299d3452695 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -14,6 +14,7 @@
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
#include "bpf_rlimit.h"
#define DEV_CGROUP_PROG "./dev_cgroup.o"
@@ -27,7 +28,7 @@ int main(int argc, char **argv)
int prog_fd, cgroup_fd;
__u32 prog_cnt;
- if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
+ if (bpf_prog_test_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
&obj, &prog_fd)) {
printf("Failed to load DEV_CGROUP program\n");
goto out;
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
index fb5fd6841ef3..ebf68dce5504 100644
--- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -45,6 +45,8 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "testing_helpers.h"
+
int main(int argc, char **argv)
{
struct bpf_object *obj;
@@ -58,8 +60,8 @@ int main(int argc, char **argv)
return 2;
}
- ret = bpf_prog_load("test_lirc_mode2_kern.o",
- BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
+ ret = bpf_prog_test_load("test_lirc_mode2_kern.o",
+ BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
if (ret) {
printf("Failed to load bpf program\n");
return 1;
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 006be3963977..baa3e3ecae82 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -208,6 +208,7 @@ static void test_lpm_order(void)
static void test_lpm_map(int keysize)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
struct tlpm_node *t, *list = NULL;
struct bpf_lpm_trie_key *key;
@@ -233,11 +234,11 @@ static void test_lpm_map(int keysize)
key = alloca(sizeof(*key) + keysize);
memset(key, 0, sizeof(*key) + keysize);
- map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ map = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL,
sizeof(*key) + keysize,
keysize + 1,
4096,
- BPF_F_NO_PREALLOC);
+ &opts);
assert(map >= 0);
for (i = 0; i < n_nodes; ++i) {
@@ -329,6 +330,7 @@ static void test_lpm_map(int keysize)
static void test_lpm_ipaddr(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
struct bpf_lpm_trie_key *key_ipv4;
struct bpf_lpm_trie_key *key_ipv6;
size_t key_size_ipv4;
@@ -342,14 +344,14 @@ static void test_lpm_ipaddr(void)
key_ipv4 = alloca(key_size_ipv4);
key_ipv6 = alloca(key_size_ipv6);
- map_fd_ipv4 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ map_fd_ipv4 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL,
key_size_ipv4, sizeof(value),
- 100, BPF_F_NO_PREALLOC);
+ 100, &opts);
assert(map_fd_ipv4 >= 0);
- map_fd_ipv6 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ map_fd_ipv6 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL,
key_size_ipv6, sizeof(value),
- 100, BPF_F_NO_PREALLOC);
+ 100, &opts);
assert(map_fd_ipv6 >= 0);
/* Fill data some IPv4 and IPv6 address ranges */
@@ -423,6 +425,7 @@ static void test_lpm_ipaddr(void)
static void test_lpm_delete(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
struct bpf_lpm_trie_key *key;
size_t key_size;
int map_fd;
@@ -431,9 +434,9 @@ static void test_lpm_delete(void)
key_size = sizeof(*key) + sizeof(__u32);
key = alloca(key_size);
- map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL,
key_size, sizeof(value),
- 100, BPF_F_NO_PREALLOC);
+ 100, &opts);
assert(map_fd >= 0);
/* Add nodes:
@@ -535,6 +538,7 @@ static void test_lpm_delete(void)
static void test_lpm_get_next_key(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
struct bpf_lpm_trie_key *key_p, *next_key_p;
size_t key_size;
__u32 value = 0;
@@ -544,8 +548,7 @@ static void test_lpm_get_next_key(void)
key_p = alloca(key_size);
next_key_p = alloca(key_size);
- map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value),
- 100, BPF_F_NO_PREALLOC);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, sizeof(value), 100, &opts);
assert(map_fd >= 0);
/* empty tree. get_next_key should return ENOENT */
@@ -753,6 +756,7 @@ static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd)
static void test_lpm_multi_thread(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
struct lpm_mt_test_info info[4];
size_t key_size, value_size;
pthread_t thread_id[4];
@@ -762,8 +766,7 @@ static void test_lpm_multi_thread(void)
/* create a trie */
value_size = sizeof(__u32);
key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
- map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size,
- 100, BPF_F_NO_PREALLOC);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts);
/* create 4 threads to test update, delete, lookup and get_next_key */
setup_lpm_mt_test_info(&info[0], map_fd);
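
The LIBBPF_OPTS() declarations added throughout these tests do two jobs at once: they zero-initialize the opts struct and record its size, which is how libbpf keeps the opts-based APIs extensible across versions. A simplified sketch of what the declaration above expands to (the real macro lives in tools/lib/bpf/libbpf_common.h):

struct bpf_map_create_opts opts = {
        .sz = sizeof(struct bpf_map_create_opts),  /* ABI versioning */
        .map_flags = BPF_F_NO_PREALLOC,
};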
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 7e9049fa3edf..b9f1bbbc8aba 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -28,13 +28,14 @@ static int nr_cpus;
static int create_map(int map_type, int map_flags, unsigned int size)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags);
int map_fd;
- map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
- sizeof(unsigned long long), size, map_flags);
+ map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long),
+ sizeof(unsigned long long), size, &opts);
if (map_fd == -1)
- perror("bpf_create_map");
+ perror("bpf_map_create");
return map_fd;
}
@@ -42,8 +43,6 @@ static int create_map(int map_type, int map_flags, unsigned int size)
static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key,
void *value)
{
- struct bpf_load_program_attr prog;
- struct bpf_create_map_attr map;
struct bpf_insn insns[] = {
BPF_LD_MAP_VALUE(BPF_REG_9, 0, 0),
BPF_LD_MAP_FD(BPF_REG_1, fd),
@@ -64,25 +63,13 @@ static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key,
int mfd, pfd, ret, zero = 0;
__u32 retval = 0;
- memset(&map, 0, sizeof(map));
- map.map_type = BPF_MAP_TYPE_ARRAY;
- map.key_size = sizeof(int);
- map.value_size = sizeof(unsigned long long);
- map.max_entries = 1;
-
- mfd = bpf_create_map_xattr(&map);
+ mfd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(__u64), 1, NULL);
if (mfd < 0)
return -1;
insns[0].imm = mfd;
- memset(&prog, 0, sizeof(prog));
- prog.prog_type = BPF_PROG_TYPE_SCHED_CLS;
- prog.insns = insns;
- prog.insns_cnt = ARRAY_SIZE(insns);
- prog.license = "GPL";
-
- pfd = bpf_load_program_xattr(&prog, NULL, 0);
+ pfd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL", insns, ARRAY_SIZE(insns), NULL);
if (pfd < 0) {
close(mfd);
return -1;
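
The raw-instruction loader used above is the other half of the migration, and it explains a rename seen throughout this series: bpf_prog_load() used to be libbpf's object-file loader, so the selftests' file-based wrapper became bpf_prog_test_load() to free up the name. The new signature, from libbpf's bpf.h:

LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
                             const char *prog_name,
                             const char *license,
                             const struct bpf_insn *insns, size_t insn_cnt,
                             const struct bpf_prog_load_opts *opts);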
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index c7a36a9378f8..50f7e74ca0b9 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -23,8 +23,8 @@
#include <bpf/libbpf.h>
#include "bpf_util.h"
-#include "bpf_rlimit.h"
#include "test_maps.h"
+#include "testing_helpers.h"
#ifndef ENOTSUPP
#define ENOTSUPP 524
@@ -32,15 +32,14 @@
static int skips;
-static int map_flags;
+static struct bpf_map_create_opts map_opts = { .sz = sizeof(map_opts) };
static void test_hashmap(unsigned int task, void *data)
{
long long key, next_key, first_key, value;
int fd;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- 2, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), 2, &map_opts);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
@@ -137,8 +136,7 @@ static void test_hashmap_sizes(unsigned int task, void *data)
for (i = 1; i <= 512; i <<= 1)
for (j = 1; j <= 1 << 18; j <<= 1) {
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
- 2, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, i, j, 2, &map_opts);
if (fd < 0) {
if (errno == ENOMEM)
return;
@@ -159,8 +157,8 @@ static void test_hashmap_percpu(unsigned int task, void *data)
int expected_key_mask = 0;
int fd, i;
- fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
- sizeof(bpf_percpu(value, 0)), 2, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_HASH, NULL, sizeof(key),
+ sizeof(bpf_percpu(value, 0)), 2, &map_opts);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
@@ -271,11 +269,11 @@ static int helper_fill_hashmap(int max_entries)
int i, fd, ret;
long long key, value;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- max_entries, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ max_entries, &map_opts);
CHECK(fd < 0,
"failed to create hashmap",
- "err: %s, flags: 0x%x\n", strerror(errno), map_flags);
+ "err: %s, flags: 0x%x\n", strerror(errno), map_opts.map_flags);
for (i = 0; i < max_entries; i++) {
key = i; value = key;
@@ -331,8 +329,8 @@ static void test_hashmap_zero_seed(void)
int i, first, second, old_flags;
long long key, next_first, next_second;
- old_flags = map_flags;
- map_flags |= BPF_F_ZERO_SEED;
+ old_flags = map_opts.map_flags;
+ map_opts.map_flags |= BPF_F_ZERO_SEED;
first = helper_fill_hashmap(3);
second = helper_fill_hashmap(3);
@@ -354,7 +352,7 @@ static void test_hashmap_zero_seed(void)
key = next_first;
}
- map_flags = old_flags;
+ map_opts.map_flags = old_flags;
close(first);
close(second);
}
@@ -364,8 +362,7 @@ static void test_arraymap(unsigned int task, void *data)
int key, next_key, fd;
long long value;
- fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
- 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(key), sizeof(value), 2, NULL);
if (fd < 0) {
printf("Failed to create arraymap '%s'!\n", strerror(errno));
exit(1);
@@ -420,8 +417,8 @@ static void test_arraymap_percpu(unsigned int task, void *data)
BPF_DECLARE_PERCPU(long, values);
int key, next_key, fd, i;
- fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
- sizeof(bpf_percpu(values, 0)), 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, sizeof(key),
+ sizeof(bpf_percpu(values, 0)), 2, NULL);
if (fd < 0) {
printf("Failed to create arraymap '%s'!\n", strerror(errno));
exit(1);
@@ -483,8 +480,8 @@ static void test_arraymap_percpu_many_keys(void)
unsigned int nr_keys = 2000;
int key, fd, i;
- fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
- sizeof(bpf_percpu(values, 0)), nr_keys, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, sizeof(key),
+ sizeof(bpf_percpu(values, 0)), nr_keys, NULL);
if (fd < 0) {
printf("Failed to create per-cpu arraymap '%s'!\n",
strerror(errno));
@@ -515,8 +512,7 @@ static void test_devmap(unsigned int task, void *data)
int fd;
__u32 key, value;
- fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value),
- 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP, NULL, sizeof(key), sizeof(value), 2, NULL);
if (fd < 0) {
printf("Failed to create devmap '%s'!\n", strerror(errno));
exit(1);
@@ -530,8 +526,7 @@ static void test_devmap_hash(unsigned int task, void *data)
int fd;
__u32 key, value;
- fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP_HASH, sizeof(key), sizeof(value),
- 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP_HASH, NULL, sizeof(key), sizeof(value), 2, NULL);
if (fd < 0) {
printf("Failed to create devmap_hash '%s'!\n", strerror(errno));
exit(1);
@@ -551,14 +546,12 @@ static void test_queuemap(unsigned int task, void *data)
vals[i] = rand();
/* Invalid key size */
- fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE,
- map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 4, sizeof(val), MAP_SIZE, &map_opts);
assert(fd < 0 && errno == EINVAL);
- fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE,
- map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 0, sizeof(val), MAP_SIZE, &map_opts);
/* Queue map does not support BPF_F_NO_PREALLOC */
- if (map_flags & BPF_F_NO_PREALLOC) {
+ if (map_opts.map_flags & BPF_F_NO_PREALLOC) {
assert(fd < 0 && errno == EINVAL);
return;
}
@@ -609,14 +602,12 @@ static void test_stackmap(unsigned int task, void *data)
vals[i] = rand();
/* Invalid key size */
- fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE,
- map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_STACK, NULL, 4, sizeof(val), MAP_SIZE, &map_opts);
assert(fd < 0 && errno == EINVAL);
- fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE,
- map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_STACK, NULL, 0, sizeof(val), MAP_SIZE, &map_opts);
/* Stack map does not support BPF_F_NO_PREALLOC */
- if (map_flags & BPF_F_NO_PREALLOC) {
+ if (map_opts.map_flags & BPF_F_NO_PREALLOC) {
assert(fd < 0 && errno == EINVAL);
return;
}
@@ -743,9 +734,9 @@ static void test_sockmap(unsigned int tasks, void *data)
}
/* Test sockmap with connected sockets */
- fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP,
+ fd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL,
sizeof(key), sizeof(value),
- 6, 0);
+ 6, NULL);
if (fd < 0) {
if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) {
printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n",
@@ -830,21 +821,21 @@ static void test_sockmap(unsigned int tasks, void *data)
}
/* Load SK_SKB program and Attach */
- err = bpf_prog_load(SOCKMAP_PARSE_PROG,
+ err = bpf_prog_test_load(SOCKMAP_PARSE_PROG,
BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog);
if (err) {
printf("Failed to load SK_SKB parse prog\n");
goto out_sockmap;
}
- err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG,
+ err = bpf_prog_test_load(SOCKMAP_TCP_MSG_PROG,
BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog);
if (err) {
printf("Failed to load SK_SKB msg prog\n");
goto out_sockmap;
}
- err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
+ err = bpf_prog_test_load(SOCKMAP_VERDICT_PROG,
BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
if (err) {
printf("Failed to load SK_SKB verdict prog\n");
@@ -1167,8 +1158,7 @@ static void test_map_in_map(void)
obj = bpf_object__open(MAPINMAP_PROG);
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), sizeof(int),
- 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), sizeof(int), 2, NULL);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
@@ -1314,8 +1304,8 @@ static void test_map_large(void)
} key;
int fd, i, value;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ MAP_SIZE, &map_opts);
if (fd < 0) {
printf("Failed to create large map '%s'!\n", strerror(errno));
exit(1);
@@ -1468,8 +1458,8 @@ static void test_map_parallel(void)
int i, fd, key = 0, value = 0;
int data[2];
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ MAP_SIZE, &map_opts);
if (fd < 0) {
printf("Failed to create map for parallel test '%s'!\n",
strerror(errno));
@@ -1517,9 +1507,13 @@ static void test_map_parallel(void)
static void test_map_rdonly(void)
{
int fd, key = 0, value = 0;
+ __u32 old_flags;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags | BPF_F_RDONLY);
+ old_flags = map_opts.map_flags;
+ map_opts.map_flags |= BPF_F_RDONLY;
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ MAP_SIZE, &map_opts);
+ map_opts.map_flags = old_flags;
if (fd < 0) {
printf("Failed to create map for read only test '%s'!\n",
strerror(errno));
@@ -1542,9 +1536,13 @@ static void test_map_rdonly(void)
static void test_map_wronly_hash(void)
{
int fd, key = 0, value = 0;
+ __u32 old_flags;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags | BPF_F_WRONLY);
+ old_flags = map_opts.map_flags;
+ map_opts.map_flags |= BPF_F_WRONLY;
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ MAP_SIZE, &map_opts);
+ map_opts.map_flags = old_flags;
if (fd < 0) {
printf("Failed to create map for write only test '%s'!\n",
strerror(errno));
@@ -1566,13 +1564,17 @@ static void test_map_wronly_hash(void)
static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
{
int fd, value = 0;
+ __u32 old_flags;
+
assert(map_type == BPF_MAP_TYPE_QUEUE ||
map_type == BPF_MAP_TYPE_STACK);
- fd = bpf_create_map(map_type, 0, sizeof(value), MAP_SIZE,
- map_flags | BPF_F_WRONLY);
+ old_flags = map_opts.map_flags;
+ map_opts.map_flags |= BPF_F_WRONLY;
+ fd = bpf_map_create(map_type, NULL, 0, sizeof(value), MAP_SIZE, &map_opts);
+ map_opts.map_flags = old_flags;
/* Stack/Queue maps do not support BPF_F_NO_PREALLOC */
- if (map_flags & BPF_F_NO_PREALLOC) {
+ if (map_opts.map_flags & BPF_F_NO_PREALLOC) {
assert(fd < 0 && errno == EINVAL);
return;
}
@@ -1699,8 +1701,8 @@ static void test_reuseport_array(void)
__u32 fds_idx = 0;
int fd;
- map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
- sizeof(__u32), sizeof(__u64), array_size, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, NULL,
+ sizeof(__u32), sizeof(__u64), array_size, NULL);
CHECK(map_fd < 0, "reuseport array create",
"map_fd:%d, errno:%d\n", map_fd, errno);
@@ -1836,8 +1838,8 @@ static void test_reuseport_array(void)
close(map_fd);
/* Test 32 bit fd */
- map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
- sizeof(__u32), sizeof(__u32), array_size, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, NULL,
+ sizeof(__u32), sizeof(__u32), array_size, NULL);
CHECK(map_fd < 0, "reuseport array create",
"map_fd:%d, errno:%d\n", map_fd, errno);
prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64,
@@ -1895,10 +1897,10 @@ int main(void)
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
- map_flags = 0;
+ map_opts.map_flags = 0;
run_all_tests();
- map_flags = BPF_F_NO_PREALLOC;
+ map_opts.map_flags = BPF_F_NO_PREALLOC;
run_all_tests();
#define DEFINE_TEST(name) test_##name();
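
Because map_opts is now shared global state rather than a plain flags integer, the read-only and write-only tests above save and restore map_flags around each creation. If the pattern keeps spreading it could be factored into a small helper along these lines (hypothetical, not part of this patch):

static int map_create_extra_flags(enum bpf_map_type type, __u32 extra_flags,
                                  __u32 key_size, __u32 value_size,
                                  __u32 max_entries)
{
        __u32 saved_flags = map_opts.map_flags;
        int fd;

        map_opts.map_flags |= extra_flags;
        fd = bpf_map_create(type, NULL, key_size, value_size, max_entries,
                            &map_opts);
        map_opts.map_flags = saved_flags;  /* leave global state intact */
        return fd;
}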
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index c65986bd9d07..2ecb73a65206 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -4,7 +4,6 @@
#define _GNU_SOURCE
#include "test_progs.h"
#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
#include <argp.h>
#include <pthread.h>
#include <sched.h>
@@ -473,11 +472,11 @@ static struct prog_test_def prog_test_defs[] = {
#include <prog_tests/tests.h>
#undef DEFINE_TEST
};
-const int prog_test_cnt = ARRAY_SIZE(prog_test_defs);
+static const int prog_test_cnt = ARRAY_SIZE(prog_test_defs);
const char *argp_program_version = "test_progs 0.1";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
-const char argp_program_doc[] = "BPF selftests test runner";
+static const char argp_program_doc[] = "BPF selftests test runner";
enum ARG_KEYS {
ARG_TEST_NUM = 'n',
@@ -939,7 +938,7 @@ static void *dispatch_thread(void *ctx)
{
struct dispatch_data *data = ctx;
int sock_fd;
- FILE *log_fd = NULL;
+ FILE *log_fp = NULL;
sock_fd = data->sock_fd;
@@ -1002,8 +1001,8 @@ static void *dispatch_thread(void *ctx)
/* collect all logs */
if (msg_test_done.test_done.have_log) {
- log_fd = open_memstream(&result->log_buf, &result->log_cnt);
- if (!log_fd)
+ log_fp = open_memstream(&result->log_buf, &result->log_cnt);
+ if (!log_fp)
goto error;
while (true) {
@@ -1014,12 +1013,12 @@ static void *dispatch_thread(void *ctx)
if (msg_log.type != MSG_TEST_LOG)
goto error;
- fprintf(log_fd, "%s", msg_log.test_log.log_buf);
+ fprintf(log_fp, "%s", msg_log.test_log.log_buf);
if (msg_log.test_log.is_last)
break;
}
- fclose(log_fd);
- log_fd = NULL;
+ fclose(log_fp);
+ log_fp = NULL;
}
/* output log */
{
@@ -1045,8 +1044,8 @@ error:
if (env.debug)
fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno));
- if (log_fd)
- fclose(log_fd);
+ if (log_fp)
+ fclose(log_fp);
done:
{
struct msg msg_exit;
@@ -1198,11 +1197,11 @@ static int server_main(void)
env.sub_succ_cnt += result->sub_succ_cnt;
}
+ print_all_error_logs();
+
fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
- print_all_error_logs();
-
/* reap all workers */
for (i = 0; i < env.workers; i++) {
int wstatus, pid;
@@ -1342,7 +1341,6 @@ int main(int argc, char **argv)
/* Use libbpf 1.0 API mode */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
libbpf_set_print(libbpf_print_fn);
srand(time(NULL));
@@ -1484,11 +1482,11 @@ int main(int argc, char **argv)
if (env.list_test_names)
goto out;
+ print_all_error_logs();
+
fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
- print_all_error_logs();
-
close(env.saved_netns_fd);
out:
if (!env.list_test_names && env.has_testmod)
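
The log_fd to log_fp rename is more than cosmetic: open_memstream() returns a FILE *, not a file descriptor, and the buffer it fills is only guaranteed complete after fclose(). A self-contained sketch of the idiom the dispatcher relies on:

#include <stdio.h>
#include <stdlib.h>

int collect_log(void)
{
        char *buf = NULL;
        size_t len = 0;
        FILE *fp = open_memstream(&buf, &len);

        if (!fp)
                return -1;
        fprintf(fp, "worker log line\n");
        fclose(fp);                     /* finalizes buf and len */
        printf("%zu bytes: %s", len, buf);
        free(buf);
        return 0;
}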
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index 9613f7538840..fe10f8134278 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -35,18 +35,21 @@ struct sock_test {
/* Endpoint to bind() to */
const char *ip;
unsigned short port;
+ unsigned short port_retry;
/* Expected test result */
enum {
LOAD_REJECT,
ATTACH_REJECT,
BIND_REJECT,
SUCCESS,
+ RETRY_SUCCESS,
+ RETRY_REJECT
} result;
};
static struct sock_test tests[] = {
{
- "bind4 load with invalid access: src_ip6",
+ .descr = "bind4 load with invalid access: src_ip6",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -54,16 +57,12 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = LOAD_REJECT,
},
{
- "bind4 load with invalid access: mark",
+ .descr = "bind4 load with invalid access: mark",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -71,16 +70,12 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = LOAD_REJECT,
},
{
- "bind6 load with invalid access: src_ip4",
+ .descr = "bind6 load with invalid access: src_ip4",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -88,16 +83,12 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .result = LOAD_REJECT,
},
{
- "sock_create load with invalid access: src_port",
+ .descr = "sock_create load with invalid access: src_port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -105,128 +96,106 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET_SOCK_CREATE,
- BPF_CGROUP_INET_SOCK_CREATE,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .result = LOAD_REJECT,
},
{
- "sock_create load w/o expected_attach_type (compat mode)",
+ .descr = "sock_create load w/o expected_attach_type (compat mode)",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- 0,
- BPF_CGROUP_INET_SOCK_CREATE,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 8097,
- SUCCESS,
+ .expected_attach_type = 0,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 8097,
+ .result = SUCCESS,
},
{
- "sock_create load w/ expected_attach_type",
+ .descr = "sock_create load w/ expected_attach_type",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET_SOCK_CREATE,
- BPF_CGROUP_INET_SOCK_CREATE,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 8097,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 8097,
+ .result = SUCCESS,
},
{
- "attach type mismatch bind4 vs bind6",
+ .descr = "attach type mismatch bind4 vs bind6",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .result = ATTACH_REJECT,
},
{
- "attach type mismatch bind6 vs bind4",
+ .descr = "attach type mismatch bind6 vs bind4",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ATTACH_REJECT,
},
{
- "attach type mismatch default vs bind4",
+ .descr = "attach type mismatch default vs bind4",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- 0,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = 0,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ATTACH_REJECT,
},
{
- "attach type mismatch bind6 vs sock_create",
+ .descr = "attach type mismatch bind6 vs sock_create",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET_SOCK_CREATE,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .result = ATTACH_REJECT,
},
{
- "bind4 reject all",
+ .descr = "bind4 reject all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "0.0.0.0",
- 0,
- BIND_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "0.0.0.0",
+ .result = BIND_REJECT,
},
{
- "bind6 reject all",
+ .descr = "bind6 reject all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::",
- 0,
- BIND_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::",
+ .result = BIND_REJECT,
},
{
- "bind6 deny specific IP & port",
+ .descr = "bind6 deny specific IP & port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
@@ -247,16 +216,16 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::1",
- 8193,
- BIND_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::1",
+ .port = 8193,
+ .result = BIND_REJECT,
},
{
- "bind4 allow specific IP & port",
+ .descr = "bind4 allow specific IP & port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
@@ -277,41 +246,132 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 4098,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .result = SUCCESS,
},
{
- "bind4 allow all",
+ .descr = "bind4 deny specific IP & port of TCP, and retry",
.insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "0.0.0.0",
- 0,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .port_retry = 5000,
+ .result = RETRY_SUCCESS,
},
{
- "bind6 allow all",
+ .descr = "bind4 deny specific IP & port of UDP, and retry",
.insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::",
- 0,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .port_retry = 5000,
+ .result = RETRY_SUCCESS,
+ },
+ {
+ .descr = "bind6 deny specific IP & port, and retry",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip6[3])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x00000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::1",
+ .port = 8193,
+ .port_retry = 9000,
+ .result = RETRY_SUCCESS,
+ },
+ {
+ .descr = "bind4 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "0.0.0.0",
+ .result = SUCCESS,
+ },
+ {
+ .descr = "bind6 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::",
+ .result = SUCCESS,
},
};
@@ -328,18 +388,17 @@ static size_t probe_prog_length(const struct bpf_insn *fp)
static int load_sock_prog(const struct bpf_insn *prog,
enum bpf_attach_type attach_type)
{
- struct bpf_load_program_attr attr;
- int ret;
-
- memset(&attr, 0, sizeof(struct bpf_load_program_attr));
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
- attr.expected_attach_type = attach_type;
- attr.insns = prog;
- attr.insns_cnt = probe_prog_length(attr.insns);
- attr.license = "GPL";
- attr.log_level = 2;
-
- ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ int ret, insn_cnt;
+
+ insn_cnt = probe_prog_length(prog);
+
+ opts.expected_attach_type = attach_type;
+ opts.log_buf = bpf_log_buf;
+ opts.log_size = BPF_LOG_BUF_SIZE;
+ opts.log_level = 2;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", prog, insn_cnt, &opts);
if (verbose && ret < 0)
fprintf(stderr, "%s\n", bpf_log_buf);
@@ -352,14 +411,15 @@ static int attach_sock_prog(int cgfd, int progfd,
return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
}
-static int bind_sock(int domain, int type, const char *ip, unsigned short port)
+static int bind_sock(int domain, int type, const char *ip,
+ unsigned short port, unsigned short port_retry)
{
struct sockaddr_storage addr;
struct sockaddr_in6 *addr6;
struct sockaddr_in *addr4;
int sockfd = -1;
socklen_t len;
- int err = 0;
+ int res = SUCCESS;
sockfd = socket(domain, type, 0);
if (sockfd < 0)
@@ -385,21 +445,44 @@ static int bind_sock(int domain, int type, const char *ip, unsigned short port)
goto err;
}
- if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
- goto err;
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
+ /* sys_bind() may fail for different reasons, errno has to be
+ * checked to confirm that BPF program rejected it.
+ */
+ if (errno != EPERM)
+ goto err;
+ if (port_retry)
+ goto retry;
+ res = BIND_REJECT;
+ goto out;
+ }
goto out;
+retry:
+ if (domain == AF_INET)
+ addr4->sin_port = htons(port_retry);
+ else
+ addr6->sin6_port = htons(port_retry);
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
+ if (errno != EPERM)
+ goto err;
+ res = RETRY_REJECT;
+ } else {
+ res = RETRY_SUCCESS;
+ }
+ goto out;
err:
- err = -1;
+ res = -1;
out:
close(sockfd);
- return err;
+ return res;
}
static int run_test_case(int cgfd, const struct sock_test *test)
{
int progfd = -1;
int err = 0;
+ int res;
printf("Test case: %s .. ", test->descr);
progfd = load_sock_prog(test->insns, test->expected_attach_type);
@@ -417,21 +500,11 @@ static int run_test_case(int cgfd, const struct sock_test *test)
goto err;
}
- if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
- /* sys_bind() may fail for different reasons, errno has to be
- * checked to confirm that BPF program rejected it.
- */
- if (test->result == BIND_REJECT && errno == EPERM)
- goto out;
- else
- goto err;
- }
-
+ res = bind_sock(test->domain, test->type, test->ip, test->port,
+ test->port_retry);
+ if (res > 0 && test->result == res)
+ goto out;
- if (test->result != SUCCESS)
- goto err;
-
- goto out;
err:
err = -1;
out:
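
The reworked bind_sock() folds the EPERM handling that previously lived in run_test_case() into its return value, which is what makes the new RETRY_SUCCESS and RETRY_REJECT outcomes expressible. The resulting decision tree, summarized:

/*
 * bind(port) succeeds                              -> SUCCESS
 * bind(port) fails, errno == EPERM, no port_retry  -> BIND_REJECT
 * bind(port) fails, errno == EPERM, port_retry set:
 *         bind(port_retry) succeeds                -> RETRY_SUCCESS
 *         bind(port_retry) fails, errno == EPERM   -> RETRY_REJECT
 * any other bind() errno                           -> -1 (infra error)
 */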
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index aa3f185fcb89..f0c8d05ba6d1 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -645,17 +645,14 @@ static int mk_sockaddr(int domain, const char *ip, unsigned short port,
static int load_insns(const struct sock_addr_test *test,
const struct bpf_insn *insns, size_t insns_cnt)
{
- struct bpf_load_program_attr load_attr;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
int ret;
- memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
- load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
- load_attr.expected_attach_type = test->expected_attach_type;
- load_attr.insns = insns;
- load_attr.insns_cnt = insns_cnt;
- load_attr.license = "GPL";
+ opts.expected_attach_type = test->expected_attach_type;
+ opts.log_buf = bpf_log_buf;
+ opts.log_size = BPF_LOG_BUF_SIZE;
- ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, NULL, "GPL", insns, insns_cnt, &opts);
if (ret < 0 && test->expected_result != LOAD_REJECT) {
log_err(">>> Loading program error.\n"
">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
@@ -666,23 +663,36 @@ static int load_insns(const struct sock_addr_test *test,
static int load_path(const struct sock_addr_test *test, const char *path)
{
- struct bpf_prog_load_attr attr;
struct bpf_object *obj;
- int prog_fd;
+ struct bpf_program *prog;
+ int err;
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = path;
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
- attr.expected_attach_type = test->expected_attach_type;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
+ obj = bpf_object__open_file(path, NULL);
+ err = libbpf_get_error(obj);
+ if (err) {
+ log_err(">>> Opening BPF object (%s) error.\n", path);
+ return -1;
+ }
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (!prog)
+ goto err_out;
- if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
+ bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR);
+ bpf_program__set_expected_attach_type(prog, test->expected_attach_type);
+ bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32);
+
+ err = bpf_object__load(obj);
+ if (err) {
if (test->expected_result != LOAD_REJECT)
log_err(">>> Loading program (%s) error.\n", path);
- return -1;
+ goto err_out;
}
- return prog_fd;
+ return bpf_program__fd(prog);
+err_out:
+ bpf_object__close(obj);
+ return -1;
}
static int bind4_prog_load(const struct sock_addr_test *test)
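
The load_path() rewrite is the canonical replacement pattern for the deprecated bpf_prog_load_xattr(): open the object, set per-program attributes through setters, then load. Reduced to its skeleton (error handling trimmed; the full version is above):

struct bpf_object *obj = bpf_object__open_file(path, NULL);
struct bpf_program *prog = bpf_object__next_program(obj, NULL);

bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR);
bpf_program__set_expected_attach_type(prog, test->expected_attach_type);
bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32);

if (!bpf_object__load(obj))
        prog_fd = bpf_program__fd(prog);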
diff --git a/tools/testing/selftests/bpf/test_stub.c b/tools/testing/selftests/bpf/test_stub.c
deleted file mode 100644
index 47e132726203..000000000000
--- a/tools/testing/selftests/bpf/test_stub.c
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-/* Copyright (C) 2019 Netronome Systems, Inc. */
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-#include <string.h>
-
-int extra_prog_load_log_flags = 0;
-
-int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
- struct bpf_object **pobj, int *prog_fd)
-{
- struct bpf_prog_load_attr attr;
-
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = file;
- attr.prog_type = type;
- attr.expected_attach_type = 0;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
- attr.log_level = extra_prog_load_log_flags;
-
- return bpf_prog_load_xattr(&attr, pobj, prog_fd);
-}
-
-int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
- size_t insns_cnt, const char *license,
- __u32 kern_version, char *log_buf,
- size_t log_buf_sz)
-{
- struct bpf_load_program_attr load_attr;
-
- memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
- load_attr.prog_type = type;
- load_attr.expected_attach_type = 0;
- load_attr.name = NULL;
- load_attr.insns = insns;
- load_attr.insns_cnt = insns_cnt;
- load_attr.license = license;
- load_attr.kern_version = kern_version;
- load_attr.prog_flags = BPF_F_TEST_RND_HI32;
- load_attr.log_level = extra_prog_load_log_flags;
-
- return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
-}
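
test_stub.c disappears, but the two wrappers it defined are still called everywhere above; the new #include "testing_helpers.h" lines indicate they now live in testing_helpers.c, rebuilt on top of the non-deprecated loader. A hedged sketch of what that reimplementation can look like (the actual code is not shown in this diff; extra_prog_load_log_flags is assumed to move along with it):

int bpf_test_load_program(enum bpf_prog_type type,
                          const struct bpf_insn *insns, size_t insns_cnt,
                          const char *license, __u32 kern_version,
                          char *log_buf, size_t log_buf_sz)
{
        LIBBPF_OPTS(bpf_prog_load_opts, opts,
                .kern_version = kern_version,
                .prog_flags = BPF_F_TEST_RND_HI32,
                .log_level = extra_prog_load_log_flags, /* assumed global */
                .log_buf = log_buf,
                .log_size = log_buf_sz,
        );

        return bpf_prog_load(type, NULL, license, insns, insns_cnt, &opts);
}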
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index a3bb6d399daa..4f6cf833b522 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -17,6 +17,7 @@
#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
#define CG_PATH "/foo"
#define MAX_INSNS 512
@@ -1435,14 +1436,10 @@ static int load_sysctl_prog_insns(struct sysctl_test *test,
const char *sysctl_path)
{
struct bpf_insn *prog = test->insns;
- struct bpf_load_program_attr attr;
- int ret;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ int ret, insn_cnt;
- memset(&attr, 0, sizeof(struct bpf_load_program_attr));
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
- attr.insns = prog;
- attr.insns_cnt = probe_prog_length(attr.insns);
- attr.license = "GPL";
+ insn_cnt = probe_prog_length(prog);
if (test->fixup_value_insn) {
char buf[128];
@@ -1465,7 +1462,10 @@ static int load_sysctl_prog_insns(struct sysctl_test *test,
return -1;
}
- ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ opts.log_buf = bpf_log_buf;
+ opts.log_size = BPF_LOG_BUF_SIZE;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SYSCTL, NULL, "GPL", prog, insn_cnt, &opts);
if (ret < 0 && test->result != LOAD_REJECT) {
log_err(">>> Loading program error.\n"
">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
@@ -1476,15 +1476,10 @@ static int load_sysctl_prog_insns(struct sysctl_test *test,
static int load_sysctl_prog_file(struct sysctl_test *test)
{
- struct bpf_prog_load_attr attr;
struct bpf_object *obj;
int prog_fd;
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = test->prog_file;
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
-
- if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
+ if (bpf_prog_test_load(test->prog_file, BPF_PROG_TYPE_CGROUP_SYSCTL, &obj, &prog_fd)) {
if (test->result != LOAD_REJECT)
log_err(">>> Loading program (%s) error.\n",
test->prog_file);
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 6272c784ca2a..0851c42ee31c 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -21,6 +21,7 @@
#include "../../../include/linux/filter.h"
#include "bpf_rlimit.h"
+#include "testing_helpers.h"
static struct bpf_insn prog[BPF_MAXINSNS];
@@ -57,7 +58,7 @@ static int bpf_try_load_prog(int insns, int fd_map,
int fd_prog;
bpf_filler(insns, fd_map);
- fd_prog = bpf_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0,
+ fd_prog = bpf_test_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0,
NULL, 0);
assert(fd_prog > 0);
if (fd_map > 0)
@@ -184,11 +185,12 @@ static void do_test(uint32_t *tests, int start_insns, int fd_map,
int main(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
uint32_t tests = 0;
int i, fd_map;
- fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
- sizeof(int), 1, BPF_F_NO_PREALLOC);
+ fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
+ sizeof(int), 1, &opts);
assert(fd_map > 0);
for (i = 0; i < 5; i++) {
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 4a39304cc5a6..4c5114765b23 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -25,6 +25,7 @@
#include "test_tcpnotify.h"
#include "trace_helpers.h"
+#include "testing_helpers.h"
#define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L)
@@ -71,7 +72,6 @@ int main(int argc, char **argv)
{
const char *file = "test_tcpnotify_kern.o";
struct bpf_map *perf_map, *global_map;
- struct perf_buffer_opts pb_opts = {};
struct tcpnotify_globals g = {0};
struct perf_buffer *pb = NULL;
const char *cg_path = "/foo";
@@ -92,7 +92,7 @@ int main(int argc, char **argv)
if (cg_fd < 0)
goto err;
- if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+ if (bpf_prog_test_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
printf("FAILED: load_bpf_file failed for: %s\n", file);
goto err;
}
@@ -116,8 +116,7 @@ int main(int argc, char **argv)
return -1;
}
- pb_opts.sample_cb = dummyfn;
- pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts);
+ pb = perf_buffer__new(bpf_map__fd(perf_map), 8, dummyfn, NULL, NULL, NULL);
if (!pb)
goto err;
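
The perf_buffer__new() call above uses its libbpf 1.0 signature, where the sample/lost callbacks and their ctx are plain parameters and the trailing opts struct is kept free for future extension:

struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
                                     perf_buffer_sample_fn sample_cb,
                                     perf_buffer_lost_fn lost_cb,
                                     void *ctx,
                                     const struct perf_buffer_opts *opts);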
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 465ef3f112c0..76cd903117af 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -41,7 +41,6 @@
# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
# endif
#endif
-#include "bpf_rlimit.h"
#include "bpf_rand.h"
#include "bpf_util.h"
#include "test_btf.h"
@@ -54,7 +53,7 @@
#define MAX_INSNS BPF_MAXINSNS
#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 21
+#define MAX_NR_MAPS 22
#define MAX_TEST_RUNS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
@@ -462,11 +461,11 @@ static int __create_map(uint32_t type, uint32_t size_key,
uint32_t size_value, uint32_t max_elem,
uint32_t extra_flags)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
int fd;
- fd = bpf_create_map(type, size_key, size_value, max_elem,
- (type == BPF_MAP_TYPE_HASH ?
- BPF_F_NO_PREALLOC : 0) | extra_flags);
+ opts.map_flags = (type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0) | extra_flags;
+ fd = bpf_map_create(type, NULL, size_key, size_value, max_elem, &opts);
if (fd < 0) {
if (skip_unsupported_map(type))
return -1;
@@ -499,8 +498,7 @@ static int create_prog_dummy_simple(enum bpf_prog_type prog_type, int ret)
BPF_EXIT_INSN(),
};
- return bpf_load_program(prog_type, prog,
- ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ return bpf_prog_load(prog_type, NULL, "GPL", prog, ARRAY_SIZE(prog), NULL);
}
static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd,
@@ -515,8 +513,7 @@ static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd,
BPF_EXIT_INSN(),
};
- return bpf_load_program(prog_type, prog,
- ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ return bpf_prog_load(prog_type, NULL, "GPL", prog, ARRAY_SIZE(prog), NULL);
}
static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem,
@@ -524,8 +521,8 @@ static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem,
{
int mfd, p1fd, p2fd, p3fd;
- mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
- sizeof(int), max_elem, 0);
+ mfd = bpf_map_create(BPF_MAP_TYPE_PROG_ARRAY, NULL, sizeof(int),
+ sizeof(int), max_elem, NULL);
if (mfd < 0) {
if (skip_unsupported_map(BPF_MAP_TYPE_PROG_ARRAY))
return -1;
@@ -555,10 +552,11 @@ err:
static int create_map_in_map(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
int inner_map_fd, outer_map_fd;
- inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
- sizeof(int), 1, 0);
+ inner_map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int),
+ sizeof(int), 1, NULL);
if (inner_map_fd < 0) {
if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY))
return -1;
@@ -566,8 +564,9 @@ static int create_map_in_map(void)
return inner_map_fd;
}
- outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
- sizeof(int), inner_map_fd, 1, 0);
+ opts.inner_map_fd = inner_map_fd;
+ outer_map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
+ sizeof(int), sizeof(int), 1, &opts);
if (outer_map_fd < 0) {
if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY_OF_MAPS))
return -1;
@@ -586,8 +585,8 @@ static int create_cgroup_storage(bool percpu)
BPF_MAP_TYPE_CGROUP_STORAGE;
int fd;
- fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key),
- TEST_DATA_LEN, 0, 0);
+ fd = bpf_map_create(type, NULL, sizeof(struct bpf_cgroup_storage_key),
+ TEST_DATA_LEN, 0, NULL);
if (fd < 0) {
if (skip_unsupported_map(type))
return -1;
@@ -654,7 +653,7 @@ static int load_btf(void)
memcpy(ptr, btf_str_sec, hdr.str_len);
ptr += hdr.str_len;
- btf_fd = bpf_load_btf(raw_btf, ptr - raw_btf, 0, 0, 0);
+ btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, NULL);
free(raw_btf);
if (btf_fd < 0)
return -1;
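
bpf_btf_load() is the drop-in successor to bpf_load_btf(); the log buffer, size, and level that used to be positional arguments move into the opts struct, which this call site can simply pass as NULL:

LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size,
                            const struct bpf_btf_load_opts *opts);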
@@ -663,22 +662,17 @@ static int load_btf(void)
static int create_map_spin_lock(void)
{
- struct bpf_create_map_attr attr = {
- .name = "test_map",
- .map_type = BPF_MAP_TYPE_ARRAY,
- .key_size = 4,
- .value_size = 8,
- .max_entries = 1,
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
.btf_key_type_id = 1,
.btf_value_type_id = 3,
- };
+ );
int fd, btf_fd;
btf_fd = load_btf();
if (btf_fd < 0)
return -1;
- attr.btf_fd = btf_fd;
- fd = bpf_create_map_xattr(&attr);
+ opts.btf_fd = btf_fd;
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 8, 1, &opts);
if (fd < 0)
printf("Failed to create map with spin_lock\n");
return fd;
@@ -686,24 +680,19 @@ static int create_map_spin_lock(void)
static int create_sk_storage_map(void)
{
- struct bpf_create_map_attr attr = {
- .name = "test_map",
- .map_type = BPF_MAP_TYPE_SK_STORAGE,
- .key_size = 4,
- .value_size = 8,
- .max_entries = 0,
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
.map_flags = BPF_F_NO_PREALLOC,
.btf_key_type_id = 1,
.btf_value_type_id = 3,
- };
+ );
int fd, btf_fd;
btf_fd = load_btf();
if (btf_fd < 0)
return -1;
- attr.btf_fd = btf_fd;
- fd = bpf_create_map_xattr(&attr);
- close(attr.btf_fd);
+ opts.btf_fd = btf_fd;
+ fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "test_map", 4, 8, 0, &opts);
+ close(opts.btf_fd);
if (fd < 0)
printf("Failed to create sk_storage_map\n");
return fd;
@@ -711,22 +700,18 @@ static int create_sk_storage_map(void)
static int create_map_timer(void)
{
- struct bpf_create_map_attr attr = {
- .name = "test_map",
- .map_type = BPF_MAP_TYPE_ARRAY,
- .key_size = 4,
- .value_size = 16,
- .max_entries = 1,
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
.btf_key_type_id = 1,
.btf_value_type_id = 5,
- };
+ );
int fd, btf_fd;
btf_fd = load_btf();
if (btf_fd < 0)
return -1;
- attr.btf_fd = btf_fd;
- fd = bpf_create_map_xattr(&attr);
+
+ opts.btf_fd = btf_fd;
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 16, 1, &opts);
if (fd < 0)
printf("Failed to create map with timer\n");
return fd;
@@ -1089,7 +1074,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
int fd_prog, expected_ret, alignment_prevented_execution;
int prog_len, prog_type = test->prog_type;
struct bpf_insn *prog = test->insns;
- struct bpf_load_program_attr attr;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
int run_errs, run_successes;
int map_fds[MAX_NR_MAPS];
const char *expected_err;
@@ -1129,32 +1114,34 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
test->result_unpriv : test->result;
expected_err = unpriv && test->errstr_unpriv ?
test->errstr_unpriv : test->errstr;
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = prog_type;
- attr.expected_attach_type = test->expected_attach_type;
- attr.insns = prog;
- attr.insns_cnt = prog_len;
- attr.license = "GPL";
+
+ opts.expected_attach_type = test->expected_attach_type;
if (verbose)
- attr.log_level = 1;
+ opts.log_level = 1;
else if (expected_ret == VERBOSE_ACCEPT)
- attr.log_level = 2;
+ opts.log_level = 2;
else
- attr.log_level = 4;
- attr.prog_flags = pflags;
+ opts.log_level = 4;
+ opts.prog_flags = pflags;
if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) {
- attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc,
- attr.expected_attach_type);
- if (attr.attach_btf_id < 0) {
+ int attach_btf_id;
+
+ attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc,
+ opts.expected_attach_type);
+ if (attach_btf_id < 0) {
printf("FAIL\nFailed to find BTF ID for '%s'!\n",
test->kfunc);
(*errors)++;
return;
}
+
+ opts.attach_btf_id = attach_btf_id;
}
- fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
+ opts.log_buf = bpf_vlog;
+ opts.log_size = sizeof(bpf_vlog);
+ fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts);
saved_errno = errno;
/* BPF_PROG_TYPE_TRACING requires more setup and
@@ -1407,6 +1394,9 @@ int main(int argc, char **argv)
return EXIT_FAILURE;
}
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
bpf_semi_rand_init();
return do_test(unpriv, from, to);
}
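
A note on the API conversion above: the deprecated bpf_create_map()/bpf_create_map_xattr()/bpf_load_program() wrappers are replaced by bpf_map_create() and bpf_prog_load(), both of which take an optional LIBBPF_OPTS-declared options struct instead of positional flag arguments. A minimal self-contained sketch of the new calling convention (map name, sizes, and the trivial program are illustrative only; the insn macros are assumed to come from the selftests' filter.h helpers):

    #include <linux/bpf.h>
    #include <bpf/bpf.h>

    static int create_example_map(void)
    {
            /* opts is zero-initialized and carries the struct size */
            LIBBPF_OPTS(bpf_map_create_opts, opts,
                    .map_flags = BPF_F_NO_PREALLOC,
            );

            return bpf_map_create(BPF_MAP_TYPE_HASH, "example_map",
                                  sizeof(int), sizeof(long), 128, &opts);
    }

    static int load_example_prog(void)
    {
            struct bpf_insn prog[] = {
                    BPF_MOV64_IMM(BPF_REG_0, 0),
                    BPF_EXIT_INSN(),
            };

            /* NULL opts: defaults suffice for a trivial program */
            return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
                                 prog, 2, NULL);
    }
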
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 800d503e5cb4..795b6798ccee 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -1,7 +1,11 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
/* Copyright (C) 2020 Facebook, Inc. */
#include <stdlib.h>
+#include <string.h>
#include <errno.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "testing_helpers.h"
int parse_num_list(const char *s, bool **num_set, int *num_set_len)
@@ -78,3 +82,61 @@ __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
}
return info->prog_id;
}
+
+int extra_prog_load_log_flags = 0;
+
+int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .kernel_log_level = extra_prog_load_log_flags,
+ );
+ struct bpf_object *obj;
+ struct bpf_program *prog;
+ __u32 flags;
+ int err;
+
+ obj = bpf_object__open_file(file, &opts);
+ if (!obj)
+ return -errno;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (!prog) {
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ if (type != BPF_PROG_TYPE_UNSPEC)
+ bpf_program__set_type(prog, type);
+
+ flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32;
+ bpf_program__set_flags(prog, flags);
+
+ err = bpf_object__load(obj);
+ if (err)
+ goto err_out;
+
+ *pobj = obj;
+ *prog_fd = bpf_program__fd(prog);
+
+ return 0;
+err_out:
+ bpf_object__close(obj);
+ return err;
+}
+
+int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, const char *license,
+ __u32 kern_version, char *log_buf,
+ size_t log_buf_sz)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .kern_version = kern_version,
+ .prog_flags = BPF_F_TEST_RND_HI32,
+ .log_level = extra_prog_load_log_flags,
+ .log_buf = log_buf,
+ .log_size = log_buf_sz,
+ );
+
+ return bpf_prog_load(type, NULL, license, insns, insns_cnt, &opts);
+}
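
The two helpers added here give tests a one-call load path that forces BPF_F_TEST_RND_HI32 and honors extra_prog_load_log_flags. A usage sketch from a test's perspective (the object file name is hypothetical):

    #include <bpf/libbpf.h>
    #include "testing_helpers.h"

    static int run_one(void)
    {
            struct bpf_object *obj;
            int prog_fd, err;

            /* BPF_PROG_TYPE_UNSPEC keeps the type inferred from the ELF section */
            err = bpf_prog_test_load("example_prog.o", BPF_PROG_TYPE_UNSPEC,
                                     &obj, &prog_fd);
            if (err)
                    return err;

            /* ... exercise prog_fd ... */
            bpf_object__close(obj);
            return 0;
    }
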
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index d4f8e749611b..f46ebc476ee8 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -6,3 +6,9 @@
int parse_num_list(const char *s, bool **set, int *set_len);
__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info);
+int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd);
+int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, const char *license,
+ __u32 kern_version, char *log_buf,
+ size_t log_buf_sz);
diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
index c22dc83a41fd..b39665f33524 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
@@ -138,6 +138,8 @@
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr into mem",
},
{
"Dest pointer in r0 - succeed",
@@ -156,4 +158,88 @@
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr into mem",
+},
+{
+ "Dest pointer in r0 - succeed, check 2",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ /* r5 = &val */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, r5); */
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
+ /* r1 = *r0 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr into mem",
+},
+{
+ "Dest pointer in r0 - succeed, check 3",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ /* r5 = &val */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, r5); */
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid size of register fill",
+ .errstr_unpriv = "R0 leaks addr into mem",
+},
+{
+ "Dest pointer in r0 - succeed, check 4",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV32_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+ /* r5 = &val */
+ BPF_MOV32_REG(BPF_REG_5, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, r5); */
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
+ /* r1 = *r10 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R10 partial copy of pointer",
+},
+{
+ "Dest pointer in r0 - succeed, check 5",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV32_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+ /* r5 = &val */
+ BPF_MOV32_REG(BPF_REG_5, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, r5); */
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
+ /* r1 = *r0 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access",
+ .errstr_unpriv = "R10 partial copy of pointer",
},
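
For readers less familiar with the atomic encoding exercised above: BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, dst, src, off) compares R0 with the 8 bytes at dst + off, stores src there on a match, and always leaves the old memory value in R0 — which is how a stack slot holding a pointer can end up leaking through R0. As C pseudocode (a sketch of the semantics, not the kernel's implementation):

    /* models: r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg) */
    static unsigned long long cmpxchg_dw(unsigned long long *addr,
                                         unsigned long long r0,
                                         unsigned long long src)
    {
            unsigned long long old = *addr;

            if (old == r0)
                    *addr = src;    /* swap only when the compare succeeds */
            return old;             /* written back into R0 either way */
    }
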
diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch.c b/tools/testing/selftests/bpf/verifier/atomic_fetch.c
index 3bc9ff7a860b..5bf03fb4fa2b 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_fetch.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_fetch.c
@@ -1,3 +1,97 @@
+{
+ "atomic dw/fetch and address leakage of (map ptr & -1) via stack slot",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, -1),
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ BPF_LD_MAP_FD(BPF_REG_9, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0),
+ BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_2, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 2, 4 },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "leaking pointer from stack off -8",
+},
+{
+ "atomic dw/fetch and address leakage of (map ptr & -1) via returned value",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, -1),
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ BPF_LD_MAP_FD(BPF_REG_9, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0),
+ BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 2, 4 },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "leaking pointer from stack off -8",
+},
+{
+ "atomic w/fetch and address leakage of (map ptr & -1) via stack slot",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, -1),
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ BPF_LD_MAP_FD(BPF_REG_9, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0),
+ BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_2, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 2, 4 },
+ .result = REJECT,
+ .errstr = "invalid size of register fill",
+},
+{
+ "atomic w/fetch and address leakage of (map ptr & -1) via returned value",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, -1),
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ BPF_LD_MAP_FD(BPF_REG_9, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0),
+ BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 2, 4 },
+ .result = REJECT,
+ .errstr = "invalid size of register fill",
+},
#define __ATOMIC_FETCH_OP_TEST(src_reg, dst_reg, operand1, op, operand2, expect) \
{ \
"atomic fetch " #op ", src=" #dst_reg " dst=" #dst_reg, \
diff --git a/tools/testing/selftests/bpf/verifier/btf_ctx_access.c b/tools/testing/selftests/bpf/verifier/btf_ctx_access.c
new file mode 100644
index 000000000000..6340db6b46dc
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/btf_ctx_access.c
@@ -0,0 +1,12 @@
+{
+ "btf_ctx_access accept",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 8), /* load 2nd argument value (int pointer) */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "bpf_modify_return_test",
+},
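
The offset-8 load is valid because a tracing program's context is an array of __u64 argument slots, so argument i of the traced function sits at byte offset i * 8. The usual C spelling of the same access goes through the BPF_PROG() convenience macro; a sketch assuming bpf_tracing.h is available:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    SEC("fentry/bpf_modify_return_test")
    int BPF_PROG(on_modify_return, int a, int *b)
    {
            /* BPF_PROG expands into loads of ctx[0], ctx[1], ... */
            return 0;
    }

    char _license[] SEC("license") = "GPL";
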
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
index d78627be060f..a2b006e2fd06 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -229,6 +229,24 @@
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_port)),
+ /* 1-byte read from ingress_ifindex field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 3),
+ /* 2-byte read from ingress_ifindex field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 2),
+ /* 4-byte read from ingress_ifindex field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex)),
+
/* 8-byte read from sk field */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, sk)),
@@ -351,6 +369,20 @@
.expected_attach_type = BPF_SK_LOOKUP,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "invalid 8-byte read from bpf_sk_lookup ingress_ifindex field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
/* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */
{
"invalid 4-byte read from bpf_sk_lookup sk field",
diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c
new file mode 100644
index 000000000000..b64d33e4833c
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/ringbuf.c
@@ -0,0 +1,95 @@
+{
+ "ringbuf: invalid reservation offset 1",
+ .insns = {
+ /* reserve 8 byte ringbuf memory */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ /* store a pointer to the reserved memory in R6 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* check whether the reservation was successful */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ /* spill R6(mem) into the stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ /* fill it back in R7 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
+ /* should be able to access *(R7) = 0 */
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
+ /* submit the reserved ringbuf memory */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ /* add invalid offset to reserved ringbuf memory */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xcafe),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 1 },
+ .result = REJECT,
+ .errstr = "dereference of modified alloc_mem ptr R1",
+},
+{
+ "ringbuf: invalid reservation offset 2",
+ .insns = {
+ /* reserve 8 byte ringbuf memory */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ /* store a pointer to the reserved memory in R6 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* check whether the reservation was successful */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ /* spill R6(mem) into the stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ /* fill it back in R7 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
+ /* add invalid offset to reserved ringbuf memory */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0xcafe),
+ /* should be able to access *(R7) = 0 */
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
+ /* submit the reserved ringbuf memory */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 1 },
+ .result = REJECT,
+ .errstr = "R7 min value is outside of the allowed memory range",
+},
+{
+ "ringbuf: check passing rb mem to helpers",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ /* reserve 8 byte ringbuf memory */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* check whether the reservation was successful */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ /* pass allocated ring buffer memory to fib lookup */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, 8),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_fib_lookup),
+ /* submit the ringbuf memory */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 2 },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+},
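
The first two cases reject any offsetting of the reserved pointer before submit, while the third confirms that unmodified ringbuf memory may be passed to helpers. For contrast, the well-formed reserve/submit pairing these checks protect looks roughly like this (sketch; ring size and payload are arbitrary):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_RINGBUF);
            __uint(max_entries, 4096); /* page-sized power of 2 */
    } rb SEC(".maps");

    SEC("xdp")
    int rb_ok(struct xdp_md *ctx)
    {
            __u64 *slot = bpf_ringbuf_reserve(&rb, sizeof(*slot), 0);

            if (!slot)
                    return XDP_PASS;
            *slot = 42;
            /* must submit the unmodified pointer returned by reserve */
            bpf_ringbuf_submit(slot, 0);
            return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";
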
diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c
index 7e50cb80873a..682519769fe3 100644
--- a/tools/testing/selftests/bpf/verifier/search_pruning.c
+++ b/tools/testing/selftests/bpf/verifier/search_pruning.c
@@ -133,6 +133,77 @@
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
+ "precision tracking for u32 spill/fill",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV32_IMM(BPF_REG_6, 32),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_MOV32_IMM(BPF_REG_6, 4),
+ /* Additional insns to introduce a pruning point. */
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ /* u32 spill/fill */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_10, -8),
+ /* out-of-bound map value access for r6=32 */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 15 },
+ .result = REJECT,
+ .errstr = "R0 min value is outside of the allowed memory range",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "precision tracking for u32 spills, u64 fill",
+ .insns = {
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV32_IMM(BPF_REG_7, 0xffffffff),
+ /* Additional insns to introduce a pruning point. */
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_ALU32_IMM(BPF_DIV, BPF_REG_3, 0),
+ /* u32 spills, u64 fill */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -4),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, -8),
+ /* if r8 != X goto pc+1 r8 known in fallthrough branch */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0xffffffff, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ /* if r8 == X goto pc+1 condition always true on first
+ * traversal, so starts backtracking to mark r8 as requiring
+ * precision. r7 marked as needing precision. r6 not marked
+ * since it's not tracked.
+ */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 0xffffffff, 1),
+ /* fails if r8 correctly marked unknown after fill. */
+ BPF_ALU32_IMM(BPF_DIV, BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "div by zero",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
"allocated_stack",
.insns = {
BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
index 7ab3de108761..8cfc5349d2a8 100644
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ b/tools/testing/selftests/bpf/verifier/spill_fill.c
@@ -59,6 +59,34 @@
.result_unpriv = ACCEPT,
},
{
+ "check with invalid reg offset 0",
+ .insns = {
+ /* reserve 8 byte ringbuf memory */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ /* store a pointer to the reserved memory in R6 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* add invalid offset to memory or NULL */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ /* check whether the reservation was successful */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ /* should not be able to access *(R6) = 0 */
+ BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0),
+ /* submit the reserved ringbuf memory */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 1 },
+ .result = REJECT,
+ .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited",
+},
+{
"check corrupted spill/fill",
.insns = {
/* spill R1(ctx) into stack */
@@ -176,6 +204,38 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
+ "Spill u32 const scalars. Refill as u64. Offset to skb->data",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ /* r6 = 0 */
+ BPF_MOV32_IMM(BPF_REG_6, 0),
+ /* r7 = 20 */
+ BPF_MOV32_IMM(BPF_REG_7, 20),
+ /* *(u32 *)(r10 -4) = r6 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -4),
+ /* *(u32 *)(r10 -8) = r7 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, -8),
+ /* r4 = *(u16 *)(r10 -8) */
+ BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8),
+ /* r0 = r2 */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+ /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
"Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index 2debba4e8a3a..359f3e8f8b60 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -1078,6 +1078,29 @@
.errstr_unpriv = "R0 pointer -= pointer prohibited",
},
{
+ "map access: trying to leak tainted dst reg",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV32_IMM(BPF_REG_1, 0xFFFFFFFF),
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 4 },
+ .result = REJECT,
+ .errstr = "math between map_value pointer and 4294967295 is not allowed",
+},
+{
"32bit pkt_ptr -= scalar",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
index bfb97383e6b5..b4ec228eb95d 100644
--- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
+++ b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
@@ -35,7 +35,7 @@
.prog_type = BPF_PROG_TYPE_XDP,
},
{
- "XDP pkt read, pkt_data' > pkt_end, good access",
+ "XDP pkt read, pkt_data' > pkt_end, corner case, good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
@@ -88,6 +88,41 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "XDP pkt read, pkt_data' > pkt_end, corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
"XDP pkt read, pkt_end > pkt_data', good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
@@ -106,16 +141,16 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_end > pkt_data', bad access 1",
+ "XDP pkt read, pkt_end > pkt_data', corner case -1, bad access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
offsetof(struct xdp_md, data_end)),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -143,6 +178,42 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "XDP pkt read, pkt_end > pkt_data', corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_end > pkt_data', corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
"XDP pkt read, pkt_data' < pkt_end, good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
@@ -161,16 +232,16 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_data' < pkt_end, bad access 1",
+ "XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
offsetof(struct xdp_md, data_end)),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -198,7 +269,43 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_end < pkt_data', good access",
+ "XDP pkt read, pkt_data' < pkt_end, corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_data' < pkt_end, corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_end < pkt_data', corner case, good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
@@ -251,6 +358,41 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "XDP pkt read, pkt_end < pkt_data', corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_end < pkt_data', corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
"XDP pkt read, pkt_data' >= pkt_end, good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
@@ -268,15 +410,15 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_data' >= pkt_end, bad access 1",
+ "XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
offsetof(struct xdp_md, data_end)),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -304,7 +446,41 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_end >= pkt_data', good access",
+ "XDP pkt read, pkt_data' >= pkt_end, corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_end >= pkt_data', corner case, good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
@@ -359,7 +535,44 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_data' <= pkt_end, good access",
+ "XDP pkt read, pkt_end >= pkt_data', corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_data' <= pkt_end, corner case, good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
@@ -414,6 +627,43 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
"XDP pkt read, pkt_end <= pkt_data', good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
@@ -431,15 +681,15 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_end <= pkt_data', bad access 1",
+ "XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
offsetof(struct xdp_md, data_end)),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -467,7 +717,41 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_meta' > pkt_data, good access",
+ "XDP pkt read, pkt_end <= pkt_data', corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_end <= pkt_data', corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_meta' > pkt_data, corner case, good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct xdp_md, data_meta)),
@@ -520,6 +804,41 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
"XDP pkt read, pkt_data > pkt_meta', good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -538,16 +857,16 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_data > pkt_meta', bad access 1",
+ "XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct xdp_md, data_meta)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -575,6 +894,42 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "XDP pkt read, pkt_data > pkt_meta', corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_data > pkt_meta', corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
"XDP pkt read, pkt_meta' < pkt_data, good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -593,16 +948,16 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_meta' < pkt_data, bad access 1",
+ "XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct xdp_md, data_meta)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -630,7 +985,43 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_data < pkt_meta', good access",
+ "XDP pkt read, pkt_meta' < pkt_data, corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_data < pkt_meta', corner case, good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct xdp_md, data_meta)),
@@ -683,6 +1074,41 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "XDP pkt read, pkt_data < pkt_meta', corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
"XDP pkt read, pkt_meta' >= pkt_data, good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -700,15 +1126,15 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_meta' >= pkt_data, bad access 1",
+ "XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct xdp_md, data_meta)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -736,7 +1162,41 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_data >= pkt_meta', good access",
+ "XDP pkt read, pkt_meta' >= pkt_data, corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_data >= pkt_meta', corner case, good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct xdp_md, data_meta)),
@@ -791,7 +1251,44 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_meta' <= pkt_data, good access",
+ "XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_meta' <= pkt_data, corner case, good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct xdp_md, data_meta)),
@@ -846,6 +1343,43 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
"XDP pkt read, pkt_data <= pkt_meta', good access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -863,15 +1397,15 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
- "XDP pkt read, pkt_data <= pkt_meta', bad access 1",
+ "XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct xdp_md, data_meta)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -898,3 +1432,37 @@
.prog_type = BPF_PROG_TYPE_XDP,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "XDP pkt read, pkt_data <= pkt_meta', corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
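
All of the corner-case variants above encode one C idiom: advance a copy of a packet pointer by N bytes, compare it against data_end with the branch under test, and only then load. The verifier accepts the exact boundary where pointer + N == data_end for an N-byte access; the "-1" variants check one byte too few for the 8-byte load and are rejected. In plain C (sketch):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("xdp")
    int read_first_u64(struct xdp_md *ctx)
    {
            void *data = (void *)(long)ctx->data;
            void *data_end = (void *)(long)ctx->data_end;

            /* corner case: data + 8 == data_end still leaves exactly
             * 8 readable bytes; anything less is out of bounds */
            if (data + 8 > data_end)
                    return XDP_DROP;
            return *(__u64 *)data ? XDP_PASS : XDP_DROP;
    }

    char _license[] SEC("license") = "GPL";
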
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 027198768fad..b3afd43549fa 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -4,18 +4,35 @@
set -u
set -e
-# This script currently only works for x86_64, as
-# it is based on the VM image used by the BPF CI which is
-# x86_64.
-QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}"
-X86_BZIMAGE="arch/x86/boot/bzImage"
+# This script currently only works for x86_64 and s390x, as
+# it is based on the VM image used by the BPF CI, which is
+# available only for these architectures.
+ARCH="$(uname -m)"
+case "${ARCH}" in
+s390x)
+ QEMU_BINARY=qemu-system-s390x
+ QEMU_CONSOLE="ttyS1"
+ QEMU_FLAGS=(-smp 2)
+ BZIMAGE="arch/s390/boot/compressed/vmlinux"
+ ;;
+x86_64)
+ QEMU_BINARY=qemu-system-x86_64
+ QEMU_CONSOLE="ttyS0,115200"
+ QEMU_FLAGS=(-cpu host -smp 8)
+ BZIMAGE="arch/x86/boot/bzImage"
+ ;;
+*)
+ echo "Unsupported architecture"
+ exit 1
+ ;;
+esac
DEFAULT_COMMAND="./test_progs"
MOUNT_DIR="mnt"
ROOTFS_IMAGE="root.img"
OUTPUT_DIR="$HOME/.bpf_selftests"
-KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config"
-KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config"
-INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
+KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/config-latest.${ARCH}"
+KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/config-latest.${ARCH}"
+INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX"
NUM_COMPILE_JOBS="$(nproc)"
LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
LOG_FILE="${LOG_FILE_BASE}.log"
@@ -85,7 +102,7 @@ newest_rootfs_version()
{
{
for file in "${!URLS[@]}"; do
- if [[ $file =~ ^libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then
+ if [[ $file =~ ^"${ARCH}"/libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then
echo "${BASH_REMATCH[1]}"
fi
done
@@ -102,7 +119,7 @@ download_rootfs()
exit 1
fi
- download "libbpf-vmtest-rootfs-$rootfsversion.tar.zst" |
+ download "${ARCH}/libbpf-vmtest-rootfs-$rootfsversion.tar.zst" |
zstd -d | sudo tar -C "$dir" -x
}
@@ -224,13 +241,12 @@ EOF
-nodefaults \
-display none \
-serial mon:stdio \
- -cpu host \
+ "${qemu_flags[@]}" \
-enable-kvm \
- -smp 8 \
-m 4G \
-drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \
-kernel "${kernel_bzimage}" \
- -append "root=/dev/vda rw console=ttyS0,115200"
+ -append "root=/dev/vda rw console=${QEMU_CONSOLE}"
}
copy_logs()
@@ -282,7 +298,7 @@ main()
local kernel_checkout=$(realpath "${script_dir}"/../../../../)
# By default the script searches for the kernel in the checkout directory but
# it also obeys environment variables O= and KBUILD_OUTPUT=
- local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}"
+ local kernel_bzimage="${kernel_checkout}/${BZIMAGE}"
local command="${DEFAULT_COMMAND}"
local update_image="no"
local exit_command="poweroff -f"
@@ -337,13 +353,13 @@ main()
if is_rel_path "${O}"; then
O="$(realpath "${PWD}/${O}")"
fi
- kernel_bzimage="${O}/${X86_BZIMAGE}"
+ kernel_bzimage="${O}/${BZIMAGE}"
make_command="${make_command} O=${O}"
elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then
if is_rel_path "${KBUILD_OUTPUT}"; then
KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")"
fi
- kernel_bzimage="${KBUILD_OUTPUT}/${X86_BZIMAGE}"
+ kernel_bzimage="${KBUILD_OUTPUT}/${BZIMAGE}"
make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}"
fi
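The arch dispatch keeps per-target QEMU options in a bash array so that multi-word flags such as "-cpu host" survive expansion as separate arguments; a standalone sketch of the pattern (flag values mirror the script):

#!/bin/bash
ARCH="$(uname -m)"
case "${ARCH}" in
s390x)  QEMU_FLAGS=(-smp 2) ;;
x86_64) QEMU_FLAGS=(-cpu host -smp 8) ;;
*)      echo "Unsupported architecture: ${ARCH}" >&2; exit 1 ;;
esac
# "${QEMU_FLAGS[@]}" expands to one word per element, so '-cpu host'
# stays two separate arguments instead of being re-split or quoted whole
printf '%s\n' "${QEMU_FLAGS[@]}"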
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
index f5ffba341c17..51c8224b4ccc 100644
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -85,10 +85,7 @@ int main(int argc, char **argv)
{
int prog_fd, group_all, mac_map;
struct bpf_program *ingress_prog, *egress_prog;
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_UNSPEC,
- };
- int i, ret, opt, egress_prog_fd = 0;
+ int i, err, ret, opt, egress_prog_fd = 0;
struct bpf_devmap_val devmap_val;
bool attach_egress_prog = false;
unsigned char mac_addr[6];
@@ -147,10 +144,14 @@ int main(int argc, char **argv)
printf("\n");
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
-
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ obj = bpf_object__open_file(filename, NULL);
+ err = libbpf_get_error(obj);
+ if (err)
+ goto err_out;
+ err = bpf_object__load(obj);
+ if (err)
goto err_out;
+ prog_fd = bpf_program__fd(bpf_object__next_program(obj, NULL));
if (attach_egress_prog)
group_all = bpf_object__find_map_fd_by_name(obj, "map_egress");
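The bpf_object__open_file()/bpf_object__load() sequence above is the libbpf replacement for the removed bpf_prog_load_xattr() call; a minimal standalone sketch of the same pattern ("prog_kern.o" is an example object name):

#include <bpf/libbpf.h>

static int load_first_prog(int *prog_fd)
{
	struct bpf_object *obj;
	int err;

	obj = bpf_object__open_file("prog_kern.o", NULL);
	err = libbpf_get_error(obj);
	if (err)
		return err;
	err = bpf_object__load(obj);
	if (err) {
		bpf_object__close(obj);
		return err;
	}
	/* fd of the first program in the object, as in the diff above */
	*prog_fd = bpf_program__fd(bpf_object__next_program(obj, NULL));
	return 0;
}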
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index 30f12637f4e4..baa870a759a2 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -22,6 +22,7 @@
#include "bpf/libbpf.h"
#include "xdping.h"
+#include "testing_helpers.h"
static int ifindex;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -173,7 +174,7 @@ int main(int argc, char **argv)
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) {
+ if (bpf_prog_test_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) {
fprintf(stderr, "load of %s failed\n", filename);
return 1;
}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 6c7cf8aadc79..0a5d23da486d 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -100,6 +100,12 @@
#include "xdpxceiver.h"
#include "../kselftest.h"
+/* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf.
+ * Until xdpxceiver is either moved or rewritten on top of libxdp,
+ * suppress deprecation warnings in this file.
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
static const char *IP1 = "192.168.100.162";
@@ -744,7 +750,6 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *
struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
struct xsk_umem_info *umem = xsk->umem;
u32 idx_rx = 0, idx_fq = 0, rcvd, i;
- u32 total = 0;
int ret;
while (pkt) {
@@ -799,7 +804,6 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *
pthread_mutex_lock(&pacing_mutex);
pkts_in_flight -= rcvd;
- total += rcvd;
if (pkts_in_flight < umem->num_frames)
pthread_cond_signal(&pacing_cond);
pthread_mutex_unlock(&pacing_mutex);
@@ -1219,7 +1223,7 @@ static bool hugepages_present(struct ifobject *ifobject)
void *bufs;
bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_HUGETLB, -1, 0);
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
if (bufs == MAP_FAILED)
return false;
@@ -1366,6 +1370,10 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
testapp_invalid_desc(test);
break;
case TEST_TYPE_UNALIGNED_INV_DESC:
+ if (!hugepages_present(test->ifobj_tx)) {
+ ksft_test_result_skip("No 2M huge pages present.\n");
+ return;
+ }
test_spec_set_name(test, "UNALIGNED_INV_DESC");
test->ifobj_tx->umem->unaligned_mode = true;
test->ifobj_rx->umem->unaligned_mode = true;
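Dropping MAP_NORESERVE makes the probe fail at mmap() time when no huge pages are actually available, instead of succeeding and faulting later; the probe pattern in isolation (a sketch; the size parameter is an example, the selftest derives it from its umem configuration):

#include <stdbool.h>
#include <sys/mman.h>

static bool hugepages_present(size_t mmap_sz)
{
	void *bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	if (bufs == MAP_FAILED)
		return false;
	munmap(bufs, mmap_sz);
	return true;
}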
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 59e222460581..745fe25fa0b9 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -11,10 +11,12 @@ TEST_GEN_PROGS += test_core
TEST_GEN_PROGS += test_freezer
TEST_GEN_PROGS += test_kill
+LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
+
include ../lib.mk
-$(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
-$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h
-$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
-$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
-$(OUTPUT)/test_kill: cgroup_util.c ../clone3/clone3_selftests.h ../pidfd/pidfd.h
+$(OUTPUT)/test_memcontrol: cgroup_util.c
+$(OUTPUT)/test_kmem: cgroup_util.c
+$(OUTPUT)/test_core: cgroup_util.c
+$(OUTPUT)/test_freezer: cgroup_util.c
+$(OUTPUT)/test_kill: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 623cec04ad42..0cf7e90c0052 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -221,7 +221,7 @@ int cg_find_unified_root(char *root, size_t len)
int cg_create(const char *cgroup)
{
- return mkdir(cgroup, 0644);
+ return mkdir(cgroup, 0755);
}
int cg_wait_for_proc_count(const char *cgroup, int count)
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 82e59cdf16e7..4f66d10626d2 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -2,9 +2,9 @@
#include <stdbool.h>
#include <stdlib.h>
-#define PAGE_SIZE 4096
+#include "../kselftest.h"
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define PAGE_SIZE 4096
#define MB(x) (x << 20)
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index 3df648c37876..600123503063 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -1,11 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
#include <linux/limits.h>
+#include <linux/sched.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>
+#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
@@ -674,6 +677,166 @@ cleanup:
return ret;
}
+/*
+ * cgroup migration permission check should be performed based on the
+ * credentials at the time of open instead of write.
+ */
+static int test_cgcore_lesser_euid_open(const char *root)
+{
+ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+ int cg_test_b_procs_fd = -1;
+ uid_t saved_uid;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_b");
+
+ if (!cg_test_a || !cg_test_b)
+ goto cleanup;
+
+ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+ if (!cg_test_a_procs || !cg_test_b_procs)
+ goto cleanup;
+
+ if (cg_create(cg_test_a) || cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_a))
+ goto cleanup;
+
+ if (chown(cg_test_a_procs, test_euid, -1) ||
+ chown(cg_test_b_procs, test_euid, -1))
+ goto cleanup;
+
+ saved_uid = geteuid();
+ if (seteuid(test_euid))
+ goto cleanup;
+
+ cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
+
+ if (seteuid(saved_uid))
+ goto cleanup;
+
+ if (cg_test_b_procs_fd < 0)
+ goto cleanup;
+
+ if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_test_b_procs_fd >= 0)
+ close(cg_test_b_procs_fd);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_b_procs);
+ free(cg_test_a_procs);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
+struct lesser_ns_open_thread_arg {
+ const char *path;
+ int fd;
+ int err;
+};
+
+static int lesser_ns_open_thread_fn(void *arg)
+{
+ struct lesser_ns_open_thread_arg *targ = arg;
+
+ targ->fd = open(targ->path, O_RDWR);
+ targ->err = errno;
+ return 0;
+}
+
+/*
+ * cgroup migration permission check should be performed based on the cgroup
+ * namespace at the time of open instead of write.
+ */
+static int test_cgcore_lesser_ns_open(const char *root)
+{
+ static char stack[65536];
+ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+ int cg_test_b_procs_fd = -1;
+ struct lesser_ns_open_thread_arg targ = { .fd = -1 };
+ pid_t pid;
+ int status;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_b");
+
+ if (!cg_test_a || !cg_test_b)
+ goto cleanup;
+
+ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+ if (!cg_test_a_procs || !cg_test_b_procs)
+ goto cleanup;
+
+ if (cg_create(cg_test_a) || cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_b))
+ goto cleanup;
+
+ if (chown(cg_test_a_procs, test_euid, -1) ||
+ chown(cg_test_b_procs, test_euid, -1))
+ goto cleanup;
+
+ targ.path = cg_test_b_procs;
+ pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
+ CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
+ &targ);
+ if (pid < 0)
+ goto cleanup;
+
+ if (waitpid(pid, &status, 0) < 0)
+ goto cleanup;
+
+ if (!WIFEXITED(status))
+ goto cleanup;
+
+ cg_test_b_procs_fd = targ.fd;
+ if (cg_test_b_procs_fd < 0)
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_a))
+ goto cleanup;
+
+ if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_test_b_procs_fd >= 0)
+ close(cg_test_b_procs_fd);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_b_procs);
+ free(cg_test_a_procs);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
#define T(x) { x, #x }
struct corecg_test {
int (*fn)(const char *root);
@@ -689,6 +852,8 @@ struct corecg_test {
T(test_cgcore_proc_migration),
T(test_cgcore_thread_migration),
T(test_cgcore_destroy),
+ T(test_cgcore_lesser_euid_open),
+ T(test_cgcore_lesser_ns_open),
};
#undef T
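Both new tests assert the same property: migration permission is evaluated against the opener's credentials and cgroup namespace, not the writer's. A minimal userspace sketch of that property (the cgroupfs path is an example):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* open while unprivileged (or from a sibling cgroup namespace)... */
	int fd = open("/sys/fs/cgroup/cg_test_b/cgroup.procs", O_RDWR);

	if (fd < 0)
		return 1;
	/* ...then even a privileged write() on the saved fd must fail:
	 * EACCES for the credentials case, ENOENT for the namespace case
	 */
	if (write(fd, "0", 1) < 0)
		printf("denied: %s\n", strerror(errno));
	close(fd);
	return 0;
}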
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index 42be3b925830..076cf4325f78 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -52,6 +52,12 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
size = sizeof(struct __clone_args);
switch (test_mode) {
+ case CLONE3_ARGS_NO_TEST:
+ /*
+ * Uses default 'flags' and 'SIGCHLD'
+ * assignment.
+ */
+ break;
case CLONE3_ARGS_ALL_0:
args.flags = 0;
args.exit_signal = 0;
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index aa7d13d91963..749239930ca8 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -50,10 +50,6 @@ static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
return syscall(__NR_close_range, fd, max_fd, flags);
}
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
TEST(core_close_range)
{
int i, ret;
diff --git a/tools/testing/selftests/damon/.gitignore b/tools/testing/selftests/damon/.gitignore
new file mode 100644
index 000000000000..c6c2965a6607
--- /dev/null
+++ b/tools/testing/selftests/damon/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+huge_count_read_write
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index 8a3f2cd9fec0..937d36ae9a69 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -1,7 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for damon selftests
-TEST_FILES = _chk_dependency.sh
-TEST_PROGS = debugfs_attrs.sh
+TEST_GEN_FILES += huge_count_read_write
+
+TEST_FILES = _chk_dependency.sh _debugfs_common.sh
+TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
+TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
include ../lib.mk
diff --git a/tools/testing/selftests/damon/_debugfs_common.sh b/tools/testing/selftests/damon/_debugfs_common.sh
new file mode 100644
index 000000000000..48989d4813ae
--- /dev/null
+++ b/tools/testing/selftests/damon/_debugfs_common.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+test_write_result() {
+ file=$1
+ content=$2
+ orig_content=$3
+ expect_reason=$4
+ expected=$5
+
+ echo "$content" > "$file"
+ if [ $? -ne "$expected" ]
+ then
+ echo "writing $content to $file doesn't return $expected"
+ echo "expected because: $expect_reason"
+ echo "$orig_content" > "$file"
+ exit 1
+ fi
+}
+
+test_write_succ() {
+ test_write_result "$1" "$2" "$3" "$4" 0
+}
+
+test_write_fail() {
+ test_write_result "$1" "$2" "$3" "$4" 1
+}
+
+test_content() {
+ file=$1
+ orig_content=$2
+ expected=$3
+ expect_reason=$4
+
+ content=$(cat "$file")
+ if [ "$content" != "$expected" ]
+ then
+		echo "reading $file: expected $expected but got $content"
+ echo "expected because: $expect_reason"
+ echo "$orig_content" > "$file"
+ exit 1
+ fi
+}
+
+source ./_chk_dependency.sh
+
+damon_onoff="$DBGFS/monitor_on"
+if [ $(cat "$damon_onoff") = "on" ]
+then
+ echo "monitoring is on"
+ exit $ksft_skip
+fi
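The helpers keep a write-check-restore discipline: every probe restores the file's original content on failure. A usage sketch mirroring the attrs test in debugfs_attrs.sh below:

file="$DBGFS/attrs"
orig_content=$(cat "$file")

test_write_succ "$file" "1 2 3 4 5" "$orig_content" "valid input"
test_write_fail "$file" "1 2 3 5 4" "$orig_content" \
	"min_nr_regions > max_nr_regions"
echo "$orig_content" > "$file"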
diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh
index 196b6640bf37..902e312bca89 100644
--- a/tools/testing/selftests/damon/debugfs_attrs.sh
+++ b/tools/testing/selftests/damon/debugfs_attrs.sh
@@ -1,48 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-test_write_result() {
- file=$1
- content=$2
- orig_content=$3
- expect_reason=$4
- expected=$5
-
- echo "$content" > "$file"
- if [ $? -ne "$expected" ]
- then
- echo "writing $content to $file doesn't return $expected"
- echo "expected because: $expect_reason"
- echo "$orig_content" > "$file"
- exit 1
- fi
-}
-
-test_write_succ() {
- test_write_result "$1" "$2" "$3" "$4" 0
-}
-
-test_write_fail() {
- test_write_result "$1" "$2" "$3" "$4" 1
-}
-
-test_content() {
- file=$1
- orig_content=$2
- expected=$3
- expect_reason=$4
-
- content=$(cat "$file")
- if [ "$content" != "$expected" ]
- then
- echo "reading $file expected $expected but $content"
- echo "expected because: $expect_reason"
- echo "$orig_content" > "$file"
- exit 1
- fi
-}
-
-source ./_chk_dependency.sh
+source _debugfs_common.sh
# Test attrs file
# ===============
@@ -56,33 +15,3 @@ test_write_fail "$file" "1 2 3 5 4" "$orig_content" \
"min_nr_regions > max_nr_regions"
test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written"
echo "$orig_content" > "$file"
-
-# Test schemes file
-# =================
-
-file="$DBGFS/schemes"
-orig_content=$(cat "$file")
-
-test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \
- "$orig_content" "valid input"
-test_write_fail "$file" "1 2
-3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines"
-test_write_succ "$file" "" "$orig_content" "disabling"
-echo "$orig_content" > "$file"
-
-# Test target_ids file
-# ====================
-
-file="$DBGFS/target_ids"
-orig_content=$(cat "$file")
-
-test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input"
-test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input"
-test_content "$file" "$orig_content" "1 2" "non-integer was there"
-test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input"
-test_content "$file" "$orig_content" "" "wrong input written"
-test_write_succ "$file" "" "$orig_content" "empty input"
-test_content "$file" "$orig_content" "" "empty input written"
-echo "$orig_content" > "$file"
-
-echo "PASS"
diff --git a/tools/testing/selftests/damon/debugfs_empty_targets.sh b/tools/testing/selftests/damon/debugfs_empty_targets.sh
new file mode 100644
index 000000000000..87aff8083822
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_empty_targets.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test empty targets case
+# =======================
+
+orig_target_ids=$(cat "$DBGFS/target_ids")
+echo "" > "$DBGFS/target_ids"
+orig_monitor_on=$(cat "$DBGFS/monitor_on")
+test_write_fail "$DBGFS/monitor_on" "on" "$orig_monitor_on" "empty target ids"
+echo "$orig_target_ids" > "$DBGFS/target_ids"
diff --git a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
new file mode 100644
index 000000000000..922cadac2950
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test huge count read write
+# ==========================
+
+dmesg -C
+
+for file in "$DBGFS/"*
+do
+ ./huge_count_read_write "$file"
+done
+
+if dmesg | grep -q WARNING
+then
+ dmesg
+ exit 1
+else
+ exit 0
+fi
diff --git a/tools/testing/selftests/damon/debugfs_schemes.sh b/tools/testing/selftests/damon/debugfs_schemes.sh
new file mode 100644
index 000000000000..5b39ab44731c
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_schemes.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test schemes file
+# =================
+
+file="$DBGFS/schemes"
+orig_content=$(cat "$file")
+
+test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \
+ "$orig_content" "valid input"
+test_write_fail "$file" "1 2
+3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines"
+test_write_succ "$file" "" "$orig_content" "disabling"
+test_write_fail "$file" "2 1 2 1 10 1 3 10 1 1 1 1 1 1 1 1 2 3" \
+ "$orig_content" "wrong condition ranges"
+echo "$orig_content" > "$file"
diff --git a/tools/testing/selftests/damon/debugfs_target_ids.sh b/tools/testing/selftests/damon/debugfs_target_ids.sh
new file mode 100644
index 000000000000..49aeabdb0aae
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_target_ids.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test target_ids file
+# ====================
+
+file="$DBGFS/target_ids"
+orig_content=$(cat "$file")
+
+test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input"
+test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input"
+test_content "$file" "$orig_content" "1 2" "non-integer was there"
+test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input"
+test_content "$file" "$orig_content" "" "wrong input written"
+test_write_succ "$file" "" "$orig_content" "empty input"
+test_content "$file" "$orig_content" "" "empty input written"
+echo "$orig_content" > "$file"
diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c
new file mode 100644
index 000000000000..ad7a6b4cf338
--- /dev/null
+++ b/tools/testing/selftests/damon/huge_count_read_write.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+
+void write_read_with_huge_count(char *file)
+{
+ int filedesc = open(file, O_RDWR);
+ char buf[25];
+ int ret;
+
+ printf("%s %s\n", __func__, file);
+ if (filedesc < 0) {
+ fprintf(stderr, "failed opening %s\n", file);
+ exit(1);
+ }
+
+	/* pass a count that exceeds INT_MAX with a tiny buffer; the kernel
+	 * handlers must reject or clamp it without warning or overflowing
+	 */
+	write(filedesc, "", 0xfffffffful);
+	perror("after write: ");
+	ret = read(filedesc, buf, 0xfffffffful);
+ perror("after read: ");
+ close(filedesc);
+}
+
+int main(int argc, char *argv[])
+{
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <file>\n", argv[0]);
+ exit(1);
+ }
+ write_read_with_huge_count(argv[1]);
+
+ return 0;
+}
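debugfs_huge_count_read_write.sh drives this binary over every DAMON debugfs file; a single-file usage sketch (the path is an example):

dmesg -C
./huge_count_read_write "$DBGFS/attrs"
dmesg | grep -q WARNING && echo "kernel warned on huge count"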
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
index 10e0f3dbc930..5f6eb965cfd1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
@@ -217,9 +217,11 @@ short_payload_get()
dest_mac=$(mac_get $h1)
p=$(:
)"08:"$( : VXLAN flags
- )"01:00:00:"$( : VXLAN reserved
+ )"00:00:00:"$( : VXLAN reserved
)"00:03:e8:"$( : VXLAN VNI : 1000
)"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
)
echo $p
}
@@ -263,7 +265,8 @@ decap_error_test()
corrupted_packet_test "Decap error: Reserved bits in use" \
"reserved_bits_payload_get"
- corrupted_packet_test "Decap error: No L2 header" "short_payload_get"
+ corrupted_packet_test "Decap error: Too short inner packet" \
+ "short_payload_get"
}
mc_smac_payload_get()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh
new file mode 100755
index 000000000000..f6c16cbb6cf7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh
@@ -0,0 +1,342 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel drops and exceptions functionality over mlxsw.
+# Check all traps to make sure they are triggered under the right
+# conditions.
+
+# +------------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 2001:db8:1::1/64 |
+# +----|-------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 BR1 (802.1d) | |
+# | | | |
+# | | + vx1 (vxlan) | |
+# | | local 2001:db8:3::1 | |
+# | | id 1000 dstport $VXPORT | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | VRF2 |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ decap_error_test
+ overlay_smac_is_mc_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+: ${VXPORT:=4789}
+export VXPORT
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ tc qdisc add dev $swp1 clsact
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link add name vx1 type vxlan id 1000 local 2001:db8:3::1 \
+ dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+ tos inherit ttl 100
+ ip link set dev vx1 master br1
+ ip link set dev vx1 up
+
+ ip link set dev $rp1 up
+ ip address add dev $rp1 2001:db8:3::1/64
+}
+
+switch_destroy()
+{
+ ip address del dev $rp1 2001:db8:3::1/64
+ ip link set dev $rp1 down
+
+ ip link set dev vx1 down
+ ip link set dev vx1 nomaster
+ ip link del dev vx1
+
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrf2_create()
+{
+ simple_if_init $rp2 2001:db8:3::2/64
+}
+
+vrf2_destroy()
+{
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+ forwarding_enable
+ h1_create
+ switch_create
+ vrf2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vrf2_destroy
+ switch_destroy
+ h1_destroy
+ forwarding_restore
+ vrf_cleanup
+}
+
+ecn_payload_get()
+{
+ local dest_mac=$(mac_get $h1)
+ local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
+ )"86:dd:"$( : ETH type
+ )"6"$( : IP version
+ )"0:0"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:08:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )"80:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
+
+ecn_decap_test()
+{
+ local trap_name="decap_error"
+ local desc=$1; shift
+ local ecn_desc=$1; shift
+ local outer_tos=$1; shift
+ local mz_pid
+
+ RET=0
+
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ payload=$(ecn_payload_get)
+
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \
+ -B 2001:db8:3::1 -t udp \
+ sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+reserved_bits_payload_get()
+{
+ local dest_mac=$(mac_get $h1)
+ local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"01:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
+ )"86:dd:"$( : ETH type
+ )"6"$( : IP version
+ )"0:0"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:08:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )"80:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
+
+short_payload_get()
+{
+ dest_mac=$(mac_get $h1)
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
+ )
+ echo $p
+}
+
+corrupted_packet_test()
+{
+ local trap_name="decap_error"
+ local desc=$1; shift
+ local payload_get=$1; shift
+ local mz_pid
+
+ RET=0
+
+	# A too short packet has no inner packet at all, so the matching
+	# will always succeed
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower skip_hw src_ip 2001:db8:3::1 dst_ip 2001:db8:1::1 \
+ action pass
+
+ rp1_mac=$(mac_get $rp1)
+ payload=$($payload_get)
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \
+ -B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+ ecn_decap_test "Decap error" "ECT(1)" 01
+ ecn_decap_test "Decap error" "ECT(0)" 02
+ ecn_decap_test "Decap error" "CE" 03
+
+ corrupted_packet_test "Decap error: Reserved bits in use" \
+ "reserved_bits_payload_get"
+ corrupted_packet_test "Decap error: Too short inner packet" \
+ "short_payload_get"
+}
+
+mc_smac_payload_get()
+{
+ local dest_mac=$(mac_get $h1)
+ local source_mac="01:02:03:04:05:06"
+ local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"$source_mac:"$( : ETH saddr
+ )"86:dd:"$( : ETH type
+ )"6"$( : IP version
+ )"0:0"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:08:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )"80:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
+
+overlay_smac_is_mc_test()
+{
+ local trap_name="overlay_smac_is_mc"
+ local mz_pid
+
+ RET=0
+
+	# The matching is checked by devlink_trap_drop_test() and the
+	# filter is removed by devlink_trap_drop_cleanup()
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower src_mac 01:02:03:04:05:06 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ payload=$(mc_smac_payload_get)
+
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \
+ -B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $swp1 101
+
+ log_test "Overlay source MAC is multicast"
+
+ devlink_trap_drop_cleanup $mz_pid $swp1 "ipv6" 1 101
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
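For reference, the outer_tos arguments in decap_error_test() set the two ECN bits of the outer header, while ecn_payload_get() builds a non-ECT inner packet, which is what trips the exception:

# outer_tos 01 -> ECT(1), 02 -> ECT(0), 03 -> CE; the inner Traffic
# Class is 0 (non-ECT), so decapsulation raises the decap_error
# exception in all three ecn_decap_test() invocations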
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh
index b513f64d9092..026a126f584d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh
@@ -72,6 +72,35 @@ rif_mac_profile_replacement_test()
ip link set $h1.10 address $h1_10_mac
}
+rif_mac_profile_consolidation_test()
+{
+ local count=$1; shift
+ local h1_20_mac
+
+ RET=0
+
+ if [[ $count -eq 1 ]]; then
+ return
+ fi
+
+ h1_20_mac=$(mac_get $h1.20)
+
+ # Set the MAC of $h1.20 to that of $h1.10 and confirm that they are
+ # using the same MAC profile.
+ ip link set $h1.20 address 00:11:11:11:11:11
+ check_err $?
+
+ occ=$(devlink -j resource show $DEVLINK_DEV \
+ | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]')
+
+ [[ $occ -eq $((count - 1)) ]]
+ check_err $? "MAC profile occupancy did not decrease"
+
+ log_test "RIF MAC profile consolidation"
+
+ ip link set $h1.20 address $h1_20_mac
+}
+
rif_mac_profile_shared_replacement_test()
{
local count=$1; shift
@@ -104,6 +133,7 @@ rif_mac_profile_edit_test()
create_max_rif_mac_profiles $count
rif_mac_profile_replacement_test
+ rif_mac_profile_consolidation_test $count
rif_mac_profile_shared_replacement_test $count
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
new file mode 100755
index 000000000000..429f7ee735cf
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
@@ -0,0 +1,322 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test VxLAN flooding. The device stores flood records in a singly linked list
+# where each record stores up to four IPv6 addresses of remote VTEPs. The test
+# verifies that packets are correctly flooded in various cases such as deletion
+# of a record in the middle of the list.
+#
+# +-----------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 2001:db8:1::1/64 |
+# +----|------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 BR0 (802.1d) | |
+# | | | |
+# | | + vxlan0 (vxlan) | |
+# | | local 2001:db8:2::1 | |
+# | | remote 2001:db8:2::{2..17} | |
+# | | id 10 dstport 4789 | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 2001:db8:2::0/64 via 2001:db8:3::2 |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | R2 (vrf) |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="flooding_test"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+switch_create()
+{
+ # Make sure the bridge uses the MAC address of the local port and
+	# not that of the VxLAN device
+ ip link add dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 address $(mac_get $swp1)
+
+ ip link add name vxlan0 type vxlan id 10 nolearning \
+ udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \
+ local 2001:db8:2::1 dstport 4789
+
+ ip address add 2001:db8:2::1/128 dev lo
+
+ ip link set dev $swp1 master br0
+ ip link set dev vxlan0 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev vxlan0 up
+}
+
+switch_destroy()
+{
+ ip link set dev vxlan0 down
+ ip link set dev $swp1 down
+ ip link set dev br0 down
+
+ ip link set dev vxlan0 nomaster
+ ip link set dev $swp1 nomaster
+
+ ip address del 2001:db8:2::1/128 dev lo
+
+ ip link del dev vxlan0
+
+ ip link del dev br0
+}
+
+router1_create()
+{
+ # This router is in the default VRF, where the VxLAN device is
+ # performing the L3 lookup
+ ip link set dev $rp1 up
+ ip address add 2001:db8:3::1/64 dev $rp1
+ ip route add 2001:db8:2::0/64 via 2001:db8:3::2
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::0/64 via 2001:db8:3::2
+ ip address del 2001:db8:3::1/64 dev $rp1
+ ip link set dev $rp1 down
+}
+
+router2_create()
+{
+ # This router is not in the default VRF, so use simple_if_init()
+ simple_if_init $rp2 2001:db8:3::2/64
+}
+
+router2_destroy()
+{
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+
+ switch_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ switch_destroy
+
+ h1_destroy
+
+ vrf_cleanup
+}
+
+flooding_remotes_add()
+{
+ local num_remotes=$1
+ local lsb
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ lsb=$((i + 1))
+
+ bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \
+ dst 2001:db8:2::$lsb
+ done
+}
+
+flooding_filters_add()
+{
+ local num_remotes=$1
+ local lsb
+ local i
+
+ tc qdisc add dev $rp2 clsact
+
+ for i in $(eval echo {1..$num_remotes}); do
+ lsb=$((i + 1))
+
+ tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \
+ flower ip_proto udp dst_ip 2001:db8:2::$lsb \
+ dst_port 4789 skip_sw action drop
+ done
+}
+
+flooding_filters_del()
+{
+ local num_remotes=$1
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ tc filter del dev $rp2 ingress protocol ipv6 pref $i \
+ handle $i flower
+ done
+
+ tc qdisc del dev $rp2 clsact
+}
+
+flooding_check_packets()
+{
+ local packets=("$@")
+ local num_remotes=${#packets[@]}
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]}
+ check_err $? "remote $i - did not get expected number of packets"
+ done
+}
+
+flooding_test()
+{
+ # Use 16 remote VTEPs that will be stored in 4 records. The array
+ # 'packets' will store how many packets are expected to be received
+ # by each remote VTEP at each stage of the test
+ declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)
+ local num_remotes=16
+
+ RET=0
+
+ # Add FDB entries for remote VTEPs and corresponding tc filters on the
+ # ingress of the nexthop router. These filters will count how many
+ # packets were flooded to each remote VTEP
+ flooding_remotes_add $num_remotes
+ flooding_filters_add $num_remotes
+
+ # Send one packet and make sure it is flooded to all the remote VTEPs
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 1 packet"
+
+ # Delete the third record which corresponds to VTEPs with LSB 10..13
+ # and check that packet is flooded correctly when we remove a record
+ # from the middle of the list
+ RET=0
+
+ packets=(2 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 2 packets"
+
+ # Delete the first record and make sure the packet is flooded correctly
+ RET=0
+
+ packets=(2 2 2 2 3 3 3 3 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 3 packets"
+
+ # Delete the last record and make sure the packet is flooded correctly
+ RET=0
+
+ packets=(2 2 2 2 4 4 4 4 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 4 packets"
+
+ # Delete the last record, one entry at a time and make sure single
+ # entries are correctly removed
+ RET=0
+
+ packets=(2 2 2 2 4 5 5 5 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 5 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 4 5 6 6 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 6 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 7 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 8 packets"
+
+ flooding_filters_del $num_remotes
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
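The deletion steps follow directly from the record layout described at the top of the file; the mapping, worked out under the four-addresses-per-record assumption:

# remote i (1-based) is 2001:db8:2::$((i + 1)); record k holds remotes
# 4*(k-1)+1 .. 4*k, so record 3 covers remotes 9..12, i.e.
# 2001:db8:2::10 .. ::13 -- the four FDB entries deleted in the
# "flood after 2 packets" step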
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh
new file mode 100755
index 000000000000..d8fd875ad527
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh
@@ -0,0 +1,334 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test VxLAN flooding. The device stores flood records in a singly linked list
+# where each record stores up to five IPv6 addresses of remote VTEPs. The test
+# verifies that packets are correctly flooded in various cases such as deletion
+# of a record in the middle of the list.
+#
+# +-----------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 2001:db8:1::1/64 |
+# +----|------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 BR0 (802.1d) | |
+# | | | |
+# | | + vxlan0 (vxlan) | |
+# | | local 2001:db8:2::1 | |
+# | | remote 2001:db8:2::{2..21} | |
+# | | id 10 dstport 4789 | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 2001:db8:2::0/64 via 2001:db8:3::2 |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | R2 (vrf) |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="flooding_test"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+switch_create()
+{
+ # Make sure the bridge uses the MAC address of the local port and
+	# not that of the VxLAN device
+ ip link add dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 address $(mac_get $swp1)
+
+ ip link add name vxlan0 type vxlan id 10 nolearning \
+ udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \
+ local 2001:db8:2::1 dstport 4789
+
+ ip address add 2001:db8:2::1/128 dev lo
+
+ ip link set dev $swp1 master br0
+ ip link set dev vxlan0 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev vxlan0 up
+}
+
+switch_destroy()
+{
+ ip link set dev vxlan0 down
+ ip link set dev $swp1 down
+ ip link set dev br0 down
+
+ ip link set dev vxlan0 nomaster
+ ip link set dev $swp1 nomaster
+
+ ip address del 2001:db8:2::1/128 dev lo
+
+ ip link del dev vxlan0
+
+ ip link del dev br0
+}
+
+router1_create()
+{
+ # This router is in the default VRF, where the VxLAN device is
+ # performing the L3 lookup
+ ip link set dev $rp1 up
+ ip address add 2001:db8:3::1/64 dev $rp1
+ ip route add 2001:db8:2::0/64 via 2001:db8:3::2
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::0/64 via 2001:db8:3::2
+ ip address del 2001:db8:3::1/64 dev $rp1
+ ip link set dev $rp1 down
+}
+
+router2_create()
+{
+ # This router is not in the default VRF, so use simple_if_init()
+ simple_if_init $rp2 2001:db8:3::2/64
+}
+
+router2_destroy()
+{
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+
+ switch_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ switch_destroy
+
+ h1_destroy
+
+ vrf_cleanup
+}
+
+flooding_remotes_add()
+{
+ local num_remotes=$1
+ local lsb
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ lsb=$((i + 1))
+
+ bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \
+ dst 2001:db8:2::$lsb
+ done
+}
+
+flooding_filters_add()
+{
+ local num_remotes=$1
+ local lsb
+ local i
+
+ tc qdisc add dev $rp2 clsact
+
+ for i in $(eval echo {1..$num_remotes}); do
+ lsb=$((i + 1))
+
+ tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \
+ flower ip_proto udp dst_ip 2001:db8:2::$lsb \
+ dst_port 4789 skip_sw action drop
+ done
+}
+
+flooding_filters_del()
+{
+ local num_remotes=$1
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ tc filter del dev $rp2 ingress protocol ipv6 pref $i \
+ handle $i flower
+ done
+
+ tc qdisc del dev $rp2 clsact
+}
+
+flooding_check_packets()
+{
+ local packets=("$@")
+ local num_remotes=${#packets[@]}
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]}
+ check_err $? "remote $i - did not get expected number of packets"
+ done
+}
+
+flooding_test()
+{
+ # Use 20 remote VTEPs that will be stored in 4 records. The array
+ # 'packets' will store how many packets are expected to be received
+ # by each remote VTEP at each stage of the test
+ declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)
+ local num_remotes=20
+
+ RET=0
+
+ # Add FDB entries for remote VTEPs and corresponding tc filters on the
+ # ingress of the nexthop router. These filters will count how many
+ # packets were flooded to each remote VTEP
+ flooding_remotes_add $num_remotes
+ flooding_filters_add $num_remotes
+
+ # Send one packet and make sure it is flooded to all the remote VTEPs
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 1 packet"
+
+ # Delete the third record which corresponds to VTEPs with LSB 12..16
+ # and check that packet is flooded correctly when we remove a record
+ # from the middle of the list
+ RET=0
+
+ packets=(2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 2 2 2 2 2)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 2 packets"
+
+ # Delete the first record and make sure the packet is flooded correctly
+ RET=0
+
+ packets=(2 2 2 2 2 3 3 3 3 3 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 3 packets"
+
+ # Delete the last record and make sure the packet is flooded correctly
+ RET=0
+
+ packets=(2 2 2 2 2 4 4 4 4 4 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::18
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::19
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::20
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::21
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 4 packets"
+
+ # Delete the last record, one entry at a time and make sure single
+ # entries are correctly removed
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 5 5 5 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 5 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 6 6 6 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 6 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 6 7 7 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 7 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 8 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 9 packets"
+
+ flooding_filters_del $num_remotes
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
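The same layout arithmetic applies here with five addresses per record:

# record k holds remotes 5*(k-1)+1 .. 5*k, so record 3 covers remotes
# 11..15 -> 2001:db8:2::12 .. ::16, matching the "flood after 2
# packets" deletions above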
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
index 729a86cc4ede..99a332b712f0 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
@@ -4,10 +4,35 @@
# Test various aspects of VxLAN offloading which are specific to mlxsw, such
# as sanitization of invalid configurations and offload indication.
-lib_dir=$(dirname $0)/../../../net/forwarding
+: ${ADDR_FAMILY:=ipv4}
+export ADDR_FAMILY
+
+: ${LOCAL_IP_1:=198.51.100.1}
+export LOCAL_IP_1
+
+: ${LOCAL_IP_2:=198.51.100.2}
+export LOCAL_IP_2
+
+: ${PREFIX_LEN:=32}
+export PREFIX_LEN
+
+: ${UDPCSUM_FLAFS:=noudpcsum}
+export UDPCSUM_FLAFS
+
+: ${MC_IP:=239.0.0.1}
+export MC_IP
-ALL_TESTS="sanitization_test offload_indication_test \
- sanitization_vlan_aware_test offload_indication_vlan_aware_test"
+: ${IP_FLAG:=""}
+export IP_FLAG
+
+: ${ALL_TESTS:="
+ sanitization_test
+ offload_indication_test
+ sanitization_vlan_aware_test
+ offload_indication_vlan_aware_test
+"}
+
+lib_dir=$(dirname $0)/../../../net/forwarding
NUM_NETIFS=2
: ${TIMEOUT:=20000} # ms
source $lib_dir/lib.sh
@@ -63,8 +88,8 @@ sanitization_single_dev_valid_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_pass
@@ -80,8 +105,8 @@ sanitization_single_dev_vlan_aware_test()
ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_pass
@@ -97,8 +122,8 @@ sanitization_single_dev_mcast_enabled_test()
ip link add dev br0 type bridge
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_fail
@@ -115,9 +140,9 @@ sanitization_single_dev_mcast_group_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add name dummy1 up type dummy
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
- dev dummy1 group 239.0.0.1
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \
+ dev dummy1 group $MC_IP
sanitization_single_dev_test_fail
@@ -134,7 +159,7 @@ sanitization_single_dev_no_local_ip_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
ttl 20 tos inherit dstport 4789
sanitization_single_dev_test_fail
@@ -145,31 +170,14 @@ sanitization_single_dev_no_local_ip_test()
log_test "vxlan device with no local ip"
}
-sanitization_single_dev_local_ipv6_test()
-{
- RET=0
-
- ip link add dev br0 type bridge mcast_snooping 0
-
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 2001:db8::1 dstport 4789
-
- sanitization_single_dev_test_fail
-
- ip link del dev vxlan0
- ip link del dev br0
-
- log_test "vxlan device with local ipv6 address"
-}
-
-sanitization_single_dev_learning_enabled_test()
+sanitization_single_dev_learning_enabled_ipv4_test()
{
RET=0
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_pass
@@ -186,8 +194,8 @@ sanitization_single_dev_local_interface_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add name dummy1 up type dummy
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 dev dummy1
sanitization_single_dev_test_fail
@@ -204,8 +212,8 @@ sanitization_single_dev_port_range_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \
srcport 4000 5000
sanitization_single_dev_test_fail
@@ -222,8 +230,8 @@ sanitization_single_dev_tos_static_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos 20 local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos 20 local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_fail
@@ -239,8 +247,8 @@ sanitization_single_dev_ttl_inherit_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl inherit tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl inherit tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_fail
@@ -250,14 +258,14 @@ sanitization_single_dev_ttl_inherit_test()
log_test "vxlan device with inherit ttl"
}
-sanitization_single_dev_udp_checksum_test()
+sanitization_single_dev_udp_checksum_ipv4_test()
{
RET=0
ip link add dev br0 type bridge mcast_snooping 0
ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_fail
@@ -276,13 +284,12 @@ sanitization_single_dev_test()
sanitization_single_dev_mcast_enabled_test
sanitization_single_dev_mcast_group_test
sanitization_single_dev_no_local_ip_test
- sanitization_single_dev_local_ipv6_test
- sanitization_single_dev_learning_enabled_test
+ sanitization_single_dev_learning_enabled_"$ADDR_FAMILY"_test
sanitization_single_dev_local_interface_test
sanitization_single_dev_port_range_test
sanitization_single_dev_tos_static_test
sanitization_single_dev_ttl_inherit_test
- sanitization_single_dev_udp_checksum_test
+ sanitization_single_dev_udp_checksum_"$ADDR_FAMILY"_test
}
sanitization_multi_devs_test_pass()
@@ -334,10 +341,10 @@ sanitization_multi_devs_valid_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add dev br1 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
- ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+ ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_multi_devs_test_pass
@@ -356,10 +363,10 @@ sanitization_multi_devs_ttl_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add dev br1 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
- ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
- ttl 40 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+ ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+ ttl 40 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_multi_devs_test_fail
@@ -378,10 +385,10 @@ sanitization_multi_devs_udp_dstport_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add dev br1 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
- ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 5789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+ ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 5789
sanitization_multi_devs_test_fail
@@ -400,10 +407,10 @@ sanitization_multi_devs_local_ip_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add dev br1 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
- ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.2 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+ ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_2 dstport 4789
sanitization_multi_devs_test_fail
@@ -443,12 +450,12 @@ offload_indication_setup_create()
ip link set dev $swp1 master br0
ip link set dev $swp2 master br1
- ip address add 198.51.100.1/32 dev lo
+ ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
}
offload_indication_setup_destroy()
@@ -456,7 +463,7 @@ offload_indication_setup_destroy()
ip link del dev vxlan1
ip link del dev vxlan0
- ip address del 198.51.100.1/32 dev lo
+ ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link set dev $swp2 nomaster
ip link set dev $swp1 nomaster
@@ -469,7 +476,7 @@ offload_indication_fdb_flood_test()
{
RET=0
- bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2
+ bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst $LOCAL_IP_2
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \
bridge fdb show brport vxlan0
@@ -485,7 +492,7 @@ offload_indication_fdb_bridge_test()
RET=0
bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \
- dst 198.51.100.2
+ dst $LOCAL_IP_2
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
@@ -536,7 +543,7 @@ offload_indication_fdb_bridge_test()
# marked as offloaded in both drivers
RET=0
- bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2
+ bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst $LOCAL_IP_2
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
check_err $?
@@ -560,17 +567,17 @@ offload_indication_decap_route_test()
RET=0
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev vxlan0 down
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev vxlan1 down
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - vxlan device down"
@@ -579,26 +586,26 @@ offload_indication_decap_route_test()
ip link set dev vxlan1 up
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev vxlan0 up
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - vxlan device up"
RET=0
- ip address delete 198.51.100.1/32 dev lo
+ ip address delete $LOCAL_IP_1/$PREFIX_LEN dev lo
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
- ip address add 198.51.100.1/32 dev lo
+ ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - add local route"
@@ -607,18 +614,18 @@ offload_indication_decap_route_test()
ip link set dev $swp1 nomaster
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev $swp2 nomaster
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev $swp1 master br0
ip link set dev $swp2 master br1
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - local ports enslavement"
@@ -627,12 +634,12 @@ offload_indication_decap_route_test()
ip link del dev br0
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link del dev br1
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - bridge device deletion"
@@ -646,25 +653,25 @@ offload_indication_decap_route_test()
ip link set dev vxlan0 master br0
ip link set dev vxlan1 master br1
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link del dev vxlan0
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link del dev vxlan1
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - vxlan device deletion"
ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
}
check_fdb_offloaded()
@@ -721,10 +728,10 @@ __offload_indication_join_vxlan_first()
local mac=00:11:22:33:44:55
local zmac=00:00:00:00:00:00
- bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2
+ bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2
ip link set dev vxlan0 master br0
- bridge fdb add dev vxlan0 $mac self master static dst 198.51.100.2
+ bridge fdb add dev vxlan0 $mac self master static dst $LOCAL_IP_2
RET=0
check_vxlan_fdb_not_offloaded
@@ -774,8 +781,8 @@ __offload_indication_join_vxlan_first()
offload_indication_join_vxlan_first()
{
ip link add dev br0 up type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
__offload_indication_join_vxlan_first
@@ -789,7 +796,7 @@ __offload_indication_join_vxlan_last()
RET=0
- bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2
+ bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2
ip link set dev $swp1 master br0
@@ -809,8 +816,8 @@ __offload_indication_join_vxlan_last()
offload_indication_join_vxlan_last()
{
ip link add dev br0 up type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
__offload_indication_join_vxlan_last
@@ -837,10 +844,10 @@ sanitization_vlan_aware_test()
ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1
ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
# Test that when each VNI is mapped to a different VLAN we can enslave
# a port to the bridge
@@ -884,20 +891,20 @@ sanitization_vlan_aware_test()
# Use the offload indication of the local route to ensure the VXLAN
# configuration was correctly rolled back.
- ip address add 198.51.100.1/32 dev lo
+ ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link set dev vxlan10 type vxlan ttl 10
ip link set dev $swp1 master br0 &> /dev/null
check_fail $?
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vlan-aware - failed enslavement to bridge due to conflict"
ip link set dev vxlan10 type vxlan ttl 20
- ip address del 198.51.100.1/32 dev lo
+ ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link del dev vxlan20
ip link del dev vxlan10
@@ -916,12 +923,12 @@ offload_indication_vlan_aware_setup_create()
bridge vlan add vid 10 dev $swp1
bridge vlan add vid 20 dev $swp1
- ip address add 198.51.100.1/32 dev lo
+ ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
bridge vlan add vid 10 dev vxlan10 pvid untagged
bridge vlan add vid 20 dev vxlan20 pvid untagged
@@ -935,7 +942,7 @@ offload_indication_vlan_aware_setup_destroy()
ip link del dev vxlan20
ip link del dev vxlan10
- ip address del 198.51.100.1/32 dev lo
+ ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo
bridge vlan del vid 20 dev $swp1
bridge vlan del vid 10 dev $swp1
@@ -952,7 +959,7 @@ offload_indication_vlan_aware_fdb_test()
log_info "vxlan entry offload indication - vlan-aware"
bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \
- dst 198.51.100.2 vlan 10
+ dst $LOCAL_IP_2 vlan 10
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
@@ -1003,7 +1010,7 @@ offload_indication_vlan_aware_fdb_test()
# marked as offloaded in both drivers
RET=0
- bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2
+ bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst $LOCAL_IP_2
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
check_err $?
@@ -1021,7 +1028,7 @@ offload_indication_vlan_aware_decap_route_test()
RET=0
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
# Toggle PVID flag on one VxLAN device and make sure route is still
@@ -1029,7 +1036,7 @@ offload_indication_vlan_aware_decap_route_test()
bridge vlan add vid 10 dev vxlan10 untagged
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
# Toggle PVID flag on second VxLAN device and make sure route is no
@@ -1037,14 +1044,15 @@ offload_indication_vlan_aware_decap_route_test()
bridge vlan add vid 20 dev vxlan20 untagged
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
# Toggle PVID flag back and make sure route is marked as offloaded
bridge vlan add vid 10 dev vxlan10 pvid untagged
bridge vlan add vid 20 dev vxlan20 pvid untagged
- busywait "$TIMEOUT" wait_for_offload ip route show table local 198.51.100.1
+ busywait "$TIMEOUT" wait_for_offload ip $IP_FLAG route show table local \
+ $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - vni map/unmap"
@@ -1054,8 +1062,8 @@ offload_indication_vlan_aware_join_vxlan_first()
{
ip link add dev br0 up type bridge mcast_snooping 0 \
vlan_filtering 1 vlan_default_pvid 1
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
__offload_indication_join_vxlan_first 1
@@ -1067,8 +1075,8 @@ offload_indication_vlan_aware_join_vxlan_last()
{
ip link add dev br0 up type bridge mcast_snooping 0 \
vlan_filtering 1 vlan_default_pvid 1
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
__offload_indication_join_vxlan_last
@@ -1085,14 +1093,14 @@ offload_indication_vlan_aware_l3vni_test()
sysctl_set net.ipv6.conf.default.disable_ipv6 1
ip link add dev br0 up type bridge mcast_snooping 0 \
vlan_filtering 1 vlan_default_pvid 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link set dev $swp1 master br0
# The test will use the offload indication on the FDB entry to
# determine whether the tunnel is offloaded or not
- bridge fdb append $zmac dev vxlan0 self dst 192.0.2.1
+ bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2
ip link set dev vxlan0 master br0
bridge vlan add dev vxlan0 vid 10 pvid untagged
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
index 749ba3cfda1d..38148f51877a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
@@ -4,6 +4,21 @@
# Test vetoing of FDB entries that mlxsw cannot offload. This exercises several
# different veto vectors to test various rollback scenarios in the vxlan driver.
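+
+# Address-family parameters: each variable falls back to its IPv4 default
+# unless a wrapper (see vxlan_fdb_veto_ipv6.sh) has already set an IPv6
+# value before sourcing this script.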
+: ${LOCAL_IP:=198.51.100.1}
+export LOCAL_IP
+
+: ${REMOTE_IP_1:=198.51.100.2}
+export REMOTE_IP_1
+
+: ${REMOTE_IP_2:=198.51.100.3}
+export REMOTE_IP_2
+
+: ${UDPCSUM_FLAFS:=noudpcsum}
+export UDPCSUM_FLAFS
+
+: ${MC_IP:=224.0.0.1}
+export MC_IP
+
lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
@@ -26,8 +41,8 @@ setup_prepare()
ip link set dev $swp1 master br0
ip link set dev $swp2 up
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP dstport 4789
ip link set dev vxlan0 master br0
}
@@ -50,11 +65,11 @@ fdb_create_veto_test()
RET=0
bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \
- dst 198.51.100.2 2>/dev/null
+ dst $REMOTE_IP_1 2>/dev/null
check_fail $? "multicast MAC not rejected"
bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \
- dst 198.51.100.2 2>&1 >/dev/null | grep -q mlxsw_spectrum
+ dst $REMOTE_IP_1 2>&1 >/dev/null | grep -q mlxsw_spectrum
check_err $? "multicast MAC rejected without extack"
log_test "vxlan FDB veto - create"
@@ -65,15 +80,15 @@ fdb_replace_veto_test()
RET=0
bridge fdb add 00:01:02:03:04:05 dev vxlan0 self static \
- dst 198.51.100.2
+ dst $REMOTE_IP_1
check_err $? "valid FDB rejected"
bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \
- dst 198.51.100.2 port 1234 2>/dev/null
+ dst $REMOTE_IP_1 port 1234 2>/dev/null
check_fail $? "FDB with an explicit port not rejected"
bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \
- dst 198.51.100.2 port 1234 2>&1 >/dev/null \
+ dst $REMOTE_IP_1 port 1234 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "FDB with an explicit port rejected without extack"
@@ -85,15 +100,15 @@ fdb_append_veto_test()
RET=0
bridge fdb add 00:00:00:00:00:00 dev vxlan0 self static \
- dst 198.51.100.2
+ dst $REMOTE_IP_1
check_err $? "valid FDB rejected"
bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \
- dst 198.51.100.3 port 1234 2>/dev/null
+ dst $REMOTE_IP_2 port 1234 2>/dev/null
check_fail $? "FDB with an explicit port not rejected"
bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \
- dst 198.51.100.3 port 1234 2>&1 >/dev/null \
+ dst $REMOTE_IP_2 port 1234 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "FDB with an explicit port rejected without extack"
@@ -105,11 +120,11 @@ fdb_changelink_veto_test()
RET=0
ip link set dev vxlan0 type vxlan \
- group 224.0.0.1 dev lo 2>/dev/null
+ group $MC_IP dev lo 2>/dev/null
check_fail $? "FDB with a multicast IP not rejected"
ip link set dev vxlan0 type vxlan \
- group 224.0.0.1 dev lo 2>&1 >/dev/null \
+ group $MC_IP dev lo 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "FDB with a multicast IP rejected without extack"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh
new file mode 100755
index 000000000000..66c87aab86f6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run the VXLAN FDB veto tests for IPv6.
+
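+# Variables assigned here override the ': ${VAR:=default}' fallbacks in
+# vxlan_fdb_veto.sh, since sourcing runs in the same shell.
+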
+LOCAL_IP=2001:db8:1::1
+REMOTE_IP_1=2001:db8:2::1
+REMOTE_IP_2=2001:db8:3::1
+UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx"
+MC_IP=FF02::2
+
+source vxlan_fdb_veto.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh
new file mode 100755
index 000000000000..f2ea0163ddea
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run the VXLAN tests for IPv6.
+
+ADDR_FAMILY=ipv6
+LOCAL_IP_1=2001:db8:1::1
+LOCAL_IP_2=2001:db8:1::2
+PREFIX_LEN=128
+UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx"
+MC_IP=FF02::2
+IP_FLAG="-6"
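+
+# IP_FLAG steers the 'ip route' checks in vxlan.sh to the IPv6 tables, and
+# PREFIX_LEN is the host prefix length for the loopback address (/128 here,
+# /32 in the IPv4 run).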
+
+ALL_TESTS="
+ sanitization_test
+ offload_indication_test
+ sanitization_vlan_aware_test
+ offload_indication_vlan_aware_test
+"
+
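+# vxlan.sh dispatches to sanitization_single_dev_*_"$ADDR_FAMILY"_test, so
+# the learning and UDP checksum tests get IPv6-specific variants here.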
+sanitization_single_dev_learning_enabled_ipv6_test()
+{
+ RET=0
+
+ ip link add dev br0 type bridge mcast_snooping 0
+
+ ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+ sanitization_single_dev_test_fail
+
+ ip link del dev vxlan0
+ ip link del dev br0
+
+ log_test "vxlan device with learning enabled"
+}
+
+sanitization_single_dev_udp_checksum_ipv6_test()
+{
+ RET=0
+
+ ip link add dev br0 type bridge mcast_snooping 0
+
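+	# The device is expected to be vetoed unless zero UDP checksums are
+	# allowed in both directions, so each one-sided combination below
+	# must fail sanitization.
+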
+ ip link add name vxlan0 up type vxlan id 10 nolearning \
+ noudp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \
+ local $LOCAL_IP_1 dstport 4789
+
+ sanitization_single_dev_test_fail
+ log_test "vxlan device without zero udp checksum at RX"
+
+ ip link del dev vxlan0
+
+ ip link add name vxlan0 up type vxlan id 10 nolearning \
+ udp6zerocsumrx noudp6zerocsumtx ttl 20 tos inherit \
+ local $LOCAL_IP_1 dstport 4789
+
+ sanitization_single_dev_test_fail
+ log_test "vxlan device without zero udp checksum at TX"
+
+ ip link del dev vxlan0
+ ip link del dev br0
+}
+
+source vxlan.sh
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
index 98166fa3eb91..34fb89b0c61f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
@@ -1,6 +1,6 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-# description: Kprobe dynamic event - adding and removing
+# description: Kprobe profile
# requires: kprobe_events
! grep -q 'myevent' kprobe_profile
diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore
index a4969f7ee020..ededb077a3a6 100644
--- a/tools/testing/selftests/gpio/.gitignore
+++ b/tools/testing/selftests/gpio/.gitignore
@@ -1,2 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
gpio-mockup-cdev
+gpio-chip-info
+gpio-line-name
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index d7b312b44a62..71b306602368 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := gpio-mockup.sh
+TEST_PROGS := gpio-mockup.sh gpio-sim.sh
TEST_FILES := gpio-mockup-sysfs.sh
-TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev
+TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name
CFLAGS += -O2 -g -Wall -I../../../../usr/include/
include ../lib.mk
diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config
index ce100342c20b..409a8532facc 100644
--- a/tools/testing/selftests/gpio/config
+++ b/tools/testing/selftests/gpio/config
@@ -1,3 +1,4 @@
CONFIG_GPIOLIB=y
CONFIG_GPIO_CDEV=y
CONFIG_GPIO_MOCKUP=m
+CONFIG_GPIO_SIM=m
diff --git a/tools/testing/selftests/gpio/gpio-chip-info.c b/tools/testing/selftests/gpio/gpio-chip-info.c
new file mode 100644
index 000000000000..fdc07e742fba
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-chip-info.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GPIO character device helper for reading chip information.
+ *
+ * Copyright (C) 2021 Bartosz Golaszewski <brgl@bgdev.pl>
+ */
+
+#include <fcntl.h>
+#include <linux/gpio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+
+static void print_usage(void)
+{
+ printf("usage:\n");
+ printf(" gpio-chip-info <chip path> [name|label|num-lines]\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct gpiochip_info info;
+ int fd, ret;
+
+ if (argc != 3) {
+ print_usage();
+ return EXIT_FAILURE;
+ }
+
+ fd = open(argv[1], O_RDWR);
+ if (fd < 0) {
+ perror("unable to open the GPIO chip");
+ return EXIT_FAILURE;
+ }
+
+ memset(&info, 0, sizeof(info));
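+	/* A single ioctl retrieves the chip's name, label and line count. */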
+ ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, &info);
+ if (ret) {
+ perror("chip info ioctl failed");
+ return EXIT_FAILURE;
+ }
+
+ if (strcmp(argv[2], "name") == 0) {
+ printf("%s\n", info.name);
+ } else if (strcmp(argv[2], "label") == 0) {
+ printf("%s\n", info.label);
+ } else if (strcmp(argv[2], "num-lines") == 0) {
+ printf("%u\n", info.lines);
+ } else {
+ fprintf(stderr, "unknown command: %s\n", argv[2]);
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/gpio/gpio-line-name.c b/tools/testing/selftests/gpio/gpio-line-name.c
new file mode 100644
index 000000000000..e635cfadbded
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-line-name.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GPIO character device helper for reading line names.
+ *
+ * Copyright (C) 2021 Bartosz Golaszewski <brgl@bgdev.pl>
+ */
+
+#include <fcntl.h>
+#include <linux/gpio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+
+static void print_usage(void)
+{
+ printf("usage:\n");
+ printf(" gpio-line-name <chip path> <line offset>\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct gpio_v2_line_info info;
+ int fd, ret;
+ char *endp;
+
+ if (argc != 3) {
+ print_usage();
+ return EXIT_FAILURE;
+ }
+
+ fd = open(argv[1], O_RDWR);
+ if (fd < 0) {
+ perror("unable to open the GPIO chip");
+ return EXIT_FAILURE;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.offset = strtoul(argv[2], &endp, 10);
+ if (*endp != '\0') {
+ print_usage();
+ return EXIT_FAILURE;
+ }
+
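+	/* Query the v2 uAPI for information about the line at this offset. */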
+ ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &info);
+ if (ret) {
+ perror("line info ioctl failed");
+ return EXIT_FAILURE;
+ }
+
+ printf("%s\n", info.name);
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh
new file mode 100755
index 000000000000..341e3de00896
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-sim.sh
@@ -0,0 +1,396 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 Bartosz Golaszewski <brgl@bgdev.pl>
+
+BASE_DIR=`dirname $0`
+CONFIGFS_DIR="/sys/kernel/config/gpio-sim"
+MODULE="gpio-sim"
+
+fail() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test FAIL"
+ exit 1
+}
+
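+# kselftest reserves exit code 4 for skipped tests, hence the distinct
+# codes used by fail() and skip().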
+skip() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test SKIP"
+ exit 4
+}
+
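+# Tear down a configfs chip: remove hogs before lines, lines before banks,
+# and banks before the chip itself, since configfs requires children to be
+# removed first.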
+remove_chip() {
+ local CHIP=$1
+
+ for FILE in $CONFIGFS_DIR/$CHIP/*; do
+ BANK=`basename $FILE`
+ if [ "$BANK" = "live" -o "$BANK" = "dev_name" ]; then
+ continue
+ fi
+
+ LINES=`ls $CONFIGFS_DIR/$CHIP/$BANK/ | egrep ^line`
+ if [ "$?" = 0 ]; then
+ for LINE in $LINES; do
+ if [ -e $CONFIGFS_DIR/$CHIP/$BANK/$LINE/hog ]; then
+ rmdir $CONFIGFS_DIR/$CHIP/$BANK/$LINE/hog || \
+ fail "Unable to remove the hog"
+ fi
+
+ rmdir $CONFIGFS_DIR/$CHIP/$BANK/$LINE || \
+ fail "Unable to remove the line"
+ done
+ fi
+
+ rmdir $CONFIGFS_DIR/$CHIP/$BANK
+ done
+
+ rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip"
+}
+
+configfs_cleanup() {
+ for CHIP in `ls $CONFIGFS_DIR/`; do
+ remove_chip $CHIP
+ done
+}
+
+create_chip() {
+ local CHIP=$1
+
+ mkdir $CONFIGFS_DIR/$CHIP
+}
+
+create_bank() {
+ local CHIP=$1
+ local BANK=$2
+
+ mkdir $CONFIGFS_DIR/$CHIP/$BANK
+}
+
+set_label() {
+ local CHIP=$1
+ local BANK=$2
+ local LABEL=$3
+
+ echo $LABEL > $CONFIGFS_DIR/$CHIP/$BANK/label || fail "Unable to set the chip label"
+}
+
+set_num_lines() {
+ local CHIP=$1
+ local BANK=$2
+ local NUM_LINES=$3
+
+ echo $NUM_LINES > $CONFIGFS_DIR/$CHIP/$BANK/num_lines || \
+ fail "Unable to set the number of lines"
+}
+
+set_line_name() {
+ local CHIP=$1
+ local BANK=$2
+ local OFFSET=$3
+ local NAME=$4
+ local LINE_DIR=$CONFIGFS_DIR/$CHIP/$BANK/line$OFFSET
+
+ test -d $LINE_DIR || mkdir $LINE_DIR
+ echo $NAME > $LINE_DIR/name || fail "Unable to set the line name"
+}
+
+enable_chip() {
+ local CHIP=$1
+
+ echo 1 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to enable the chip"
+}
+
+disable_chip() {
+ local CHIP=$1
+
+ echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip"
+}
+
+configfs_chip_name() {
+ local CHIP=$1
+ local BANK=$2
+
+ cat $CONFIGFS_DIR/$CHIP/$BANK/chip_name 2> /dev/null || \
+ fail "unable to read the chip name from configfs"
+}
+
+configfs_dev_name() {
+ local CHIP=$1
+
+ cat $CONFIGFS_DIR/$CHIP/dev_name 2> /dev/null || \
+ fail "unable to read the device name from configfs"
+}
+
+get_chip_num_lines() {
+ local CHIP=$1
+ local BANK=$2
+
+ $BASE_DIR/gpio-chip-info /dev/`configfs_chip_name $CHIP $BANK` num-lines || \
+ fail "unable to read the number of lines from the character device"
+}
+
+get_chip_label() {
+ local CHIP=$1
+ local BANK=$2
+
+ $BASE_DIR/gpio-chip-info /dev/`configfs_chip_name $CHIP $BANK` label || \
+ fail "unable to read the chip label from the character device"
+}
+
+get_line_name() {
+ local CHIP=$1
+ local BANK=$2
+ local OFFSET=$3
+
+ $BASE_DIR/gpio-line-name /dev/`configfs_chip_name $CHIP $BANK` $OFFSET || \
+ fail "unable to read the line name from the character device"
+}
+
+sysfs_set_pull() {
+ local DEV=$1
+ local BANK=$2
+ local OFFSET=$3
+ local PULL=$4
+ local DEVNAME=`configfs_dev_name $DEV`
+ local CHIPNAME=`configfs_chip_name $DEV $BANK`
+ local SYSFSPATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio$OFFSET/pull"
+
+ echo $PULL > $SYSFSPATH || fail "Unable to set line pull in sysfs"
+}
+
+# Load the gpio-sim module. This will pull in configfs if needed too.
+modprobe gpio-sim || skip "unable to load the gpio-sim module"
+# Make sure configfs is mounted at /sys/kernel/config. Wait a bit if needed.
+for IDX in `seq 5`; do
+ if [ "$IDX" -eq "5" ]; then
+ skip "configfs not mounted at /sys/kernel/config"
+ fi
+
+ mountpoint -q /sys/kernel/config && break
+ sleep 0.1
+done
+# If the module was already loaded, remove all previously created chips.
+configfs_cleanup
+
+trap "exit 1" SIGTERM SIGINT
+trap configfs_cleanup EXIT
+
+echo "1. chip_name and dev_name attributes"
+
+echo "1.1. Chip name is communicated to user"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+test -n "`cat $CONFIGFS_DIR/chip/bank/chip_name`" || fail "chip_name doesn't work"
+remove_chip chip
+
+echo "1.2. chip_name returns 'none' if the chip is still pending"
+create_chip chip
+create_bank chip bank
+test "`cat $CONFIGFS_DIR/chip/bank/chip_name`" = "none" || \
+ fail "chip_name doesn't return 'none' for a pending chip"
+remove_chip chip
+
+echo "1.3. Device name is communicated to user"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+test -n "`cat $CONFIGFS_DIR/chip/dev_name`" || fail "dev_name doesn't work"
+remove_chip chip
+
+echo "2. Creating and configuring simulated chips"
+
+echo "2.1. Default number of lines is 1"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1"
+remove_chip chip
+
+echo "2.2. Number of lines can be specified"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 16
+enable_chip chip
+test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16"
+remove_chip chip
+
+echo "2.3. Label can be set"
+create_chip chip
+create_bank chip bank
+set_label chip bank foobar
+enable_chip chip
+test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect"
+remove_chip chip
+
+echo "2.4. Label can be left empty"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty"
+remove_chip chip
+
+echo "2.5. Line names can be configured"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 16
+set_line_name chip bank 0 foo
+set_line_name chip bank 2 bar
+enable_chip chip
+test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect"
+test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect"
+remove_chip chip
+
+echo "2.6. Line config can remain unused if offset is greater than number of lines"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 2
+set_line_name chip bank 5 foobar
+enable_chip chip
+test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect"
+test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect"
+remove_chip chip
+
+echo "2.7. Line configfs directory names are sanitized"
+create_chip chip
+create_bank chip bank
+mkdir $CONFIGFS_DIR/chip/bank/line12foobar 2> /dev/null && \
+ fail "invalid configfs line name accepted"
+mkdir $CONFIGFS_DIR/chip/bank/line_no_offset 2> /dev/null && \
+ fail "invalid configfs line name accepted"
+remove_chip chip
+
+echo "2.8. Multiple chips can be created"
+CHIPS="chip0 chip1 chip2"
+for CHIP in $CHIPS; do
+ create_chip $CHIP
+ create_bank $CHIP bank
+ enable_chip $CHIP
+done
+for CHIP in $CHIPS; do
+ remove_chip $CHIP
+done
+
+echo "2.9. Can't modify settings when chip is live"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \
+ fail "Setting label of a live chip should fail"
+echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \
+ fail "Setting number of lines of a live chip should fail"
+remove_chip chip
+
+echo "2.10. Can't create line items when chip is live"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail"
+remove_chip chip
+
+echo "2.11. Probe errors are propagated to user-space"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 99999
+echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Probe error was not propagated"
+remove_chip chip
+
+echo "2.12. Cannot enable a chip without any GPIO banks"
+create_chip chip
+echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Chip enabled without any GPIO banks"
+remove_chip chip
+
+echo "2.13. Duplicate chip labels are not allowed"
+create_chip chip
+create_bank chip bank0
+set_label chip bank0 foobar
+create_bank chip bank1
+set_label chip bank1 foobar
+echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Duplicate chip labels were not rejected"
+remove_chip chip
+
+echo "2.14. Lines can be hogged"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog
+enable_chip chip
+$BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \
+ fail "Setting the value of a hogged line shouldn't succeed"
+remove_chip chip
+
+echo "3. Controlling simulated chips"
+
+echo "3.1. Pull can be set over sysfs"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+sysfs_set_pull chip bank 0 pull-up
+$BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 0
+test "$?" = "1" || fail "pull set incorrectly"
+sysfs_set_pull chip bank 0 pull-down
+$BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 1
+test "$?" = "0" || fail "pull set incorrectly"
+remove_chip chip
+
+echo "3.2. Pull can be read from sysfs"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull
+test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed"
+sysfs_set_pull chip bank 0 pull-up
+test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed"
+remove_chip chip
+
+echo "3.3. Incorrect input in sysfs is rejected"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull"
+echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected"
+remove_chip chip
+
+echo "3.4. Can't write to value"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
+echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly"
+remove_chip chip
+
+echo "4. Simulated GPIO chips are functional"
+
+echo "4.1. Values can be read from sysfs"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
+test `cat $SYSFS_PATH` = "0" || fail "incorrect value read from sysfs"
+$BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 &
+sleep 0.1 # FIXME Any better way?
+test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs"
+kill $!
+remove_chip chip
+
+echo "4.2. Bias settings work correctly"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+$BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0
+test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work"
+remove_chip chip
+
+echo "GPIO $MODULE test PASS"
diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c
index af7f9c7d59bc..06256c96df12 100644
--- a/tools/testing/selftests/ir/ir_loopback.c
+++ b/tools/testing/selftests/ir/ir_loopback.c
@@ -26,7 +26,6 @@
#include "../kselftest.h"
#define TEST_SCANCODES 10
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define SYSFS_PATH_MAX 256
#define DNAME_PATH_MAX 256
diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile
index aa91d2063249..806a150648c3 100644
--- a/tools/testing/selftests/kexec/Makefile
+++ b/tools/testing/selftests/kexec/Makefile
@@ -4,7 +4,7 @@
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-ifeq ($(ARCH),x86)
+ifeq ($(ARCH),$(filter $(ARCH),x86 ppc64le))
TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh
TEST_FILES := kexec_common_lib.sh
diff --git a/tools/testing/selftests/kexec/kexec_common_lib.sh b/tools/testing/selftests/kexec/kexec_common_lib.sh
index 43017cfe88f7..0e114b34d5d7 100755
--- a/tools/testing/selftests/kexec/kexec_common_lib.sh
+++ b/tools/testing/selftests/kexec/kexec_common_lib.sh
@@ -91,6 +91,27 @@ get_efi_var_secureboot_mode()
return 0;
}
+# On the powerpc platform, check the device-tree property
+# /proc/device-tree/ibm,secureboot/os-secureboot-enforcing
+# to detect secureboot state.
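+# Returns 1 when secure boot is enforcing, 0 otherwise, matching the EFI
+# helpers above.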
+get_ppc64_secureboot_mode()
+{
+ local secure_boot_file="/proc/device-tree/ibm,secureboot/os-secureboot-enforcing"
+ # Check for secure boot file existence
+ if [ -f $secure_boot_file ]; then
+ log_info "Secureboot is enabled (Device tree)"
+ return 1;
+ fi
+ log_info "Secureboot is not enabled (Device tree)"
+ return 0;
+}
+
+# Return the architecture of the system
+get_arch()
+{
+ echo $(arch)
+}
+
# Check efivar SecureBoot-$(the UUID) and SetupMode-$(the UUID).
# The secure boot mode can be accessed either as the last integer
# of "od -An -t u1 /sys/firmware/efi/efivars/SecureBoot-*" or from
@@ -100,14 +121,19 @@ get_efi_var_secureboot_mode()
get_secureboot_mode()
{
local secureboot_mode=0
+ local system_arch=$(get_arch)
- get_efivarfs_secureboot_mode
- secureboot_mode=$?
-
- # fallback to using the efi_var files
- if [ $secureboot_mode -eq 0 ]; then
- get_efi_var_secureboot_mode
+ if [ "$system_arch" == "ppc64le" ]; then
+ get_ppc64_secureboot_mode
+ secureboot_mode=$?
+ else
+ get_efivarfs_secureboot_mode
secureboot_mode=$?
+ # fallback to using the efi_var files
+ if [ $secureboot_mode -eq 0 ]; then
+ get_efi_var_secureboot_mode
+ secureboot_mode=$?
+ fi
fi
if [ $secureboot_mode -eq 0 ]; then
@@ -138,15 +164,20 @@ kconfig_enabled()
return 0
}
-# Attempt to get the kernel config first via proc, and then by
-# extracting it from the kernel image or the configs.ko using
-# scripts/extract-ikconfig.
+# Attempt to get the kernel config first by checking the modules directory,
+# then via proc, and finally by extracting it from the kernel image or the
+# configs.ko using scripts/extract-ikconfig.
# Return 1 for found.
get_kconfig()
{
local proc_config="/proc/config.gz"
local module_dir="/lib/modules/`uname -r`"
- local configs_module="$module_dir/kernel/kernel/configs.ko"
+ local configs_module="$module_dir/kernel/kernel/configs.ko*"
+
+ if [ -f $module_dir/config ]; then
+ IKCONFIG=$module_dir/config
+ return 1
+ fi
if [ ! -f $proc_config ]; then
modprobe configs > /dev/null 2>&1
diff --git a/tools/testing/selftests/kexec/test_kexec_file_load.sh b/tools/testing/selftests/kexec/test_kexec_file_load.sh
index 2ff600388c30..c9ccb3c93d72 100755
--- a/tools/testing/selftests/kexec/test_kexec_file_load.sh
+++ b/tools/testing/selftests/kexec/test_kexec_file_load.sh
@@ -97,10 +97,11 @@ check_for_imasig()
check_for_modsig()
{
local module_sig_string="~Module signature appended~"
- local sig="$(tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE)"
local ret=0
- if [ "$sig" == "$module_sig_string" ]; then
+ tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE | \
+ grep -q "$module_sig_string"
+ if [ $? -eq 0 ]; then
ret=1
log_info "kexec kernel image modsig signed"
else
@@ -225,8 +226,12 @@ get_secureboot_mode
secureboot=$?
# Are there pe and ima signatures
-check_for_pesig
-pe_signed=$?
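+# ppc64le kernels are not PE/COFF binaries, so there is no PE signature to
+# check; treat the image as not PE-signed.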
+if [ "$(get_arch)" == 'ppc64le' ]; then
+ pe_signed=0
+else
+ check_for_pesig
+ pe_signed=$?
+fi
check_for_imasig
ima_signed=$?
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 8d50483fe204..f1180987492c 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -48,6 +48,10 @@
#include <stdarg.h>
#include <stdio.h>
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
/* define kselftest exit codes */
#define KSFT_PASS 0
#define KSFT_FAIL 1
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index ae0f0f33b2a6..471eaa7b3a3f 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -671,7 +671,9 @@
#define EXPECT_STRNE(expected, seen) \
__EXPECT_STR(expected, seen, !=, 0)
+#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+#endif
/* Support an optional handler after an ASSERT_* or EXPECT_*. The approach is
* not thread-safe, but it should be fine in most sane test scenarios.
@@ -969,7 +971,7 @@ void __run_test(struct __fixture_metadata *f,
t->passed = 1;
t->skip = 0;
t->trigger = 0;
- t->step = 0;
+ t->step = 1;
t->no_print = 0;
memset(t->results->reason, 0, sizeof(t->results->reason));
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 3763105029fb..dce7de7755e6 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -4,14 +4,16 @@
/aarch64/get-reg-list
/aarch64/psci_cpu_on_test
/aarch64/vgic_init
+/aarch64/vgic_irq
/s390x/memop
/s390x/resets
/s390x/sync_regs_test
+/x86_64/amx_test
+/x86_64/cpuid_test
/x86_64/cr4_cpuid_sync_test
/x86_64/debug_regs
/x86_64/evmcs_test
/x86_64/emulator_error_test
-/x86_64/get_cpuid_test
/x86_64/get_msr_index_features
/x86_64/kvm_clock_test
/x86_64/kvm_pv_test
@@ -21,6 +23,7 @@
/x86_64/mmio_warning_test
/x86_64/mmu_role_test
/x86_64/platform_info_test
+/x86_64/pmu_event_filter_test
/x86_64/set_boot_cpu_id
/x86_64/set_sregs_test
/x86_64/sev_migrate_tests
@@ -30,10 +33,13 @@
/x86_64/svm_int_ctl_test
/x86_64/sync_regs_test
/x86_64/tsc_msrs_test
+/x86_64/userspace_io_test
/x86_64/userspace_msr_exit_test
/x86_64/vmx_apic_access_test
/x86_64/vmx_close_while_nested_test
/x86_64/vmx_dirty_log_test
+/x86_64/vmx_exception_with_invalid_guest_state
+/x86_64/vmx_invalid_nested_guest_state
/x86_64/vmx_preemption_timer_test
/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index c4e34717826a..0e4926bc9a58 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -32,17 +32,22 @@ endif
ifeq ($(ARCH),s390)
UNAME_M := s390x
endif
+# Set UNAME_M so the riscv compile/install works
+ifeq ($(ARCH),riscv)
+ UNAME_M := riscv
+endif
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
+LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c
-TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
+TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test
-TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
@@ -51,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
TEST_GEN_PROGS_x86_64 += x86_64/smm_test
@@ -59,10 +65,13 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
@@ -75,6 +84,8 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test
TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
+TEST_GEN_PROGS_x86_64 += x86_64/amx_test
+TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
@@ -94,6 +105,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
+TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
@@ -117,6 +129,13 @@ TEST_GEN_PROGS_s390x += rseq_test
TEST_GEN_PROGS_s390x += set_memory_region_test
TEST_GEN_PROGS_s390x += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv += demand_paging_test
+TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
+TEST_GEN_PROGS_riscv += kvm_page_table_test
+TEST_GEN_PROGS_riscv += set_memory_region_test
+TEST_GEN_PROGS_riscv += kvm_binary_stats_test
+
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
@@ -131,7 +150,7 @@ endif
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
- -I$(<D) -Iinclude/$(UNAME_M) -I..
+ -I$(<D) -Iinclude/$(UNAME_M) -I.. $(EXTRA_CFLAGS)
no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
$(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index bf6a45b0b8dc..9ad38bd360a4 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -382,7 +382,7 @@ static struct kvm_vm *test_vm_create(void)
ucall_init(vm, NULL);
test_init_timer_irq(vm);
- vgic_v3_setup(vm, nr_vcpus, GICD_BASE_GPA, GICR_BASE_GPA);
+ vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
/* Make all the test's cmdline args visible to the guest */
sync_global_to_guest(vm, test_args);
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index cc898181faab..f769fc6cd927 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -1014,6 +1014,22 @@ static __u64 sve_rejects_set[] = {
KVM_REG_ARM64_SVE_VLS,
};
+static __u64 pauth_addr_regs[] = {
+ ARM64_SYS_REG(3, 0, 2, 1, 0), /* APIAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 1), /* APIAKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 2), /* APIBKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 3), /* APIBKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 0), /* APDAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 1), /* APDAKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 2), /* APDBKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 3) /* APDBKEYHI_EL1 */
+};
+
+static __u64 pauth_generic_regs[] = {
+ ARM64_SYS_REG(3, 0, 2, 3, 0), /* APGAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */
+};
+
#define BASE_SUBLIST \
{ "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
#define VREGS_SUBLIST \
@@ -1025,6 +1041,21 @@ static __u64 sve_rejects_set[] = {
{ "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
.regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
.rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+#define PAUTH_SUBLIST \
+ { \
+ .name = "pauth_address", \
+ .capability = KVM_CAP_ARM_PTRAUTH_ADDRESS, \
+ .feature = KVM_ARM_VCPU_PTRAUTH_ADDRESS, \
+ .regs = pauth_addr_regs, \
+ .regs_n = ARRAY_SIZE(pauth_addr_regs), \
+ }, \
+ { \
+ .name = "pauth_generic", \
+ .capability = KVM_CAP_ARM_PTRAUTH_GENERIC, \
+ .feature = KVM_ARM_VCPU_PTRAUTH_GENERIC, \
+ .regs = pauth_generic_regs, \
+ .regs_n = ARRAY_SIZE(pauth_generic_regs), \
+ }
static struct vcpu_config vregs_config = {
.sublists = {
@@ -1056,11 +1087,30 @@ static struct vcpu_config sve_pmu_config = {
{0},
},
};
+static struct vcpu_config pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_config pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
static struct vcpu_config *vcpu_configs[] = {
&vregs_config,
&vregs_pmu_config,
&sve_config,
&sve_pmu_config,
+ &pauth_config,
+ &pauth_pmu_config,
};
static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
new file mode 100644
index 000000000000..e6c7d7f8fbd1
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -0,0 +1,853 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_irq.c - Test userspace injection of IRQs
+ *
+ * This test validates the injection of IRQs from userspace using various
+ * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the
+ * host to inject a specific intid via a GUEST_SYNC call, and then checks that
+ * it received it.
+ */
+
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+#include <sys/eventfd.h>
+#include <linux/sizes.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "vgic.h"
+
+#define GICD_BASE_GPA 0x08000000ULL
+#define GICR_BASE_GPA 0x080A0000ULL
+#define VCPU_ID 0
+
+/*
+ * Stores the user-specified args; it's passed to the guest and to every test
+ * function.
+ */
+struct test_args {
+	uint32_t nr_irqs; /* number of IRQs supported by KVM. */
+ bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */
+ bool level_sensitive; /* 1 is level, 0 is edge */
+ int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
+ bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+};
+
+/*
+ * KVM implements 32 priority levels:
+ * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8
+ *
+ * Note that these macros will still be correct in the case that KVM implements
+ * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2.
+ */
+#define KVM_NUM_PRIOS 32
+#define KVM_PRIO_SHIFT 3 /* steps of 8 = 1 << 3 */
+#define KVM_PRIO_STEPS (1 << KVM_PRIO_SHIFT) /* 8 */
+#define LOWEST_PRIO (KVM_NUM_PRIOS - 1)
+#define CPU_PRIO_MASK (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */
+#define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1)
+#define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
+
+static void *dist = (void *)GICD_BASE_GPA;
+static void *redist = (void *)GICR_BASE_GPA;
+
+/*
+ * The kvm_inject_* utilities are used by the guest to ask the host to inject
+ * interrupts (e.g., using the KVM_IRQ_LINE ioctl).
+ */
+
+typedef enum {
+ KVM_INJECT_EDGE_IRQ_LINE = 1,
+ KVM_SET_IRQ_LINE,
+ KVM_SET_IRQ_LINE_HIGH,
+ KVM_SET_LEVEL_INFO_HIGH,
+ KVM_INJECT_IRQFD,
+ KVM_WRITE_ISPENDR,
+ KVM_WRITE_ISACTIVER,
+} kvm_inject_cmd;
+
+struct kvm_inject_args {
+ kvm_inject_cmd cmd;
+ uint32_t first_intid;
+ uint32_t num;
+ int level;
+ bool expect_failure;
+};
+
+/* Used on the guest side to perform the hypercall. */
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+ uint32_t num, int level, bool expect_failure);
+
+/* Used on the host side to get the hypercall info. */
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+ struct kvm_inject_args *args);
+
+#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure) \
+ kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure)
+
+#define KVM_INJECT_MULTI(cmd, intid, num) \
+ _KVM_INJECT_MULTI(cmd, intid, num, false)
+
+#define _KVM_INJECT(cmd, intid, expect_failure) \
+ _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure)
+
+#define KVM_INJECT(cmd, intid) \
+ _KVM_INJECT_MULTI(cmd, intid, 1, false)
+
+#define KVM_ACTIVATE(cmd, intid) \
+ kvm_inject_call(cmd, intid, 1, 1, false);
+
+struct kvm_inject_desc {
+ kvm_inject_cmd cmd;
+	/* can inject SGIs, PPIs, and/or SPIs. */
+ bool sgi, ppi, spi;
+};
+
+static struct kvm_inject_desc inject_edge_fns[] = {
+ /* sgi ppi spi */
+ { KVM_INJECT_EDGE_IRQ_LINE, false, false, true },
+ { KVM_INJECT_IRQFD, false, false, true },
+ { KVM_WRITE_ISPENDR, true, false, true },
+ { 0, },
+};
+
+static struct kvm_inject_desc inject_level_fns[] = {
+ /* sgi ppi spi */
+ { KVM_SET_IRQ_LINE_HIGH, false, true, true },
+ { KVM_SET_LEVEL_INFO_HIGH, false, true, true },
+ { KVM_INJECT_IRQFD, false, false, true },
+ { KVM_WRITE_ISPENDR, false, true, true },
+ { 0, },
+};
+
+static struct kvm_inject_desc set_active_fns[] = {
+ /* sgi ppi spi */
+ { KVM_WRITE_ISACTIVER, true, true, true },
+ { 0, },
+};
+
+#define for_each_inject_fn(t, f) \
+ for ((f) = (t); (f)->cmd; (f)++)
+
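+/* Skip KVM_INJECT_IRQFD when the host lacks KVM_CAP_IRQFD support. */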
+#define for_each_supported_inject_fn(args, t, f) \
+ for_each_inject_fn(t, f) \
+ if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD)
+
+#define for_each_supported_activate_fn(args, t, f) \
+ for_each_supported_inject_fn((args), (t), (f))
+
+/* Shared between the guest main thread and the IRQ handlers. */
+volatile uint64_t irq_handled;
+volatile uint32_t irqnr_received[MAX_SPI + 1];
+
+static void reset_stats(void)
+{
+ int i;
+
+ irq_handled = 0;
+ for (i = 0; i <= MAX_SPI; i++)
+ irqnr_received[i] = 0;
+}
+
+static uint64_t gic_read_ap1r0(void)
+{
+ uint64_t reg = read_sysreg_s(SYS_ICV_AP1R0_EL1);
+
+ dsb(sy);
+ return reg;
+}
+
+static void gic_write_ap1r0(uint64_t val)
+{
+ write_sysreg_s(val, SYS_ICV_AP1R0_EL1);
+ isb();
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level);
+
+static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
+{
+ uint32_t intid = gic_get_and_ack_irq();
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ GUEST_ASSERT(gic_irq_get_active(intid));
+
+ if (!level_sensitive)
+ GUEST_ASSERT(!gic_irq_get_pending(intid));
+
+ if (level_sensitive)
+ guest_set_irq_line(intid, 0);
+
+ GUEST_ASSERT(intid < MAX_SPI);
+ irqnr_received[intid] += 1;
+ irq_handled += 1;
+
+ gic_set_eoi(intid);
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ if (eoi_split)
+ gic_set_dir(intid);
+
+ GUEST_ASSERT(!gic_irq_get_active(intid));
+ GUEST_ASSERT(!gic_irq_get_pending(intid));
+}
+
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+ uint32_t num, int level, bool expect_failure)
+{
+ struct kvm_inject_args args = {
+ .cmd = cmd,
+ .first_intid = first_intid,
+ .num = num,
+ .level = level,
+ .expect_failure = expect_failure,
+ };
+ GUEST_SYNC(&args);
+}
+
+#define GUEST_ASSERT_IAR_EMPTY() \
+do { \
+ uint32_t _intid; \
+ _intid = gic_get_and_ack_irq(); \
+ GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
+} while (0)
+
+#define CAT_HELPER(a, b) a ## b
+#define CAT(a, b) CAT_HELPER(a, b)
+#define PREFIX guest_irq_handler_
+#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev))
+#define GENERATE_GUEST_IRQ_HANDLER(split, lev) \
+static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs) \
+{ \
+ guest_irq_generic_handler(split, lev); \
+}
+
+GENERATE_GUEST_IRQ_HANDLER(0, 0);
+GENERATE_GUEST_IRQ_HANDLER(0, 1);
+GENERATE_GUEST_IRQ_HANDLER(1, 0);
+GENERATE_GUEST_IRQ_HANDLER(1, 1);
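+/*
+ * E.g. GENERATE_GUEST_IRQ_HANDLER(0, 1) expands to a handler named
+ * guest_irq_handler_01, for eoi_split == 0 and level_sensitive == 1.
+ */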
+
+static void (*guest_irq_handlers[2][2])(struct ex_regs *) = {
+ {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),},
+ {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),},
+};
+
+static void reset_priorities(struct test_args *args)
+{
+ int i;
+
+ for (i = 0; i < args->nr_irqs; i++)
+ gic_set_priority(i, IRQ_DEFAULT_PRIO_REG);
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level)
+{
+ kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false);
+}
+
+static void test_inject_fail(struct test_args *args,
+ uint32_t intid, kvm_inject_cmd cmd)
+{
+ reset_stats();
+
+ _KVM_INJECT(cmd, intid, true);
+ /* no IRQ to handle on entry */
+
+ GUEST_ASSERT_EQ(irq_handled, 0);
+ GUEST_ASSERT_IAR_EMPTY();
+}
+
+static void guest_inject(struct test_args *args,
+ uint32_t first_intid, uint32_t num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t i;
+
+ reset_stats();
+
+ /* Cycle over all priorities to make things more interesting. */
+ for (i = first_intid; i < num + first_intid; i++)
+ gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3);
+
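+	/*
+	 * Mask IRQs (PSTATE.I) so that each injected interrupt is taken
+	 * one at a time at the WFI below, rather than preempting this
+	 * loop at an arbitrary point.
+	 */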
+ asm volatile("msr daifset, #2" : : : "memory");
+ KVM_INJECT_MULTI(cmd, first_intid, num);
+
+ while (irq_handled < num) {
+ asm volatile("wfi\n"
+ "msr daifclr, #2\n"
+ /* handle IRQ */
+ "msr daifset, #2\n"
+ : : : "memory");
+ }
+ asm volatile("msr daifclr, #2" : : : "memory");
+
+ GUEST_ASSERT_EQ(irq_handled, num);
+ for (i = first_intid; i < num + first_intid; i++)
+ GUEST_ASSERT_EQ(irqnr_received[i], 1);
+ GUEST_ASSERT_IAR_EMPTY();
+
+ reset_priorities(args);
+}
+
+/*
+ * Restore the active state of multiple concurrent IRQs (given by num).
+ * This mimics what a live migration would do on the destination side,
+ * assuming some active IRQs had not been deactivated yet.
+ */
+static void guest_restore_active(struct test_args *args,
+ uint32_t first_intid, uint32_t num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t prio, intid, ap1r;
+ int i;
+
+	/*
+	 * Set the priorities of the num IRQs in descending order (lower
+	 * value means higher priority), so that intid + 1 can preempt
+	 * intid.
+	 */
+ for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
+ GUEST_ASSERT(prio >= 0);
+ intid = i + first_intid;
+ gic_set_priority(intid, prio);
+ }
+
+	/*
+	 * In a real migration, KVM would restore all GIC state before
+	 * running guest code.
+	 */
+ for (i = 0; i < num; i++) {
+ intid = i + first_intid;
+ KVM_ACTIVATE(cmd, intid);
+ ap1r = gic_read_ap1r0();
+ ap1r |= 1U << i;
+ gic_write_ap1r0(ap1r);
+ }
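+	/*
+	 * Note: each AP1R0 bit set above records one more active priority
+	 * group, mirroring (roughly) what the CPU interface would have
+	 * latched when the IRQ was acknowledged.
+	 */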
+
+ /* This is where the "migration" would occur. */
+
+ /* finish handling the IRQs starting with the highest priority one. */
+ for (i = 0; i < num; i++) {
+ intid = num - i - 1 + first_intid;
+ gic_set_eoi(intid);
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
+
+ for (i = 0; i < num; i++)
+ GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ GUEST_ASSERT_IAR_EMPTY();
+}
+
+/*
+ * Polls the IAR until it's not a spurious interrupt.
+ *
+ * This function should only be used in test_inject_preemption (with IRQs
+ * masked).
+ */
+static uint32_t wait_for_and_activate_irq(void)
+{
+ uint32_t intid;
+
+ do {
+ asm volatile("wfi" : : : "memory");
+ intid = gic_get_and_ack_irq();
+ } while (intid == IAR_SPURIOUS);
+
+ return intid;
+}
+
+/*
+ * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and
+ * handle them without handling the actual exceptions. This is done by masking
+ * interrupts for the whole test.
+ */
+static void test_inject_preemption(struct test_args *args,
+ uint32_t first_intid, int num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t intid, prio, step = KVM_PRIO_STEPS;
+ int i;
+
+	/*
+	 * Set the priorities of the num IRQs in descending order (lower
+	 * value means higher priority), so that intid + 1 can preempt
+	 * intid.
+	 */
+ for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) {
+ GUEST_ASSERT(prio >= 0);
+ intid = i + first_intid;
+ gic_set_priority(intid, prio);
+ }
+
+ local_irq_disable();
+
+ for (i = 0; i < num; i++) {
+ uint32_t tmp;
+ intid = i + first_intid;
+ KVM_INJECT(cmd, intid);
+ /* Each successive IRQ will preempt the previous one. */
+ tmp = wait_for_and_activate_irq();
+ GUEST_ASSERT_EQ(tmp, intid);
+ if (args->level_sensitive)
+ guest_set_irq_line(intid, 0);
+ }
+
+ /* finish handling the IRQs starting with the highest priority one. */
+ for (i = 0; i < num; i++) {
+ intid = num - i - 1 + first_intid;
+ gic_set_eoi(intid);
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
+
+ local_irq_enable();
+
+ for (i = 0; i < num; i++)
+ GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ GUEST_ASSERT_IAR_EMPTY();
+
+ reset_priorities(args);
+}
+
+static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
+{
+ uint32_t nr_irqs = args->nr_irqs;
+
+ if (f->sgi) {
+ guest_inject(args, MIN_SGI, 1, f->cmd);
+ guest_inject(args, 0, 16, f->cmd);
+ }
+
+ if (f->ppi)
+ guest_inject(args, MIN_PPI, 1, f->cmd);
+
+ if (f->spi) {
+ guest_inject(args, MIN_SPI, 1, f->cmd);
+ guest_inject(args, nr_irqs - 1, 1, f->cmd);
+ guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd);
+ }
+}
+
+static void test_injection_failure(struct test_args *args,
+ struct kvm_inject_desc *f)
+{
+ uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(bad_intid); i++)
+ test_inject_fail(args, bad_intid[i], f->cmd);
+}
+
+static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
+{
+	/*
+	 * Test up to 4 levels of preemption, as KVM currently caps the
+	 * number of concurrently active IRQs at the number of list
+	 * registers (LRs). The number of LRs is IMPLEMENTATION DEFINED,
+	 * but most implementations provide 4.
+	 */
+ if (f->sgi)
+ test_inject_preemption(args, MIN_SGI, 4, f->cmd);
+
+ if (f->ppi)
+ test_inject_preemption(args, MIN_PPI, 4, f->cmd);
+
+ if (f->spi)
+ test_inject_preemption(args, MIN_SPI, 4, f->cmd);
+}
+
+static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
+{
+ /* Test up to 4 active IRQs. Same reason as in test_preemption. */
+ if (f->sgi)
+ guest_restore_active(args, MIN_SGI, 4, f->cmd);
+
+ if (f->ppi)
+ guest_restore_active(args, MIN_PPI, 4, f->cmd);
+
+ if (f->spi)
+ guest_restore_active(args, MIN_SPI, 4, f->cmd);
+}
+
+static void guest_code(struct test_args args)
+{
+ uint32_t i, nr_irqs = args.nr_irqs;
+ bool level_sensitive = args.level_sensitive;
+ struct kvm_inject_desc *f, *inject_fns;
+
+ gic_init(GIC_V3, 1, dist, redist);
+
+ for (i = 0; i < nr_irqs; i++)
+ gic_irq_enable(i);
+
+ for (i = MIN_SPI; i < nr_irqs; i++)
+ gic_irq_set_config(i, !args.level_sensitive);
+
+ gic_set_eoi_split(args.eoi_split);
+
+ reset_priorities(&args);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ inject_fns = level_sensitive ? inject_level_fns
+ : inject_edge_fns;
+
+ local_irq_enable();
+
+ /* Start the tests. */
+ for_each_supported_inject_fn(&args, inject_fns, f) {
+ test_injection(&args, f);
+ test_preemption(&args, f);
+ test_injection_failure(&args, f);
+ }
+
+	/*
+	 * Restore the active state of IRQs. This is what would happen when
+	 * live migrating IRQs that are in the middle of being handled.
+	 */
+ for_each_supported_activate_fn(&args, set_active_fns, f)
+ test_restore_active(&args, f);
+
+ GUEST_DONE();
+}
+
+static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
+ struct test_args *test_args, bool expect_failure)
+{
+ int ret;
+
+ if (!expect_failure) {
+ kvm_arm_irq_line(vm, intid, level);
+ } else {
+		/* The interface doesn't allow larger intids. */
+ if (intid > KVM_ARM_IRQ_NUM_MASK)
+ return;
+
+ ret = _kvm_arm_irq_line(vm, intid, level);
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %i did not cause KVM_IRQ_LINE "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ }
+}
+
+void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
+ bool expect_failure)
+{
+ if (!expect_failure) {
+ kvm_irq_set_level_info(gic_fd, intid, level);
+ } else {
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+		/*
+		 * The kernel silently fails for invalid SPIs and SGIs (which
+		 * are not level-sensitive); it only checks that the intid
+		 * does not spill over 1U << 10 (the max reserved SPI). Also,
+		 * callers are supposed to mask the intid with 0x3ff (1023).
+		 */
+ if (intid > VGIC_MAX_RESERVED)
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ else
+ TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO "
+ "for intid %i failed, rc: %i errno: %i",
+ intid, ret, errno);
+ }
+}
+
+static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
+ uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+ bool expect_failure)
+{
+ struct kvm_irq_routing *routing;
+ int ret;
+ uint64_t i;
+
+ assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES);
+
+ routing = kvm_gsi_routing_create();
+ for (i = intid; i < (uint64_t)intid + num; i++)
+ kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI);
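+	/*
+	 * The "- MIN_SPI" above reflects that GSI numbering starts at the
+	 * first SPI, i.e. SPI 32 is GSI 0.
+	 */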
+
+ if (!expect_failure) {
+ kvm_gsi_routing_write(vm, routing);
+ } else {
+ ret = _kvm_gsi_routing_write(vm, routing);
+ /* The kernel only checks for KVM_IRQCHIP_NUM_PINS. */
+ if (intid >= KVM_IRQCHIP_NUM_PINS)
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %u did not cause KVM_SET_GSI_ROUTING "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ else
+ TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING "
+ "for intid %i failed, rc: %i errno: %i",
+ intid, ret, errno);
+ }
+}
+
+static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
+ uint32_t vcpu, bool expect_failure)
+{
+	/*
+	 * Ignore this when expecting failure: invalid intids would either
+	 * inject SGIs when the test is configured as level_sensitive (or
+	 * the reverse), or, for large intids, write past the ISPENDR
+	 * register space, and we don't want to do either.
+	 */
+ if (!expect_failure)
+ kvm_irq_write_ispendr(gic_fd, intid, vcpu);
+}
+
+static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
+ uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+ bool expect_failure)
+{
+ int fd[MAX_SPI];
+ uint64_t val;
+ int ret, f;
+ uint64_t i;
+
+ /*
+ * There is no way to try injecting an SGI or PPI as the interface
+ * starts counting from the first SPI (above the private ones), so just
+ * exit.
+ */
+ if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid))
+ return;
+
+ kvm_set_gsi_routing_irqchip_check(vm, intid, num,
+ kvm_max_routes, expect_failure);
+
+	/*
+	 * If expect_failure, then just try to inject anyway. These
+	 * injections will silently fail, and in any case the guest checks
+	 * that no actual interrupt was delivered.
+	 */
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ fd[f] = eventfd(0, 0);
+ TEST_ASSERT(fd[f] != -1,
+ "eventfd failed, errno: %i\n", errno);
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ struct kvm_irqfd irqfd = {
+ .fd = fd[f],
+ .gsi = i - MIN_SPI,
+ };
+ assert(i <= (uint64_t)UINT_MAX);
+ vm_ioctl(vm, KVM_IRQFD, &irqfd);
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ val = 1;
+ ret = write(fd[f], &val, sizeof(uint64_t));
+ TEST_ASSERT(ret == sizeof(uint64_t),
+ "Write to KVM_IRQFD failed with ret: %d\n", ret);
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+ close(fd[f]);
+}
+
+/* handles the valid case: intid=0xffffffff num=1 */
+#define for_each_intid(first, num, tmp, i) \
+ for ((tmp) = (i) = (first); \
+ (tmp) < (uint64_t)(first) + (uint64_t)(num); \
+ (tmp)++, (i)++)
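+/*
+ * tmp is 64 bits wide so that (first + num) cannot wrap around when
+ * first is close to UINT_MAX, while i keeps the 32-bit intid type.
+ */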
+
+static void run_guest_cmd(struct kvm_vm *vm, int gic_fd,
+ struct kvm_inject_args *inject_args,
+ struct test_args *test_args)
+{
+ kvm_inject_cmd cmd = inject_args->cmd;
+ uint32_t intid = inject_args->first_intid;
+ uint32_t num = inject_args->num;
+ int level = inject_args->level;
+ bool expect_failure = inject_args->expect_failure;
+ uint64_t tmp;
+ uint32_t i;
+
+ /* handles the valid case: intid=0xffffffff num=1 */
+ assert(intid < UINT_MAX - num || num == 1);
+
+ switch (cmd) {
+ case KVM_INJECT_EDGE_IRQ_LINE:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 1, test_args,
+ expect_failure);
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 0, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_IRQ_LINE:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, level, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_IRQ_LINE_HIGH:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 1, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_LEVEL_INFO_HIGH:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_set_level_info_check(gic_fd, i, 1,
+ expect_failure);
+ break;
+ case KVM_INJECT_IRQFD:
+ kvm_routing_and_irqfd_check(vm, intid, num,
+ test_args->kvm_max_routes,
+ expect_failure);
+ break;
+ case KVM_WRITE_ISPENDR:
+ for (i = intid; i < intid + num; i++)
+ kvm_irq_write_ispendr_check(gic_fd, i,
+ VCPU_ID, expect_failure);
+ break;
+ case KVM_WRITE_ISACTIVER:
+ for (i = intid; i < intid + num; i++)
+ kvm_irq_write_isactiver(gic_fd, i, VCPU_ID);
+ break;
+ default:
+ break;
+ }
+}
+
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+ struct kvm_inject_args *args)
+{
+ struct kvm_inject_args *kvm_args_hva;
+ vm_vaddr_t kvm_args_gva;
+
+ kvm_args_gva = uc->args[1];
+ kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva);
+ memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args));
+}
+
+static void print_args(struct test_args *args)
+{
+ printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n",
+ args->nr_irqs, args->level_sensitive,
+ args->eoi_split);
+}
+
+static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
+{
+ struct ucall uc;
+ int gic_fd;
+ struct kvm_vm *vm;
+ struct kvm_inject_args inject_args;
+
+ struct test_args args = {
+ .nr_irqs = nr_irqs,
+ .level_sensitive = level_sensitive,
+ .eoi_split = eoi_split,
+ .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING),
+ .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD),
+ };
+
+ print_args(&args);
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ ucall_init(vm, NULL);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+	/* Set up the guest args (the struct is passed by value). */
+ vcpu_args_set(vm, 0, 1, args);
+
+ gic_fd = vgic_v3_setup(vm, 1, nr_irqs,
+ GICD_BASE_GPA, GICR_BASE_GPA);
+
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
+ guest_irq_handlers[args.eoi_split][args.level_sensitive]);
+
+ while (1) {
+ vcpu_run(vm, VCPU_ID);
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ kvm_inject_get_call(vm, &uc, &inject_args);
+ run_guest_cmd(vm, gic_fd, &inject_args, &args);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx",
+ (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
+static void help(const char *name)
+{
+ printf(
+ "\n"
+ "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name);
+	printf(" -n: specify number of IRQs to set up the vgic with. "
+		"It has to be a multiple of 32 and between 64 and 1024.\n");
+	printf(" -e: if 1 then EOI is split: EOIR drops the priority and "
+		"a separate write to DIR deactivates the interrupt.\n");
+ printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0).");
+ puts("");
+ exit(1);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t nr_irqs = 64;
+ bool default_args = true;
+ bool level_sensitive = false;
+ int opt;
+ bool eoi_split = false;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
+ switch (opt) {
+ case 'n':
+ nr_irqs = atoi(optarg);
+ if (nr_irqs > 1024 || nr_irqs % 32)
+ help(argv[0]);
+ break;
+ case 'e':
+ eoi_split = (bool)atoi(optarg);
+ default_args = false;
+ break;
+ case 'l':
+ level_sensitive = (bool)atoi(optarg);
+ default_args = false;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+	/*
+	 * If the user only specified nr_irqs (i.e., neither -e nor -l),
+	 * then run all combinations.
+	 */
+ if (default_args) {
+ test_vgic(nr_irqs, false /* level */, false /* eoi_split */);
+ test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
+ test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
+ test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+ } else {
+ test_vgic(nr_irqs, level_sensitive, eoi_split);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h
index 85dd1e53048e..b217ea17cac5 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic.h
+++ b/tools/testing/selftests/kvm/include/aarch64/gic.h
@@ -11,11 +11,37 @@ enum gic_type {
GIC_TYPE_MAX,
};
+#define MIN_SGI 0
+#define MIN_PPI 16
+#define MIN_SPI 32
+#define MAX_SPI 1019
+#define IAR_SPURIOUS 1023
+
+#define INTID_IS_SGI(intid) (0 <= (intid) && (intid) < MIN_PPI)
+#define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI)
+#define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
+
void gic_init(enum gic_type type, unsigned int nr_cpus,
void *dist_base, void *redist_base);
void gic_irq_enable(unsigned int intid);
void gic_irq_disable(unsigned int intid);
unsigned int gic_get_and_ack_irq(void);
void gic_set_eoi(unsigned int intid);
+void gic_set_dir(unsigned int intid);
+
+/*
+ * Sets the EOI mode. When split is false, a write to EOIR both drops the
+ * priority and deactivates the interrupt. When split is true, EOIR only
+ * drops the priority and a separate write to DIR deactivates.
+ */
+void gic_set_eoi_split(bool split);
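+/*
+ * For example, with split EOI a handler deactivates in two steps:
+ *
+ *	gic_set_eoi(intid);	// priority drop only
+ *	gic_set_dir(intid);	// separate deactivation
+ *
+ * whereas with split == false the gic_set_eoi() call alone deactivates.
+ */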
+void gic_set_priority_mask(uint64_t mask);
+void gic_set_priority(uint32_t intid, uint32_t prio);
+void gic_irq_set_active(unsigned int intid);
+void gic_irq_clear_active(unsigned int intid);
+bool gic_irq_get_active(unsigned int intid);
+void gic_irq_set_pending(unsigned int intid);
+void gic_irq_clear_pending(unsigned int intid);
+bool gic_irq_get_pending(unsigned int intid);
+void gic_irq_set_config(unsigned int intid, bool is_edge);
#endif /* SELFTEST_KVM_GIC_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
index b51536d469a6..ba0886e8a2bb 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h
+++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
@@ -16,8 +16,12 @@
#define GICD_IGROUPR 0x0080
#define GICD_ISENABLER 0x0100
#define GICD_ICENABLER 0x0180
+#define GICD_ISPENDR 0x0200
+#define GICD_ICPENDR 0x0280
#define GICD_ICACTIVER 0x0380
+#define GICD_ISACTIVER 0x0300
#define GICD_IPRIORITYR 0x0400
+#define GICD_ICFGR 0x0C00
/*
* The assumption is that the guest runs in a non-secure mode.
@@ -49,16 +53,24 @@
#define GICR_IGROUPR0 GICD_IGROUPR
#define GICR_ISENABLER0 GICD_ISENABLER
#define GICR_ICENABLER0 GICD_ICENABLER
+#define GICR_ISPENDR0 GICD_ISPENDR
+#define GICR_ISACTIVER0 GICD_ISACTIVER
#define GICR_ICACTIVER0 GICD_ICACTIVER
+#define GICR_ICENABLER GICD_ICENABLER
+#define GICR_ICACTIVER GICD_ICACTIVER
#define GICR_IPRIORITYR0 GICD_IPRIORITYR
/* CPU interface registers */
#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
+#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
+#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4)
#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+#define SYS_ICV_AP1R0_EL1 sys_reg(3, 0, 12, 9, 0)
+
#define ICC_PMR_DEF_PRIO 0xf0
#define ICC_SRE_EL1_SRE (1U << 0)
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 27d8e1bb5b36..8f9f46979a00 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -113,6 +113,9 @@ enum {
#define ESR_EC_WP_CURRENT 0x35
#define ESR_EC_BRK_INS 0x3c
+void aarch64_get_supported_page_sizes(uint32_t ipa,
+ bool *ps4k, bool *ps16k, bool *ps64k);
+
void vm_init_descriptor_tables(struct kvm_vm *vm);
void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h
index 0ecfb253893c..4442081221a0 100644
--- a/tools/testing/selftests/kvm/include/aarch64/vgic.h
+++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h
@@ -14,7 +14,21 @@
((uint64_t)(flags) << 12) | \
index)
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
uint64_t gicd_base_gpa, uint64_t gicr_base_gpa);
-#endif /* SELFTEST_KVM_VGIC_H */
+#define VGIC_MAX_RESERVED 1023
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+
+/* The vcpu arg only applies to private interrupts. */
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu);
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu);
+
+#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
+
+#endif /* SELFTEST_KVM_VGIC_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 6a1a37f30494..c9286811a4cb 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -7,411 +7,7 @@
#ifndef SELFTEST_KVM_UTIL_H
#define SELFTEST_KVM_UTIL_H
-#include "test_util.h"
-
-#include "asm/kvm.h"
-#include "linux/list.h"
-#include "linux/kvm.h"
-#include <sys/ioctl.h>
-
-#include "sparsebit.h"
-
-#define KVM_DEV_PATH "/dev/kvm"
-#define KVM_MAX_VCPUS 512
-
-#define NSEC_PER_SEC 1000000000L
-
-/*
- * Callers of kvm_util only have an incomplete/opaque description of the
- * structure kvm_util is using to maintain the state of a VM.
- */
-struct kvm_vm;
-
-typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
-typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
-
-/* Minimum allocated guest virtual and physical addresses */
-#define KVM_UTIL_MIN_VADDR 0x2000
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
-#define DEFAULT_GUEST_PHY_PAGES 512
-#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
-#define DEFAULT_STACK_PGS 5
-
-enum vm_guest_mode {
- VM_MODE_P52V48_4K,
- VM_MODE_P52V48_64K,
- VM_MODE_P48V48_4K,
- VM_MODE_P48V48_64K,
- VM_MODE_P40V48_4K,
- VM_MODE_P40V48_64K,
- VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
- VM_MODE_P47V64_4K,
- VM_MODE_P44V64_4K,
- NUM_VM_MODES,
-};
-
-#if defined(__aarch64__)
-
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 8)
-
-#elif defined(__x86_64__)
-
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 8)
-
-#elif defined(__s390x__)
-
-#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 16)
-
-#endif
-
-#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
-#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
-
-struct vm_guest_mode_params {
- unsigned int pa_bits;
- unsigned int va_bits;
- unsigned int page_size;
- unsigned int page_shift;
-};
-extern const struct vm_guest_mode_params vm_guest_mode_params[];
-
-int open_path_or_exit(const char *path, int flags);
-int open_kvm_dev_path_or_exit(void);
-int kvm_check_cap(long cap);
-int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
-int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
- struct kvm_enable_cap *cap);
-void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
-const char *vm_guest_mode_string(uint32_t i);
-
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
-void kvm_vm_free(struct kvm_vm *vmp);
-void kvm_vm_restart(struct kvm_vm *vmp, int perm);
-void kvm_vm_release(struct kvm_vm *vmp);
-void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
-void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages);
-uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
-
-int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
- size_t len);
-
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
-
-void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-
-/*
- * VM VCPU Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the current state of the VCPU specified by @vcpuid, within the VM
- * given by @vm, to the FILE stream given by @stream.
- */
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
- uint8_t indent);
-
-void vm_create_irqchip(struct kvm_vm *vm);
-
-void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags);
-
-void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
- void *arg);
-int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
- void *arg);
-void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
-void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
-void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
-void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
-vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
-vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
-
-void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages);
-void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
-void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
-vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
-void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
-
-/*
- * Address Guest Virtual to Guest Physical
- *
- * Input Args:
- * vm - Virtual Machine
- * gva - VM virtual address
- *
- * Output Args: None
- *
- * Return:
- * Equivalent VM physical address
- *
- * Returns the VM physical address of the translated VM virtual
- * address given by @gva.
- */
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
-
-struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
-int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
-int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_guest_debug *debug);
-void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_mp_state *mp_state);
-struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
-void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
-
-/*
- * VM VCPU Args Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * num - number of arguments
- * ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first @num function input registers of the VCPU with @vcpuid,
- * per the C calling convention of the architecture, to the values given
- * as variable args. Each of the variable args is expected to be of type
- * uint64_t. The maximum @num can be is specific to the architecture.
- */
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
-
-void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_fpu *fpu);
-void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_fpu *fpu);
-void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
-void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
-#ifdef __KVM_HAVE_VCPU_EVENTS
-void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
-void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
-#endif
-#ifdef __x86_64__
-void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state);
-int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state, bool ignore_error);
-#endif
-void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
-
-int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
-int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
-int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd);
-int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test);
-int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
- void *val, bool write);
-int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
- void *val, bool write);
-
-int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr);
-int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr);
-int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr, void *val, bool write);
-int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr, void *val, bool write);
-
-const char *exit_reason_str(unsigned int exit_reason);
-
-void virt_pgd_alloc(struct kvm_vm *vm);
-
-/*
- * VM Virtual Page Map
- *
- * Input Args:
- * vm - Virtual Machine
- * vaddr - VM Virtual Address
- * paddr - VM Physical Address
- * memslot - Memory region slot for new virtual translation tables
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within @vm, creates a virtual translation for the page starting
- * at @vaddr to the page starting at @paddr.
- */
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
-
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
- uint32_t memslot);
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot);
-vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
-
-/*
- * Create a VM with reasonable defaults
- *
- * Input Args:
- * vcpuid - The id of the single VCPU to add to the VM.
- * extra_mem_pages - The number of extra pages to add (this will
- * decide how much extra space we will need to
- * setup the page tables using memslot 0)
- * guest_code - The vCPU's entry point
- *
- * Output Args: None
- *
- * Return:
- * Pointer to opaque structure that describes the created VM.
- */
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code);
-
-/* Same as vm_create_default, but can be used for more than one vcpu */
-struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
- uint32_t num_percpu_pages, void *guest_code,
- uint32_t vcpuids[]);
-
-/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */
-struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
- uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
- uint32_t num_percpu_pages, void *guest_code,
- uint32_t vcpuids[]);
-
-/*
- * Adds a vCPU with reasonable defaults (e.g. a stack)
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - The id of the VCPU to add to the VM.
- * guest_code - The vCPU's entry point
- */
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
-
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
-
-unsigned int vm_get_page_size(struct kvm_vm *vm);
-unsigned int vm_get_page_shift(struct kvm_vm *vm);
-uint64_t vm_get_max_gfn(struct kvm_vm *vm);
-int vm_get_fd(struct kvm_vm *vm);
-
-unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
-unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
-unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
-static inline unsigned int
-vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
-{
- unsigned int n;
- n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
-#ifdef __s390x__
- /* s390 requires 1M aligned guest sizes */
- n = (n + 255) & ~255;
-#endif
- return n;
-}
-
-struct kvm_userspace_memory_region *
-kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
- uint64_t end);
-
-struct kvm_dirty_log *
-allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
-
-int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
-
-#define sync_global_to_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- memcpy(_p, &(g), sizeof(g)); \
-})
-
-#define sync_global_from_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- memcpy(&(g), _p, sizeof(g)); \
-})
-
-void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
-
-/* Common ucalls */
-enum {
- UCALL_NONE,
- UCALL_SYNC,
- UCALL_ABORT,
- UCALL_DONE,
- UCALL_UNHANDLED,
-};
-
-#define UCALL_MAX_ARGS 6
-
-struct ucall {
- uint64_t cmd;
- uint64_t args[UCALL_MAX_ARGS];
-};
-
-void ucall_init(struct kvm_vm *vm, void *arg);
-void ucall_uninit(struct kvm_vm *vm);
-void ucall(uint64_t cmd, int nargs, ...);
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
-
-#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
- ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
-#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
-#define GUEST_DONE() ucall(UCALL_DONE, 0)
-#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, 2 + _nargs, \
- "Failed guest assert: " \
- _condstr, __LINE__, _args); \
-} while (0)
-
-#define GUEST_ASSERT(_condition) \
- __GUEST_ASSERT(_condition, #_condition, 0, 0)
-
-#define GUEST_ASSERT_1(_condition, arg1) \
- __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
-
-#define GUEST_ASSERT_2(_condition, arg1, arg2) \
- __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
-
-#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
- __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
-
-#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
- __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
-
-#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
-
-int vm_get_stats_fd(struct kvm_vm *vm);
-int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
-
-uint32_t guest_get_vcpuid(void);
+#include "kvm_util_base.h"
+#include "ucall_common.h"
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
new file mode 100644
index 000000000000..4ed6aa049a91
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -0,0 +1,398 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/kvm_util_base.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+#ifndef SELFTEST_KVM_UTIL_BASE_H
+#define SELFTEST_KVM_UTIL_BASE_H
+
+#include "test_util.h"
+
+#include "asm/kvm.h"
+#include "linux/list.h"
+#include "linux/kvm.h"
+#include <sys/ioctl.h>
+
+#include "sparsebit.h"
+
+#define KVM_DEV_PATH "/dev/kvm"
+#define KVM_MAX_VCPUS 512
+
+#define NSEC_PER_SEC 1000000000L
+
+/*
+ * Callers of kvm_util only have an incomplete/opaque description of the
+ * structure kvm_util is using to maintain the state of a VM.
+ */
+struct kvm_vm;
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+#define DEFAULT_GUEST_PHY_PAGES 512
+#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
+#define DEFAULT_STACK_PGS 5
+
+enum vm_guest_mode {
+ VM_MODE_P52V48_4K,
+ VM_MODE_P52V48_64K,
+ VM_MODE_P48V48_4K,
+ VM_MODE_P48V48_16K,
+ VM_MODE_P48V48_64K,
+ VM_MODE_P40V48_4K,
+ VM_MODE_P40V48_16K,
+ VM_MODE_P40V48_64K,
+ VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_P47V64_4K,
+ VM_MODE_P44V64_4K,
+ VM_MODE_P36V48_4K,
+ VM_MODE_P36V48_16K,
+ VM_MODE_P36V48_64K,
+ VM_MODE_P36V47_16K,
+ NUM_VM_MODES,
+};
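+/*
+ * Mode names encode <physical address bits>V<virtual address bits> and
+ * the page size, e.g. VM_MODE_P40V48_16K is a 40-bit PA, 48-bit VA,
+ * 16K-page configuration.
+ */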
+
+#if defined(__aarch64__)
+
+extern enum vm_guest_mode vm_mode_default;
+
+#define VM_MODE_DEFAULT vm_mode_default
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__x86_64__)
+
+#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__s390x__)
+
+#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 16)
+
+#elif defined(__riscv)
+
+#if __riscv_xlen == 32
+#error "RISC-V 32-bit kvm selftests not supported"
+#endif
+
+#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#endif
+
+#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
+#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
+
+struct vm_guest_mode_params {
+ unsigned int pa_bits;
+ unsigned int va_bits;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+extern const struct vm_guest_mode_params vm_guest_mode_params[];
+
+int open_path_or_exit(const char *path, int flags);
+int open_kvm_dev_path_or_exit(void);
+int kvm_check_cap(long cap);
+int vm_check_cap(struct kvm_vm *vm, long cap);
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
+int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_enable_cap *cap);
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
+const char *vm_guest_mode_string(uint32_t i);
+
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
+void kvm_vm_free(struct kvm_vm *vmp);
+void kvm_vm_restart(struct kvm_vm *vmp, int perm);
+void kvm_vm_release(struct kvm_vm *vmp);
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
+void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+ uint64_t first_page, uint32_t num_pages);
+uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
+
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
+ size_t len);
+
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+/*
+ * VM VCPU Dump
+ *
+ * Input Args:
+ * stream - Output FILE stream
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by @vcpuid, within the VM
+ * given by @vm, to the FILE stream given by @stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
+ uint8_t indent);
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags);
+
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+ void *arg);
+int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+ void *arg);
+void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
+void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
+
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ unsigned int npages);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Returns the VM physical address of the translated VM virtual
+ * address given by @gva.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_guest_debug *debug);
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_mp_state *mp_state);
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+
+/*
+ * VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first @num function input registers of the VCPU with @vcpuid,
+ * per the C calling convention of the architecture, to the values given
+ * as variable args. Each of the variable args is expected to be of type
+ * uint64_t. The maximum @num can be is specific to the architecture.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
+
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_sregs *sregs);
+void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_fpu *fpu);
+void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_fpu *fpu);
+void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
+void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
+#ifdef __KVM_HAVE_VCPU_EVENTS
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+#endif
+#ifdef __x86_64__
+void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_nested_state *state);
+int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_nested_state *state, bool ignore_error);
+#endif
+void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
+
+int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
+int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
+int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd);
+int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test);
+int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
+ void *val, bool write);
+int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
+ void *val, bool write);
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+
+int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+ uint64_t attr);
+int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+ uint64_t attr);
+int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+ uint64_t attr, void *val, bool write);
+int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+ uint64_t attr, void *val, bool write);
+
+#define KVM_MAX_IRQ_ROUTES 4096
+
+struct kvm_irq_routing *kvm_gsi_routing_create(void);
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin);
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+void virt_pgd_alloc(struct kvm_vm *vm);
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within @vm, creates a virtual translation for the page starting
+ * at @vaddr to the page starting at @paddr.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
+
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+ uint32_t memslot);
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot);
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
+
+/*
+ * Create a VM with reasonable defaults
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ * extra_mem_pages - The number of extra pages to add (this will
+ * decide how much extra space we will need to
+ * setup the page tables using memslot 0)
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+ void *guest_code);
+
+/* Same as vm_create_default, but can be used for more than one vcpu */
+struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
+ uint32_t num_percpu_pages, void *guest_code,
+ uint32_t vcpuids[]);
+
+/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */
+struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
+ uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
+ uint32_t num_percpu_pages, void *guest_code,
+ uint32_t vcpuids[]);
+
+/*
+ * Adds a vCPU with reasonable defaults (e.g. a stack)
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - The id of the VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
+
+bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+
+unsigned int vm_get_page_size(struct kvm_vm *vm);
+unsigned int vm_get_page_shift(struct kvm_vm *vm);
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
+uint64_t vm_get_max_gfn(struct kvm_vm *vm);
+int vm_get_fd(struct kvm_vm *vm);
+
+unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
+unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
+unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
+static inline unsigned int
+vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
+{
+ unsigned int n;
+ n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
+#ifdef __s390x__
+	/* s390 requires 1M aligned guest sizes, i.e. multiples of 256 4K pages */
+ n = (n + 255) & ~255;
+#endif
+ return n;
+}
+
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+ uint64_t end);
+
+struct kvm_dirty_log *
+allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
+
+int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
+
+#define sync_global_to_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(_p, &(g), sizeof(g)); \
+})
+
+#define sync_global_from_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(&(g), _p, sizeof(g)); \
+})
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
+
+int vm_get_stats_fd(struct kvm_vm *vm);
+int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
+
+uint32_t guest_get_vcpuid(void);
+
+#endif /* SELFTEST_KVM_UTIL_BASE_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
new file mode 100644
index 000000000000..dc284c6bdbc3
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V processor specific defines
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include "kvm_util.h"
+#include <linux/stringify.h>
+
+static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx,
+ uint64_t size)
+{
+ return KVM_REG_RISCV | type | idx | size;
+}
+
+#if __riscv_xlen == 64
+#define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U64
+#else
+#define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U32
+#endif
+
+#define RISCV_CONFIG_REG(name) __kvm_reg_id(KVM_REG_RISCV_CONFIG, \
+ KVM_REG_RISCV_CONFIG_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_CORE_REG(name) __kvm_reg_id(KVM_REG_RISCV_CORE, \
+ KVM_REG_RISCV_CORE_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_CSR_REG(name) __kvm_reg_id(KVM_REG_RISCV_CSR, \
+ KVM_REG_RISCV_CSR_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_TIMER_REG(name) __kvm_reg_id(KVM_REG_RISCV_TIMER, \
+ KVM_REG_RISCV_TIMER_REG(name), \
+ KVM_REG_SIZE_U64)
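+/*
+ * E.g. RISCV_CORE_REG(regs.a0) builds the KVM_GET/SET_ONE_REG id for
+ * GPR a0; the KVM_REG_RISCV_*_REG() helpers turn a field name into its
+ * register index.
+ */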
+
+static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id,
+ unsigned long *addr)
+{
+ struct kvm_one_reg reg;
+
+ reg.id = id;
+ reg.addr = (unsigned long)addr;
+ vcpu_get_reg(vm, vcpuid, &reg);
+}
+
+static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id,
+ unsigned long val)
+{
+ struct kvm_one_reg reg;
+
+ reg.id = id;
+ reg.addr = (unsigned long)&val;
+ vcpu_set_reg(vm, vcpuid, &reg);
+}
+
+/* L3 index Bit[47:39] */
+#define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL
+#define PGTBL_L3_INDEX_SHIFT 39
+#define PGTBL_L3_BLOCK_SHIFT 39
+#define PGTBL_L3_BLOCK_SIZE 0x0000008000000000ULL
+#define PGTBL_L3_MAP_MASK (~(PGTBL_L3_BLOCK_SIZE - 1))
+/* L2 index Bit[38:30] */
+#define PGTBL_L2_INDEX_MASK 0x0000007FC0000000ULL
+#define PGTBL_L2_INDEX_SHIFT 30
+#define PGTBL_L2_BLOCK_SHIFT 30
+#define PGTBL_L2_BLOCK_SIZE 0x0000000040000000ULL
+#define PGTBL_L2_MAP_MASK (~(PGTBL_L2_BLOCK_SIZE - 1))
+/* L1 index Bit[29:21] */
+#define PGTBL_L1_INDEX_MASK 0x000000003FE00000ULL
+#define PGTBL_L1_INDEX_SHIFT 21
+#define PGTBL_L1_BLOCK_SHIFT 21
+#define PGTBL_L1_BLOCK_SIZE 0x0000000000200000ULL
+#define PGTBL_L1_MAP_MASK (~(PGTBL_L1_BLOCK_SIZE - 1))
+/* L0 index Bit[20:12] */
+#define PGTBL_L0_INDEX_MASK 0x00000000001FF000ULL
+#define PGTBL_L0_INDEX_SHIFT 12
+#define PGTBL_L0_BLOCK_SHIFT 12
+#define PGTBL_L0_BLOCK_SIZE 0x0000000000001000ULL
+#define PGTBL_L0_MAP_MASK (~(PGTBL_L0_BLOCK_SIZE - 1))
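+/*
+ * Note: the index fields above follow the Sv48 layout (L3 indexed by VA
+ * bits 47:39 down to L0 by bits 20:12); Sv39 simply starts one level
+ * lower, at L2.
+ */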
+
+#define PGTBL_PTE_ADDR_MASK 0x003FFFFFFFFFFC00ULL
+#define PGTBL_PTE_ADDR_SHIFT 10
+#define PGTBL_PTE_RSW_MASK 0x0000000000000300ULL
+#define PGTBL_PTE_RSW_SHIFT 8
+#define PGTBL_PTE_DIRTY_MASK 0x0000000000000080ULL
+#define PGTBL_PTE_DIRTY_SHIFT 7
+#define PGTBL_PTE_ACCESSED_MASK 0x0000000000000040ULL
+#define PGTBL_PTE_ACCESSED_SHIFT 6
+#define PGTBL_PTE_GLOBAL_MASK 0x0000000000000020ULL
+#define PGTBL_PTE_GLOBAL_SHIFT 5
+#define PGTBL_PTE_USER_MASK 0x0000000000000010ULL
+#define PGTBL_PTE_USER_SHIFT 4
+#define PGTBL_PTE_EXECUTE_MASK 0x0000000000000008ULL
+#define PGTBL_PTE_EXECUTE_SHIFT 3
+#define PGTBL_PTE_WRITE_MASK 0x0000000000000004ULL
+#define PGTBL_PTE_WRITE_SHIFT 2
+#define PGTBL_PTE_READ_MASK 0x0000000000000002ULL
+#define PGTBL_PTE_READ_SHIFT 1
+#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_EXECUTE_MASK | \
+ PGTBL_PTE_WRITE_MASK | \
+ PGTBL_PTE_READ_MASK)
+#define PGTBL_PTE_VALID_MASK 0x0000000000000001ULL
+#define PGTBL_PTE_VALID_SHIFT 0
+
+#define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE
+#define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT
+
+#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
+#define SATP_MODE_39 _AC(0x8000000000000000, UL)
+#define SATP_MODE_48 _AC(0x9000000000000000, UL)
+#define SATP_ASID_BITS 16
+#define SATP_ASID_SHIFT 44
+#define SATP_ASID_MASK _AC(0xFFFF, UL)
+
+#define SBI_EXT_EXPERIMENTAL_START 0x08000000
+#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
+
+#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
+
+struct sbiret {
+ long error;
+ long value;
+};
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5);
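+/*
+ * Usage sketch: per the SBI calling convention, the arguments go in
+ * a0-a5 with the extension/function ids in a7/a6, and the error/value
+ * pair comes back in a0/a1, e.g.:
+ *
+ *	struct sbiret ret = sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, 0,
+ *				      arg0, 0, 0, 0, 0, 0);
+ */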
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
new file mode 100644
index 000000000000..9eecc9d40b79
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/ucall_common.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+#ifndef SELFTEST_KVM_UCALL_COMMON_H
+#define SELFTEST_KVM_UCALL_COMMON_H
+
+/* Common ucalls */
+enum {
+ UCALL_NONE,
+ UCALL_SYNC,
+ UCALL_ABORT,
+ UCALL_DONE,
+ UCALL_UNHANDLED,
+};
+
+#define UCALL_MAX_ARGS 6
+
+struct ucall {
+ uint64_t cmd;
+ uint64_t args[UCALL_MAX_ARGS];
+};
+
+void ucall_init(struct kvm_vm *vm, void *arg);
+void ucall_uninit(struct kvm_vm *vm);
+void ucall(uint64_t cmd, int nargs, ...);
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
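+/*
+ * Typical host-side loop (sketch):
+ *
+ *	struct ucall uc;
+ *
+ *	vcpu_run(vm, VCPU_ID);
+ *	switch (get_ucall(vm, VCPU_ID, &uc)) {
+ *	case UCALL_SYNC:	... handle a GUEST_SYNC ...	break;
+ *	case UCALL_DONE:	return;
+ *	}
+ */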
+
+#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
+ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
+#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_DONE() ucall(UCALL_DONE, 0)
+#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \
+ if (!(_condition)) \
+ ucall(UCALL_ABORT, 2 + _nargs, \
+ "Failed guest assert: " \
+ _condstr, __LINE__, _args); \
+} while (0)
+
+#define GUEST_ASSERT(_condition) \
+ __GUEST_ASSERT(_condition, #_condition, 0, 0)
+
+#define GUEST_ASSERT_1(_condition, arg1) \
+ __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
+
+#define GUEST_ASSERT_2(_condition, arg1, arg2) \
+ __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
+
+#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
+ __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
+
+#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
+ __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
+
+#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
+
+#endif /* SELFTEST_KVM_UCALL_COMMON_H */
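On the host side, tests typically drive the vCPU in a loop and dispatch on the command returned by get_ucall(). A minimal sketch of that pattern, assuming a single vCPU with id 0 and the vcpu_run()/TEST_FAIL() helpers from the selftests library:

/* Sketch of the common host-side ucall dispatch loop. */
static void run_guest_until_done(struct kvm_vm *vm)
{
	struct ucall uc;

	for (;;) {
		vcpu_run(vm, 0);
		switch (get_ucall(vm, 0, &uc)) {
		case UCALL_SYNC:
			/* uc.args[1] holds the stage from GUEST_SYNC(). */
			break;
		case UCALL_ABORT:
			TEST_FAIL("%s at line %ld", (const char *)uc.args[0],
				  (long)uc.args[1]);
			/* not reached */
		case UCALL_DONE:
			return;
		default:
			TEST_FAIL("Unexpected ucall 0x%lx",
				  (unsigned long)uc.cmd);
		}
	}
}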
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 05e65ca1c30c..8a470da7b71a 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -10,8 +10,10 @@
#include <assert.h>
#include <stdint.h>
+#include <syscall.h>
#include <asm/msr-index.h>
+#include <asm/prctl.h>
#include "../kvm_util.h"
@@ -92,6 +94,21 @@ struct desc_ptr {
uint64_t address;
} __attribute__((packed));
+struct kvm_x86_state {
+ struct kvm_xsave *xsave;
+ struct kvm_vcpu_events events;
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ struct kvm_xcrs xcrs;
+ struct kvm_sregs sregs;
+ struct kvm_debugregs debugregs;
+ union {
+ struct kvm_nested_state nested;
+ char nested_[16384];
+ };
+ struct kvm_msrs msrs;
+};
+
static inline uint64_t get_desc64_base(const struct desc64 *desc)
{
return ((uint64_t)desc->base3 << 32) |
@@ -347,17 +364,37 @@ static inline unsigned long get_xmm(int n)
}
bool is_intel_cpu(void);
+bool is_amd_cpu(void);
+
+static inline unsigned int x86_family(unsigned int eax)
+{
+ unsigned int x86;
+
+ x86 = (eax >> 8) & 0xf;
+
+ if (x86 == 0xf)
+ x86 += (eax >> 20) & 0xff;
+
+ return x86;
+}
+
+static inline unsigned int x86_model(unsigned int eax)
+{
+ return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
+}
-struct kvm_x86_state;
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_x86_state *state);
+void kvm_x86_state_cleanup(struct kvm_x86_state *state);
struct kvm_msr_list *kvm_get_msr_index_list(void);
uint64_t kvm_get_feature_msr(uint64_t msr_index);
struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_cpuid2 *cpuid);
void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_cpuid2 *cpuid);
@@ -402,6 +439,11 @@ void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
uint64_t pte);
/*
+ * get_cpuid() - find matching CPUID entry and return pointer to it.
+ */
+struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function,
+ uint32_t index);
+/*
* set_cpuid() - overwrites a matching cpuid entry with the provided value.
* matches based on ent->function && ent->index. returns true
* if a match was found and successfully overwritten.
@@ -416,6 +458,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+void vm_xsave_req_perm(int bit);
enum x86_page_size {
X86_PAGE_SIZE_4K = 0,
@@ -443,4 +486,11 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
/* VMX_EPT_VPID_CAP bits */
#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
+#define XSTATE_XTILE_CFG_BIT 17
+#define XSTATE_XTILE_DATA_BIT 18
+
+#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
+#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT)
+#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK | \
+ XSTATE_XTILE_DATA_MASK)
#endif /* SELFTEST_KVM_PROCESSOR_H */
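Tying the additions together: a test that uses dynamically-enabled XSAVE features (AMX tile data here) must request guest permission before creating its vCPU. A minimal sketch, with create_amx_vm() as a hypothetical helper name:

/* Sketch: request XTILEDATA permission, then create the test VM. */
static struct kvm_vm *create_amx_vm(void *guest_code)
{
	/* Skips the test (KSFT_SKIP) if AMX/XFD support is missing. */
	vm_xsave_req_perm(XSTATE_XTILE_DATA_BIT);

	return vm_create_default(0 /* vcpuid */, 0, guest_code);
}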
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index f968dfd4ee88..aed9dc3ca1e9 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -12,6 +12,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/resource.h>
#include "test_util.h"
@@ -40,11 +41,40 @@ int main(int argc, char *argv[])
{
int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ /*
+ * Number of file descriptors required: KVM_CAP_MAX_VCPUS for vCPU fds
+ * plus an arbitrary number for everything else.
+ */
+ int nr_fds_wanted = kvm_max_vcpus + 100;
+ struct rlimit rl;
pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
/*
+ * Check that we're allowed to open nr_fds_wanted file descriptors and
+ * try raising the limits if needed.
+ */
+ TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+ if (rl.rlim_cur < nr_fds_wanted) {
+ rl.rlim_cur = nr_fds_wanted;
+ if (rl.rlim_max < nr_fds_wanted) {
+ int old_rlim_max = rl.rlim_max;
+ rl.rlim_max = nr_fds_wanted;
+
+ int r = setrlimit(RLIMIT_NOFILE, &rl);
+ if (r < 0) {
+ printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n",
+ old_rlim_max, nr_fds_wanted);
+ exit(KSFT_SKIP);
+ }
+ } else {
+ TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+ }
+ }
+
+ /*
* Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
* Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID
* in this case.
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 3836322add00..ba1fdc3dcf4a 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -280,7 +280,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
#ifdef __s390x__
alignment = max(0x100000, alignment);
#endif
- guest_test_phys_mem = align_down(guest_test_virt_mem, alignment);
+ guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
/* Set up the shared data structure test_args */
test_args.vm = vm;
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c
index fff4fc27504d..55668631d546 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c
@@ -93,3 +93,69 @@ void gic_set_eoi(unsigned int intid)
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_write_eoir(intid);
}
+
+void gic_set_dir(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_write_dir(intid);
+}
+
+void gic_set_eoi_split(bool split)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_eoi_split(split);
+}
+
+void gic_set_priority_mask(uint64_t pmr)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority_mask(pmr);
+}
+
+void gic_set_priority(unsigned int intid, unsigned int prio)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority(intid, prio);
+}
+
+void gic_irq_set_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_active(intid);
+}
+
+void gic_irq_clear_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_active(intid);
+}
+
+bool gic_irq_get_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_active(intid);
+}
+
+void gic_irq_set_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_pending(intid);
+}
+
+void gic_irq_clear_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_pending(intid);
+}
+
+bool gic_irq_get_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_pending(intid);
+}
+
+void gic_irq_set_config(unsigned int intid, bool is_edge)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_config(intid, is_edge);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
index d81d739433dc..75d07313c893 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
@@ -14,6 +14,17 @@ struct gic_common_ops {
void (*gic_irq_disable)(unsigned int intid);
uint64_t (*gic_read_iar)(void);
void (*gic_write_eoir)(uint32_t irq);
+ void (*gic_write_dir)(uint32_t irq);
+ void (*gic_set_eoi_split)(bool split);
+ void (*gic_set_priority_mask)(uint64_t mask);
+ void (*gic_set_priority)(uint32_t intid, uint32_t prio);
+ void (*gic_irq_set_active)(uint32_t intid);
+ void (*gic_irq_clear_active)(uint32_t intid);
+ bool (*gic_irq_get_active)(uint32_t intid);
+ void (*gic_irq_set_pending)(uint32_t intid);
+ void (*gic_irq_clear_pending)(uint32_t intid);
+ bool (*gic_irq_get_pending)(uint32_t intid);
+ void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
};
extern const struct gic_common_ops gicv3_ops;
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
index 2dbf3339b62e..00f613c0583c 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
@@ -19,7 +19,8 @@ struct gicv3_data {
unsigned int nr_spis;
};
-#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+#define DIST_BIT (1U << 31)
enum gicv3_intid_range {
SGI_RANGE,
@@ -50,6 +51,14 @@ static void gicv3_gicr_wait_for_rwp(void *redist_base)
}
}
+static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
+{
+ if (cpu_or_dist & DIST_BIT)
+ gicv3_gicd_wait_for_rwp();
+ else
+ gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu_or_dist]);
+}
+
static enum gicv3_intid_range get_intid_range(unsigned int intid)
{
switch (intid) {
@@ -81,39 +90,175 @@ static void gicv3_write_eoir(uint32_t irq)
isb();
}
-static void
-gicv3_config_irq(unsigned int intid, unsigned int offset)
+static void gicv3_write_dir(uint32_t irq)
+{
+ write_sysreg_s(irq, SYS_ICC_DIR_EL1);
+ isb();
+}
+
+static void gicv3_set_priority_mask(uint64_t mask)
+{
+ write_sysreg_s(mask, SYS_ICC_PMR_EL1);
+}
+
+static void gicv3_set_eoi_split(bool split)
+{
+ uint32_t val;
+
+ /*
+ * All other fields are read-only, so no need to read CTLR first. In
+ * fact, the kernel does the same.
+ */
+ val = split ? (1U << 1) : 0;
+ write_sysreg_s(val, SYS_ICC_CTLR_EL1);
+ isb();
+}
+
+uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
+{
+ void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
+ : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ return readl(base + offset);
+}
+
+void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
+{
+ void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
+ : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ writel(reg_val, base + offset);
+}
+
+uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
+{
+ return gicv3_reg_readl(cpu_or_dist, offset) & mask;
+}
+
+void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
+ uint32_t mask, uint32_t reg_val)
+{
+ uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
+
+ tmp |= (reg_val & mask);
+ gicv3_reg_writel(cpu_or_dist, offset, tmp);
+}
+
+/*
+ * We use a single offset for the distributor and redistributor maps, as
+ * register offsets have the same value in both. The only exceptions are
+ * registers that exist in one map but not the other, like GICR_WAKER,
+ * which doesn't exist in the distributor map. Such registers are
+ * conveniently marked as reserved in the map that doesn't implement them;
+ * e.g., GICR_WAKER's offset of 0x0014 is marked "Reserved" in the
+ * Distributor map.
+ */
+static void gicv3_access_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field,
+ bool write, uint32_t *val)
{
uint32_t cpu = guest_get_vcpuid();
- uint32_t mask = 1 << (intid % 32);
enum gicv3_intid_range intid_range = get_intid_range(intid);
- void *reg;
-
- /* We care about 'cpu' only for SGIs or PPIs */
- if (intid_range == SGI_RANGE || intid_range == PPI_RANGE) {
- GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
-
- reg = sgi_base_from_redist(gicv3_data.redist_base[cpu]) +
- offset;
- writel(mask, reg);
- gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu]);
- } else if (intid_range == SPI_RANGE) {
- reg = gicv3_data.dist_base + offset + (intid / 32) * 4;
- writel(mask, reg);
- gicv3_gicd_wait_for_rwp();
- } else {
- GUEST_ASSERT(0);
- }
+ uint32_t fields_per_reg, index, mask, shift;
+ uint32_t cpu_or_dist;
+
+ GUEST_ASSERT(bits_per_field <= reg_bits);
+ GUEST_ASSERT(*val < (1U << bits_per_field));
+ /*
+ * Registers like IROUTER are 64 bits wide and are currently not
+ * supported by readl nor writel, so just assert here until they are.
+ */
+ GUEST_ASSERT(reg_bits == 32);
+
+ fields_per_reg = reg_bits / bits_per_field;
+ index = intid % fields_per_reg;
+ shift = index * bits_per_field;
+ mask = ((1U << bits_per_field) - 1) << shift;
+
+ /* Set offset to the actual register holding intid's config. */
+ offset += (intid / fields_per_reg) * (reg_bits / 8);
+
+ cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu;
+
+ if (write)
+ gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift);
+ *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
+}
+
+static void gicv3_write_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
+{
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, true, &val);
+}
+
+static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field)
+{
+ uint32_t val;
+
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, false, &val);
+ return val;
+}
+
+static void gicv3_set_priority(uint32_t intid, uint32_t prio)
+{
+ gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
+}
+
+/* Sets the intid to be level-sensitive or edge-triggered. */
+static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
+{
+ uint32_t val;
+
+ /* N/A for private interrupts. */
+ GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
+ val = is_edge ? 2 : 0;
+ gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
+}
+
+static void gicv3_irq_enable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_disable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_set_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_active(uint32_t intid)
+{
+ return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
+}
+
+static void gicv3_irq_set_pending(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
}
-static void gicv3_irq_enable(unsigned int intid)
+static void gicv3_irq_clear_pending(uint32_t intid)
{
- gicv3_config_irq(intid, GICD_ISENABLER);
+ gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
}
-static void gicv3_irq_disable(unsigned int intid)
+static bool gicv3_irq_get_pending(uint32_t intid)
{
- gicv3_config_irq(intid, GICD_ICENABLER);
+ return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
}
static void gicv3_enable_redist(void *redist_base)
@@ -237,4 +382,15 @@ const struct gic_common_ops gicv3_ops = {
.gic_irq_disable = gicv3_irq_disable,
.gic_read_iar = gicv3_read_iar,
.gic_write_eoir = gicv3_write_eoir,
+ .gic_write_dir = gicv3_write_dir,
+ .gic_set_priority_mask = gicv3_set_priority_mask,
+ .gic_set_eoi_split = gicv3_set_eoi_split,
+ .gic_set_priority = gicv3_set_priority,
+ .gic_irq_set_active = gicv3_irq_set_active,
+ .gic_irq_clear_active = gicv3_irq_clear_active,
+ .gic_irq_get_active = gicv3_irq_get_active,
+ .gic_irq_set_pending = gicv3_irq_set_pending,
+ .gic_irq_clear_pending = gicv3_irq_clear_pending,
+ .gic_irq_get_pending = gicv3_irq_get_pending,
+ .gic_irq_set_config = gicv3_irq_set_config,
};
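To make the field arithmetic in gicv3_access_reg() concrete: for IPRIORITYR (32-bit registers, 8 bits per field) and intid 42, fields_per_reg is 4, so index = 42 % 4 = 2, shift = 16, mask = 0xff0000, and the register offset advances by (42 / 4) * 4 = 40 bytes. A standalone check of the same computation (illustrative only, no GIC headers assumed):

#include <stdint.h>
#include <stdio.h>

/* Recomputes the field coordinates used by gicv3_access_reg(). */
int main(void)
{
	uint32_t intid = 42, reg_bits = 32, bits_per_field = 8;
	uint32_t fields_per_reg = reg_bits / bits_per_field;
	uint32_t index = intid % fields_per_reg;
	uint32_t shift = index * bits_per_field;
	uint32_t mask = ((1U << bits_per_field) - 1) << shift;
	uint64_t offset = (intid / fields_per_reg) * (reg_bits / 8);

	/* Prints: index=2 shift=16 mask=0xff0000 offset=40 */
	printf("index=%u shift=%u mask=0x%x offset=%llu\n",
	       index, shift, mask, (unsigned long long)offset);
	return 0;
}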
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index b4eeeafd2a70..9343d82519b4 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -8,6 +8,7 @@
#include <linux/compiler.h>
#include <assert.h>
+#include "guest_modes.h"
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"
@@ -237,6 +238,7 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init
get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1);
get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1);
+ /* Configure base granule size */
switch (vm->mode) {
case VM_MODE_P52V48_4K:
TEST_FAIL("AArch64 does not support 4K sized pages "
@@ -245,25 +247,47 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init
TEST_FAIL("AArch64 does not support 4K sized pages "
"with ANY-bit physical address ranges");
case VM_MODE_P52V48_64K:
+ case VM_MODE_P48V48_64K:
+ case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ break;
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
break;
case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ /* Configure output size */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_64K:
+ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ break;
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P48V48_16K:
case VM_MODE_P48V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
break;
case VM_MODE_P40V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
- break;
+ case VM_MODE_P40V48_16K:
case VM_MODE_P40V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
break;
+ case VM_MODE_P36V48_4K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V48_64K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+ break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
@@ -432,3 +456,47 @@ uint32_t guest_get_vcpuid(void)
{
return read_sysreg(tpidr_el1);
}
+
+void aarch64_get_supported_page_sizes(uint32_t ipa,
+ bool *ps4k, bool *ps16k, bool *ps64k)
+{
+ struct kvm_vcpu_init preferred_init;
+ int kvm_fd, vm_fd, vcpu_fd, err;
+ uint64_t val;
+ struct kvm_one_reg reg = {
+ .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
+ .addr = (uint64_t)&val,
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, ipa);
+ TEST_ASSERT(vm_fd >= 0, "Can't create VM");
+
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ TEST_ASSERT(vcpu_fd >= 0, "Can't create vcpu");
+
+ err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
+ TEST_ASSERT(err == 0, "Can't get target");
+ err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
+ TEST_ASSERT(err == 0, "Can't init vcpu");
+
+ err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ TEST_ASSERT(err == 0, "Can't get MMFR0");
+
+ *ps4k = ((val >> 28) & 0xf) != 0xf;
+ *ps64k = ((val >> 24) & 0xf) == 0;
+ *ps16k = ((val >> 20) & 0xf) != 0;
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+}
+
+/*
+ * arm64 doesn't have a true default mode, so start by computing the
+ * available IPA space and page sizes early.
+ */
+void __attribute__((constructor)) init_guest_modes(void)
+{
+ guest_modes_append_default();
+}
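The three extractions above decode the TGran4 (bits 31:28), TGran64 (bits 27:24) and TGran16 (bits 23:20) fields of ID_AA64MMFR0_EL1: 4K is supported unless its field reads 0xf, 64K is supported only when its field reads 0, and 16K is supported only when its field is nonzero. The same decoding as a standalone sketch over a caller-supplied value:

#include <stdbool.h>
#include <stdint.h>

/* Mirrors aarch64_get_supported_page_sizes() on an arbitrary MMFR0 value. */
void decode_mmfr0(uint64_t val, bool *ps4k, bool *ps16k, bool *ps64k)
{
	*ps4k = ((val >> 28) & 0xf) != 0xf;	/* TGran4: 0xf == no 4K  */
	*ps64k = ((val >> 24) & 0xf) == 0;	/* TGran64: 0 == 64K ok  */
	*ps16k = ((val >> 20) & 0xf) != 0;	/* TGran16: 0 == no 16K  */
}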
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
index b9b271ff520d..b3a0fca0d780 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -5,11 +5,14 @@
#include <linux/kvm.h>
#include <linux/sizes.h>
+#include <asm/kvm_para.h>
#include <asm/kvm.h>
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
/*
* vGIC-v3 default host setup
@@ -28,7 +31,7 @@
* redistributor regions of the guest. Since it depends on the number of
* vCPUs for the VM, it must be called after all the vCPUs have been created.
*/
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
uint64_t gicd_base_gpa, uint64_t gicr_base_gpa)
{
int gic_fd;
@@ -50,6 +53,13 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
/* Distributor setup */
gic_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+
+ kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
+ 0, &nr_irqs, true);
+
+ kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+
kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa, true);
nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
@@ -68,3 +78,94 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
return gic_fd;
}
+
+/* Only works for level-sensitive interrupts. */
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ uint64_t attr = 32 * (intid / 32);
+ uint64_t index = intid % 32;
+ uint64_t val;
+ int ret;
+
+ ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val, false);
+ if (ret != 0)
+ return ret;
+
+ val |= 1U << index;
+ ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val, true);
+ return ret;
+}
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+
+ TEST_ASSERT(ret == 0, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
+
+ TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
+ "doesn't allow injecting SGIs. There's no mask for it.");
+
+ if (INTID_IS_PPI(intid))
+ irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
+ else
+ irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT;
+
+ return _kvm_irq_line(vm, irq, level);
+}
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ int ret = _kvm_arm_irq_line(vm, intid, level);
+
+ TEST_ASSERT(ret == 0, "KVM_IRQ_LINE failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+static void vgic_poke_irq(int gic_fd, uint32_t intid,
+ uint32_t vcpu, uint64_t reg_off)
+{
+ uint64_t reg = intid / 32;
+ uint64_t index = intid % 32;
+ uint64_t attr = reg_off + reg * 4;
+ uint64_t val;
+ bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
+
+ /* Check that the addr part of the attr is within 32 bits. */
+ assert(attr <= KVM_DEV_ARM_VGIC_OFFSET_MASK);
+
+ uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+ : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
+
+ if (intid_is_private) {
+ /* TODO: only vcpu 0 implemented for now. */
+ assert(vcpu == 0);
+ attr += SZ_64K;
+ }
+
+ /*
+ * All calls will succeed, even with invalid intids, as long as the
+ * addr part of the attr is within 32 bits (checked above). An invalid
+ * intid will just make the read/writes point above the intended
+ * register space (i.e., ICPENDR after ISPENDR).
+ */
+ kvm_device_access(gic_fd, group, attr, &val, false);
+ val |= 1ULL << index;
+ kvm_device_access(gic_fd, group, attr, &val, true);
+}
+
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
+}
+
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
+}
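As a worked example of the attr encoding in vgic_poke_irq(): for SPI 42 and GICD_ISPENDR, reg = 42 / 32 = 1 and index = 42 % 32 = 10, so the access lands on GICD_ISPENDR + 4 in the distributor map and sets bit 10. From a test this is a one-liner; a sketch, assuming gic_fd came from vgic_v3_setup():

/* Sketch: mark SPI 42 as pending via the KVM device attr interface. */
static void pend_spi_42(int gic_fd)
{
	kvm_irq_write_ispendr(gic_fd, 42, 0 /* vcpu, unused for SPIs */);
}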
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
index c330f414ef96..8784013b747c 100644
--- a/tools/testing/selftests/kvm/lib/guest_modes.c
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -4,22 +4,59 @@
*/
#include "guest_modes.h"
+#ifdef __aarch64__
+#include "processor.h"
+enum vm_guest_mode vm_mode_default;
+#endif
+
struct guest_mode guest_modes[NUM_VM_MODES];
void guest_modes_append_default(void)
{
+#ifndef __aarch64__
guest_mode_append(VM_MODE_DEFAULT, true, true);
-
-#ifdef __aarch64__
- guest_mode_append(VM_MODE_P40V48_64K, true, true);
+#else
{
unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+ bool ps4k, ps16k, ps64k;
+ int i;
+
+ aarch64_get_supported_page_sizes(limit, &ps4k, &ps16k, &ps64k);
+
+ vm_mode_default = NUM_VM_MODES;
+
if (limit >= 52)
- guest_mode_append(VM_MODE_P52V48_64K, true, true);
+ guest_mode_append(VM_MODE_P52V48_64K, ps64k, ps64k);
if (limit >= 48) {
- guest_mode_append(VM_MODE_P48V48_4K, true, true);
- guest_mode_append(VM_MODE_P48V48_64K, true, true);
+ guest_mode_append(VM_MODE_P48V48_4K, ps4k, ps4k);
+ guest_mode_append(VM_MODE_P48V48_16K, ps16k, ps16k);
+ guest_mode_append(VM_MODE_P48V48_64K, ps64k, ps64k);
+ }
+ if (limit >= 40) {
+ guest_mode_append(VM_MODE_P40V48_4K, ps4k, ps4k);
+ guest_mode_append(VM_MODE_P40V48_16K, ps16k, ps16k);
+ guest_mode_append(VM_MODE_P40V48_64K, ps64k, ps64k);
+ if (ps4k)
+ vm_mode_default = VM_MODE_P40V48_4K;
}
+ if (limit >= 36) {
+ guest_mode_append(VM_MODE_P36V48_4K, ps4k, ps4k);
+ guest_mode_append(VM_MODE_P36V48_16K, ps16k, ps16k);
+ guest_mode_append(VM_MODE_P36V48_64K, ps64k, ps64k);
+ guest_mode_append(VM_MODE_P36V47_16K, ps16k, ps16k);
+ }
+
+ /*
+ * Pick the first supported IPA size if the default
+ * isn't available.
+ */
+ for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) {
+ if (guest_modes[i].supported && guest_modes[i].enabled)
+ vm_mode_default = i;
+ }
+
+ TEST_ASSERT(vm_mode_default != NUM_VM_MODES,
+ "No supported mode!");
}
#endif
#ifdef __s390x__
@@ -38,6 +75,16 @@ void guest_modes_append_default(void)
guest_mode_append(VM_MODE_P47V64_4K, true, true);
}
#endif
+#ifdef __riscv
+ {
+ unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS);
+
+ if (sz >= 52)
+ guest_mode_append(VM_MODE_P52V48_4K, true, true);
+ if (sz >= 48)
+ guest_mode_append(VM_MODE_P48V48_4K, true, true);
+ }
+#endif
}
void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg)
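Tests consume the table built above through for_each_guest_mode(). A minimal sketch of the usual pattern, with run_test standing in for a test-specific callback:

/* Sketch: run a per-mode test body for every enabled guest mode. */
static void run_test(enum vm_guest_mode mode, void *arg)
{
	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
	/* ... create a VM in this mode and exercise it ... */
}

/* Called from main() after guest_modes_append_default(). */
static void run_all_modes(void)
{
	for_each_guest_mode(run_test, NULL);
}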
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 8f2e0bb1ef96..d8cf851ab119 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -85,6 +85,33 @@ int kvm_check_cap(long cap)
return ret;
}
+/* VM Check Capability
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return:
+ * On success, the value corresponding to the capability (KVM_CAP_*)
+ * specified by the value of cap. On failure a TEST_ASSERT failure
+ * is produced.
+ *
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+int vm_check_cap(struct kvm_vm *vm, long cap)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap);
+ TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n"
+ " rc: %i errno: %i", ret, errno);
+
+ return ret;
+}
+
/* VM Enable Capability
*
* Input Args:
@@ -166,12 +193,18 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages",
[VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages",
[VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages",
+ [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages",
[VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages",
[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
+ [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
[VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
[VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
+ [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
+ [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
+ [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
+ [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
"Missing new mode strings?");
@@ -185,12 +218,18 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
[VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 },
[VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 },
[VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 },
+ [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 },
[VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 },
[VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
+ [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
[VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
[VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
[VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
[VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
+ [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
+ [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
+ [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
+ [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
@@ -252,9 +291,19 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
vm->pgtable_levels = 3;
break;
case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
vm->pgtable_levels = 4;
break;
case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
+ vm->pgtable_levels = 3;
+ break;
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ vm->pgtable_levels = 4;
+ break;
+ case VM_MODE_P36V47_16K:
vm->pgtable_levels = 3;
break;
case VM_MODE_PXXV48_4K:
@@ -302,7 +351,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
/* Limit physical addresses to PA-bits. */
- vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+ vm->max_gfn = vm_compute_max_gfn(vm);
/* Allocate and setup memory for guest. */
vm->vpages_mapped = sparsebit_alloc();
@@ -443,9 +492,11 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
uint64_t first_page, uint32_t num_pages)
{
- struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
- .first_page = first_page,
- .num_pages = num_pages };
+ struct kvm_clear_dirty_log args = {
+ .dirty_bitmap = log, .slot = slot,
+ .first_page = first_page,
+ .num_pages = num_pages
+ };
int ret;
ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
@@ -2087,6 +2138,78 @@ int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
}
/*
+ * IRQ related functions.
+ */
+
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
+{
+ struct kvm_irq_level irq_level = {
+ .irq = irq,
+ .level = level,
+ };
+
+ return _vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
+}
+
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
+{
+ int ret = _kvm_irq_line(vm, irq, level);
+
+ TEST_ASSERT(ret >= 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", ret, errno);
+}
+
+struct kvm_irq_routing *kvm_gsi_routing_create(void)
+{
+ struct kvm_irq_routing *routing;
+ size_t size;
+
+ size = sizeof(struct kvm_irq_routing);
+ /* Allocate space for the max number of entries: this wastes ~192 KiB. */
+ size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
+ routing = calloc(1, size);
+ assert(routing);
+
+ return routing;
+}
+
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin)
+{
+ int i;
+
+ assert(routing);
+ assert(routing->nr < KVM_MAX_IRQ_ROUTES);
+
+ i = routing->nr;
+ routing->entries[i].gsi = gsi;
+ routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+ routing->entries[i].flags = 0;
+ routing->entries[i].u.irqchip.irqchip = 0;
+ routing->entries[i].u.irqchip.pin = pin;
+ routing->nr++;
+}
+
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
+{
+ int ret;
+
+ assert(routing);
+ ret = ioctl(vm_get_fd(vm), KVM_SET_GSI_ROUTING, routing);
+ free(routing);
+
+ return ret;
+}
+
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
+{
+ int ret;
+
+ ret = _kvm_gsi_routing_write(vm, routing);
+ TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/*
* VM Dump
*
* Input Args:
@@ -2328,6 +2451,11 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm)
return vm->page_shift;
}
+unsigned long __attribute__((weak)) vm_compute_max_gfn(struct kvm_vm *vm)
+{
+ return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+}
+
uint64_t vm_get_max_gfn(struct kvm_vm *vm)
{
return vm->max_gfn;
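The GSI routing helpers above follow a create/populate/write pattern, with the table freed by the write. A minimal sketch, assuming an in-kernel irqchip has already been created:

/* Sketch: route GSIs 0..3 to irqchip pins 0..3 and commit the table. */
static void setup_identity_routing(struct kvm_vm *vm)
{
	struct kvm_irq_routing *routing = kvm_gsi_routing_create();
	uint32_t i;

	for (i = 0; i < 4; i++)
		kvm_gsi_routing_irqchip_add(routing, i /* gsi */, i /* pin */);

	/* kvm_gsi_routing_write() also frees 'routing' on our behalf. */
	kvm_gsi_routing_write(vm, routing);
}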
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
new file mode 100644
index 000000000000..d377f2603d98
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V code
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/compiler.h>
+#include <assert.h>
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+
+#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+ /* Round v up to the next vm->page_size boundary. */
+ return (v + vm->page_size - 1) & ~(vm->page_size - 1);
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+ return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) <<
+ PGTBL_PAGE_SIZE_SHIFT;
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+ return PGTBL_PAGE_SIZE / sizeof(uint64_t);
+}
+
+static uint64_t pte_index_mask[] = {
+ PGTBL_L0_INDEX_MASK,
+ PGTBL_L1_INDEX_MASK,
+ PGTBL_L2_INDEX_MASK,
+ PGTBL_L3_INDEX_MASK,
+};
+
+static uint32_t pte_index_shift[] = {
+ PGTBL_L0_INDEX_SHIFT,
+ PGTBL_L1_INDEX_SHIFT,
+ PGTBL_L2_INDEX_SHIFT,
+ PGTBL_L3_INDEX_SHIFT,
+};
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+{
+ TEST_ASSERT(level > -1,
+ "Negative page table level (%d) not possible", level);
+ TEST_ASSERT(level < vm->pgtable_levels,
+ "Invalid page table level (%d)", level);
+
+ return (gva & pte_index_mask[level]) >> pte_index_shift[level];
+}
+
+void virt_pgd_alloc(struct kvm_vm *vm)
+{
+ if (!vm->pgd_created) {
+ vm_paddr_t paddr = vm_phy_pages_alloc(vm,
+ page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+ vm->pgd = paddr;
+ vm->pgd_created = true;
+ }
+}
+
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint64_t *ptep, next_ppn;
+ int level = vm->pgtable_levels - 1;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
+ if (!*ptep) {
+ next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_VALID_MASK;
+ }
+ level--;
+
+ while (level > -1) {
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
+ pte_index(vm, vaddr, level) * 8;
+ if (!*ptep && level > 0) {
+ next_ppn = vm_alloc_page_table(vm) >>
+ PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_VALID_MASK;
+ }
+ level--;
+ }
+
+ paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK;
+}
+
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+ int level = vm->pgtable_levels - 1;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ level--;
+
+ while (level > -1) {
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
+ pte_index(vm, gva, level) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ level--;
+ }
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d",
+ gva, level);
+ exit(1);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+ uint64_t page, int level)
+{
+#ifdef DEBUG
+ static const char *const type[] = { "pte", "pmd", "pud", "p4d"};
+ uint64_t pte, *ptep;
+
+ if (level < 0)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "",
+ type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1,
+ pte_addr(vm, *ptep), level - 1);
+ }
+#endif
+}
+
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level = vm->pgtable_levels - 1;
+ uint64_t pgd, *ptep;
+
+ if (!vm->pgd_created)
+ return;
+
+ for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
+ ptep = addr_gpa2hva(vm, pgd);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "",
+ pgd, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1,
+ pte_addr(vm, *ptep), level - 1);
+ }
+}
+
+void riscv_vcpu_mmu_setup(struct kvm_vm *vm, int vcpuid)
+{
+ unsigned long satp;
+
+ /*
+ * The RISC-V Sv48 MMU mode supports 56-bit physical addresses and
+ * 48-bit virtual addresses with a 4KB last-level page size.
+ */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_4K:
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
+ satp |= SATP_MODE_48;
+
+ set_reg(vm, vcpuid, RISCV_CSR_REG(satp), satp);
+}
+
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+ struct kvm_riscv_core core;
+
+ get_reg(vm, vcpuid, RISCV_CORE_REG(mode), &core.mode);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc), &core.regs.pc);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.ra), &core.regs.ra);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp), &core.regs.sp);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), &core.regs.gp);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.tp), &core.regs.tp);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t0), &core.regs.t0);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t1), &core.regs.t1);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t2), &core.regs.t2);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s0), &core.regs.s0);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s1), &core.regs.s1);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a0), &core.regs.a0);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a1), &core.regs.a1);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a2), &core.regs.a2);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a3), &core.regs.a3);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a4), &core.regs.a4);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a5), &core.regs.a5);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a6), &core.regs.a6);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a7), &core.regs.a7);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s2), &core.regs.s2);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s3), &core.regs.s3);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s4), &core.regs.s4);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s5), &core.regs.s5);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s6), &core.regs.s6);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s7), &core.regs.s7);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s8), &core.regs.s8);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s9), &core.regs.s9);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s10), &core.regs.s10);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s11), &core.regs.s11);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t3), &core.regs.t3);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t4), &core.regs.t4);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t5), &core.regs.t5);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t6), &core.regs.t6);
+
+ fprintf(stream,
+ " MODE: 0x%lx\n", core.mode);
+ fprintf(stream,
+ " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n",
+ core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp);
+ fprintf(stream,
+ " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n",
+ core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2);
+ fprintf(stream,
+ " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n",
+ core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1);
+ fprintf(stream,
+ " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n",
+ core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5);
+ fprintf(stream,
+ " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n",
+ core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3);
+ fprintf(stream,
+ " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n",
+ core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7);
+ fprintf(stream,
+ " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n",
+ core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11);
+ fprintf(stream,
+ " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n",
+ core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
+}
+
+static void guest_hang(void)
+{
+ while (1)
+ ;
+}
+
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+ int r;
+ size_t stack_size = vm->page_size == 4096 ?
+ DEFAULT_STACK_PGS * vm->page_size :
+ vm->page_size;
+ unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_RISCV_GUEST_STACK_VADDR_MIN);
+ unsigned long current_gp = 0;
+ struct kvm_mp_state mps;
+
+ vm_vcpu_add(vm, vcpuid);
+ riscv_vcpu_mmu_setup(vm, vcpuid);
+
+ /*
+ * With SBI HSM support in KVM RISC-V, all secondary VCPUs are
+ * powered-off by default, so ensure they are powered-on via the
+ * KVM_SET_MP_STATE ioctl().
+ */
+ mps.mp_state = KVM_MP_STATE_RUNNABLE;
+ r = _vcpu_ioctl(vm, vcpuid, KVM_SET_MP_STATE, &mps);
+ TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r);
+
+ /* Set up the guest's global pointer to match the host's. */
+ asm volatile (
+ "add %0, gp, zero" : "=r" (current_gp) : : "memory");
+ set_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), current_gp);
+
+ /* Set up the guest's stack pointer and program counter. */
+ set_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp),
+ stack_vaddr + stack_size);
+ set_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc),
+ (unsigned long)guest_code);
+
+ /* Set up the guest's default exception vector. */
+ set_reg(vm, vcpuid, RISCV_CSR_REG(stvec),
+ (unsigned long)guest_hang);
+}
+
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+ va_list ap;
+ uint64_t id = RISCV_CORE_REG(regs.a0);
+ int i;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ " num: %u\n", num);
+
+ va_start(ap, num);
+
+ for (i = 0; i < num; i++) {
+ switch (i) {
+ case 0:
+ id = RISCV_CORE_REG(regs.a0);
+ break;
+ case 1:
+ id = RISCV_CORE_REG(regs.a1);
+ break;
+ case 2:
+ id = RISCV_CORE_REG(regs.a2);
+ break;
+ case 3:
+ id = RISCV_CORE_REG(regs.a3);
+ break;
+ case 4:
+ id = RISCV_CORE_REG(regs.a4);
+ break;
+ case 5:
+ id = RISCV_CORE_REG(regs.a5);
+ break;
+ case 6:
+ id = RISCV_CORE_REG(regs.a6);
+ break;
+ case 7:
+ id = RISCV_CORE_REG(regs.a7);
+ break;
+ }
+ set_reg(vm, vcpuid, id, va_arg(ap, uint64_t));
+ }
+
+ va_end(ap);
+}
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+}
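To make the Sv48 walk in virt_pg_map() concrete: each level consumes nine VA bits, so pte_index() reduces to ((gva >> shift) & 0x1ff) with shifts 39/30/21/12 for levels 3 through 0. A standalone check under that assumption:

#include <stdint.h>
#include <stdio.h>

/* Recomputes pte_index() for each Sv48 level (9 index bits per level). */
int main(void)
{
	uint64_t gva = 0x40000000ULL;	/* sample guest virtual address */
	uint32_t shifts[] = { 12, 21, 30, 39 };	/* L0..L3 index shifts */
	int level;

	/* Prints indices 0, 1, 0, 0 for levels 3, 2, 1, 0. */
	for (level = 3; level >= 0; level--)
		printf("L%d index = %llu\n", level,
		       (unsigned long long)((gva >> shifts[level]) & 0x1ff));
	return 0;
}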
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
new file mode 100644
index 000000000000..9e42d8248fa6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/kvm.h>
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+
+void ucall_init(struct kvm_vm *vm, void *arg)
+{
+}
+
+void ucall_uninit(struct kvm_vm *vm)
+{
+}
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5)
+{
+ register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
+ register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
+ register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
+ register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);
+ register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4);
+ register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5);
+ register uintptr_t a6 asm ("a6") = (uintptr_t)(fid);
+ register uintptr_t a7 asm ("a7") = (uintptr_t)(ext);
+ struct sbiret ret;
+
+ asm volatile (
+ "ecall"
+ : "+r" (a0), "+r" (a1)
+ : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7)
+ : "memory");
+ ret.error = a0;
+ ret.value = a1;
+
+ return ret;
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+ struct ucall uc = {
+ .cmd = cmd,
+ };
+ va_list va;
+ int i;
+
+ nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+ uc.args[i] = va_arg(va, uint64_t);
+ va_end(va);
+
+ sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, 0, (vm_vaddr_t)&uc,
+ 0, 0, 0, 0, 0);
+}
+
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+ struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ struct ucall ucall = {};
+
+ if (uc)
+ memset(uc, 0, sizeof(*uc));
+
+ if (run->exit_reason == KVM_EXIT_RISCV_SBI &&
+ run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT &&
+ run->riscv_sbi.function_id == 0) {
+ memcpy(&ucall, addr_gva2hva(vm, run->riscv_sbi.args[0]),
+ sizeof(ucall));
+
+ vcpu_run_complete_io(vm, vcpu_id);
+ if (uc)
+ memcpy(uc, &ucall, sizeof(ucall));
+ }
+
+ return ucall.cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 82c39db91369..9f000dfb5594 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -650,6 +650,60 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid)
vcpu_sregs_set(vm, vcpuid, &sregs);
}
+#define CPUID_XFD_BIT (1 << 4)
+static bool is_xfd_supported(void)
+{
+ int eax, ebx, ecx, edx;
+ const int leaf = 0xd, subleaf = 0x1;
+
+ __asm__ __volatile__(
+ "cpuid"
+ : /* output */ "=a"(eax), "=b"(ebx),
+ "=c"(ecx), "=d"(edx)
+ : /* input */ "0"(leaf), "2"(subleaf));
+
+ return !!(eax & CPUID_XFD_BIT);
+}
+
+void vm_xsave_req_perm(int bit)
+{
+ int kvm_fd;
+ u64 bitmask;
+ long rc;
+ struct kvm_device_attr attr = {
+ .group = 0,
+ .attr = KVM_X86_XCOMP_GUEST_SUPP,
+ .addr = (unsigned long) &bitmask
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ rc = ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
+ close(kvm_fd);
+ if (rc == -1 && (errno == ENXIO || errno == EINVAL))
+ exit(KSFT_SKIP);
+ TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
+ if (!(bitmask & (1ULL << bit)))
+ exit(KSFT_SKIP);
+
+ if (!is_xfd_supported())
+ exit(KSFT_SKIP);
+
+ rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
+
+ /*
+ * Kernels older than v5.15 don't support ARCH_REQ_XCOMP_GUEST_PERM
+ * and return an error; in that case, simply return.
+ */
+ if (rc)
+ return;
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+ TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+ TEST_ASSERT(bitmask & (1ULL << bit),
+ "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
+ bitmask);
+}
+
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
struct kvm_mp_state mp_state;
@@ -847,6 +901,17 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
return entry;
}
+
+int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_cpuid2 *cpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ return ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+}
+
/*
* VM VCPU CPUID Set
*
@@ -864,12 +929,9 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
void vcpu_set_cpuid(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int rc;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+ rc = __vcpu_set_cpuid(vm, vcpuid, cpuid);
TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
rc, errno);
@@ -1017,21 +1079,6 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
sregs_dump(stream, &sregs, indent + 4);
}
-struct kvm_x86_state {
- struct kvm_vcpu_events events;
- struct kvm_mp_state mp_state;
- struct kvm_regs regs;
- struct kvm_xsave xsave;
- struct kvm_xcrs xcrs;
- struct kvm_sregs sregs;
- struct kvm_debugregs debugregs;
- union {
- struct kvm_nested_state nested;
- char nested_[16384];
- };
- struct kvm_msrs msrs;
-};
-
static int kvm_get_num_msrs_fd(int kvm_fd)
{
struct kvm_msr_list nmsrs;
@@ -1069,6 +1116,22 @@ struct kvm_msr_list *kvm_get_msr_index_list(void)
return list;
}
+static int vcpu_save_xsave_state(struct kvm_vm *vm, struct vcpu *vcpu,
+ struct kvm_x86_state *state)
+{
+ int size;
+
+ size = vm_check_cap(vm, KVM_CAP_XSAVE2);
+ if (!size)
+ size = sizeof(struct kvm_xsave);
+
+ state->xsave = malloc(size);
+ if (size == sizeof(struct kvm_xsave))
+ return ioctl(vcpu->fd, KVM_GET_XSAVE, state->xsave);
+ else
+ return ioctl(vcpu->fd, KVM_GET_XSAVE2, state->xsave);
+}
+
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
@@ -1096,25 +1159,25 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
list->nmsrs = nmsrs;
r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
+ r);
state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
+ r);
- r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
- r);
+ r = vcpu_save_xsave_state(vm, vcpu, state);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
+ r);
if (kvm_check_cap(KVM_CAP_XCRS)) {
r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
@@ -1123,17 +1186,17 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
}
r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
+ r);
if (nested_size) {
state->nested.size = sizeof(state->nested_);
r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
- r);
+ r);
TEST_ASSERT(state->nested.size <= nested_size,
- "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
- state->nested.size, nested_size);
+ "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+ state->nested.size, nested_size);
} else
state->nested.size = 0;
@@ -1141,12 +1204,12 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
for (i = 0; i < nmsrs; i++)
state->msrs.entries[i].index = list->indices[i];
r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
- TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
- r, r == nmsrs ? -1 : list->indices[r]);
+ TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
+ r, r == nmsrs ? -1 : list->indices[r]);
r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
+ r);
free(list);
return state;
@@ -1157,9 +1220,14 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int r;
- r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
- r);
+ r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
+ TEST_ASSERT(r == state->msrs.nmsrs,
+ "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
+ r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
if (kvm_check_cap(KVM_CAP_XCRS)) {
r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
@@ -1167,41 +1235,43 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
r);
}
- r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
- TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
- r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
+ r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
+ r);
if (state->nested.size) {
r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
- r);
+ r);
}
}
-bool is_intel_cpu(void)
+void kvm_x86_state_cleanup(struct kvm_x86_state *state)
{
+ free(state->xsave);
+ free(state);
+}
+
+static bool cpu_vendor_string_is(const char *vendor)
+{
+ const uint32_t *chunk = (const uint32_t *)vendor;
int eax, ebx, ecx, edx;
- const uint32_t *chunk;
const int leaf = 0;
__asm__ __volatile__(
@@ -1210,10 +1280,22 @@ bool is_intel_cpu(void)
"=c"(ecx), "=d"(edx)
: /* input */ "0"(leaf), "2"(0));
- chunk = (const uint32_t *)("GenuineIntel");
return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}
+bool is_intel_cpu(void)
+{
+ return cpu_vendor_string_is("GenuineIntel");
+}
+
+/*
+ * Exclude early K5 samples with a vendor string of "AMDisbetter!"
+ */
+bool is_amd_cpu(void)
+{
+ return cpu_vendor_string_is("AuthenticAMD");
+}
+
uint32_t kvm_get_cpuid_max_basic(void)
{
return kvm_get_supported_cpuid_entry(0)->eax;
@@ -1337,6 +1419,23 @@ void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
}
}
+struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function,
+ uint32_t index)
+{
+ int i;
+
+ for (i = 0; i < cpuid->nent; i++) {
+ struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];
+
+ if (cur->function == function && cur->index == index)
+ return cur;
+ }
+
+	TEST_FAIL("CPUID function 0x%x index 0x%x not found", function, index);
+
+ return NULL;
+}
+
bool set_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 *ent)
{
@@ -1431,3 +1530,52 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui
return cpuid;
}
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
+{
+ const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
+ unsigned long ht_gfn, max_gfn, max_pfn;
+ uint32_t eax, ebx, ecx, edx, max_ext_leaf;
+
+ max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
+
+ /* Avoid reserved HyperTransport region on AMD processors. */
+ if (!is_amd_cpu())
+ return max_gfn;
+
+ /* On parts with <40 physical address bits, the area is fully hidden */
+ if (vm->pa_bits < 40)
+ return max_gfn;
+
+ /* Before family 17h, the HyperTransport area is just below 1T. */
+ ht_gfn = (1 << 28) - num_ht_pages;
+ eax = 1;
+ ecx = 0;
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (x86_family(eax) < 0x17)
+ goto done;
+
+ /*
+ * Otherwise it's at the top of the physical address space, possibly
+ * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use
+ * the old conservative value if MAXPHYADDR is not enumerated.
+ */
+ eax = 0x80000000;
+ cpuid(&eax, &ebx, &ecx, &edx);
+ max_ext_leaf = eax;
+ if (max_ext_leaf < 0x80000008)
+ goto done;
+
+ eax = 0x80000008;
+ cpuid(&eax, &ebx, &ecx, &edx);
+ max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1;
+ if (max_ext_leaf >= 0x8000001f) {
+ eax = 0x8000001f;
+ cpuid(&eax, &ebx, &ecx, &edx);
+ max_pfn >>= (ebx >> 6) & 0x3f;
+ }
+
+ ht_gfn = max_pfn - num_ht_pages;
+done:
+ return min(max_gfn, ht_gfn - 1);
+}
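
A worked example of the arithmetic above, for a hypothetical part with 4 KiB
pages and 48 physical address bits: max_gfn = 2^36 - 1 and num_ht_pages =
12 << 18. On family 17h or newer with MAXPHYADDR = 48 and no SME reduction,
max_pfn = 2^36 - 1, so ht_gfn = max_pfn - num_ht_pages and the function
returns ht_gfn - 1, i.e. the top 12 GiB (plus one page) are excluded. On
older parts the carve-out instead sits just below 1 TiB (GFN 1 << 28).
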
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
new file mode 100644
index 000000000000..52a3ef6629e8
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * amx tests
+ *
+ * Copyright (C) 2021, Intel, Inc.
+ *
+ * Tests for amx #NM exception and save/restore.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+#define VCPU_ID 0
+#define X86_FEATURE_XSAVE (1 << 26)
+#define X86_FEATURE_OSXSAVE (1 << 27)
+
+#define PAGE_SIZE (1 << 12)
+#define NUM_TILES 8
+#define TILE_SIZE 1024
+#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
+
+/* Tile configuration layout: */
+#define MAX_TILES 16
+#define RESERVED_BYTES 14
+
+#define XFEATURE_XTILECFG 17
+#define XFEATURE_XTILEDATA 18
+#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG)
+#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA)
+#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
+
+#define TILE_CPUID 0x1d
+#define XSTATE_CPUID 0xd
+#define TILE_PALETTE_CPUID_SUBLEAVE 0x1
+#define XSTATE_USER_STATE_SUBLEAVE 0x0
+
+#define XSAVE_HDR_OFFSET 512
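+/* The XSAVE header begins at byte 512; XSTATE_BV is its first 8 bytes. */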
+
+struct xsave_data {
+ u8 area[XSAVE_SIZE];
+} __aligned(64);
+
+struct tile_config {
+ u8 palette_id;
+ u8 start_row;
+ u8 reserved[RESERVED_BYTES];
+ u16 colsb[MAX_TILES];
+ u8 rows[MAX_TILES];
+};
+
+struct tile_data {
+ u8 data[NUM_TILES * TILE_SIZE];
+};
+
+struct xtile_info {
+ u16 bytes_per_tile;
+ u16 bytes_per_row;
+ u16 max_names;
+ u16 max_rows;
+ u32 xsave_offset;
+ u32 xsave_size;
+};
+
+static struct xtile_info xtile;
+
+static inline u64 __xgetbv(u32 index)
+{
+ u32 eax, edx;
+
+ asm volatile("xgetbv;"
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax + ((u64)edx << 32);
+}
+
+static inline void __xsetbv(u32 index, u64 value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+}
+
+static inline void __ldtilecfg(void *cfg)
+{
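+	/* Raw encoding of "ldtilecfg (%rax)" for assemblers without AMX support */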
+ asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
+ : : "a"(cfg));
+}
+
+static inline void __tileloadd(void *tile)
+{
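+	/* Raw encoding of "tileloadd (%rax,%rdx,1), %tmm0"; %rdx is forced to 0 */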
+ asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
+ : : "a"(tile), "d"(0));
+}
+
+static inline void __tilerelease(void)
+{
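+	/* Raw encoding of "tilerelease": returns all tile registers to INIT state */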
+ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
+}
+
+static inline void __xsavec(struct xsave_data *data, uint64_t rfbm)
+{
+ uint32_t rfbm_lo = rfbm;
+ uint32_t rfbm_hi = rfbm >> 32;
+
+ asm volatile("xsavec (%%rdi)"
+ : : "D" (data), "a" (rfbm_lo), "d" (rfbm_hi)
+ : "memory");
+}
+
+static inline void check_cpuid_xsave(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ eax = 1;
+ ecx = 0;
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (!(ecx & X86_FEATURE_XSAVE))
+ GUEST_ASSERT(!"cpuid: no CPU xsave support!");
+ if (!(ecx & X86_FEATURE_OSXSAVE))
+ GUEST_ASSERT(!"cpuid: no OS xsave support!");
+}
+
+static bool check_xsave_supports_xtile(void)
+{
+ return __xgetbv(0) & XFEATURE_MASK_XTILE;
+}
+
+static bool enum_xtile_config(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ eax = TILE_CPUID;
+ ecx = TILE_PALETTE_CPUID_SUBLEAVE;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (!eax || !ebx || !ecx)
+ return false;
+
+ xtile.max_names = ebx >> 16;
+ if (xtile.max_names < NUM_TILES)
+ return false;
+
+ xtile.bytes_per_tile = eax >> 16;
+ if (xtile.bytes_per_tile < TILE_SIZE)
+ return false;
+
+ xtile.bytes_per_row = ebx;
+ xtile.max_rows = ecx;
+
+ return true;
+}
+
+static bool enum_xsave_tile(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ eax = XSTATE_CPUID;
+ ecx = XFEATURE_XTILEDATA;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (!eax || !ebx)
+ return false;
+
+ xtile.xsave_offset = ebx;
+ xtile.xsave_size = eax;
+
+ return true;
+}
+
+static bool check_xsave_size(void)
+{
+ u32 eax, ebx, ecx, edx;
+ bool valid = false;
+
+ eax = XSTATE_CPUID;
+ ecx = XSTATE_USER_STATE_SUBLEAVE;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (ebx && ebx <= XSAVE_SIZE)
+ valid = true;
+
+ return valid;
+}
+
+static bool check_xtile_info(void)
+{
+ bool ret = false;
+
+ if (!check_xsave_size())
+ return ret;
+
+ if (!enum_xsave_tile())
+ return ret;
+
+ if (!enum_xtile_config())
+ return ret;
+
+ if (sizeof(struct tile_data) >= xtile.xsave_size)
+ ret = true;
+
+ return ret;
+}
+
+static void set_tilecfg(struct tile_config *cfg)
+{
+ int i;
+
+ /* Only palette id 1 */
+ cfg->palette_id = 1;
+ for (i = 0; i < xtile.max_names; i++) {
+ cfg->colsb[i] = xtile.bytes_per_row;
+ cfg->rows[i] = xtile.max_rows;
+ }
+}
+
+static void set_xstatebv(void *data, uint64_t bv)
+{
+ *(uint64_t *)(data + XSAVE_HDR_OFFSET) = bv;
+}
+
+static u64 get_xstatebv(void *data)
+{
+ return *(u64 *)(data + XSAVE_HDR_OFFSET);
+}
+
+static void init_regs(void)
+{
+ uint64_t cr4, xcr0;
+
+ /* turn on CR4.OSXSAVE */
+ cr4 = get_cr4();
+ cr4 |= X86_CR4_OSXSAVE;
+ set_cr4(cr4);
+
+ xcr0 = __xgetbv(0);
+ xcr0 |= XFEATURE_MASK_XTILE;
+ __xsetbv(0x0, xcr0);
+}
+
+static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
+ struct tile_data *tiledata,
+ struct xsave_data *xsave_data)
+{
+ init_regs();
+ check_cpuid_xsave();
+ GUEST_ASSERT(check_xsave_supports_xtile());
+ GUEST_ASSERT(check_xtile_info());
+
+ /* check xtile configs */
+ GUEST_ASSERT(xtile.xsave_offset == 2816);
+ GUEST_ASSERT(xtile.xsave_size == 8192);
+ GUEST_ASSERT(xtile.max_names == 8);
+ GUEST_ASSERT(xtile.bytes_per_tile == 1024);
+ GUEST_ASSERT(xtile.bytes_per_row == 64);
+ GUEST_ASSERT(xtile.max_rows == 16);
+ GUEST_SYNC(1);
+
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(2);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ GUEST_SYNC(3);
+	/* Check save/restore when trapping to userspace */
+ __tileloadd(tiledata);
+ GUEST_SYNC(4);
+ __tilerelease();
+ GUEST_SYNC(5);
+	/* bit 18 (XTILEDATA) must not be set in XSTATE_BV after xsavec() */
+ set_xstatebv(xsave_data, XFEATURE_MASK_XTILEDATA);
+ __xsavec(xsave_data, XFEATURE_MASK_XTILEDATA);
+ GUEST_ASSERT((get_xstatebv(xsave_data) & XFEATURE_MASK_XTILEDATA) == 0);
+
+ /* xfd=0x40000, disable amx tiledata */
+ wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILEDATA);
+ GUEST_SYNC(6);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILEDATA);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ /* Trigger #NM exception */
+ __tileloadd(tiledata);
+ GUEST_SYNC(10);
+
+ GUEST_DONE();
+}
+
+void guest_nm_handler(struct ex_regs *regs)
+{
+ /* Check if #NM is triggered by XFEATURE_MASK_XTILEDATA */
+ GUEST_SYNC(7);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA);
+ GUEST_SYNC(8);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA);
+ /* Clear xfd_err */
+ wrmsr(MSR_IA32_XFD_ERR, 0);
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(9);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_cpuid_entry2 *entry;
+ struct kvm_regs regs1, regs2;
+ bool amx_supported = false;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_x86_state *state;
+ int xsave_restore_size = 0;
+ vm_vaddr_t amx_cfg, tiledata, xsavedata;
+ struct ucall uc;
+ u32 amx_offset;
+ int stage, ret;
+
+ vm_xsave_req_perm(XSTATE_XTILE_DATA_BIT);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ entry = kvm_get_supported_cpuid_entry(1);
+ if (!(entry->ecx & X86_FEATURE_XSAVE)) {
+ print_skip("XSAVE feature not supported");
+ exit(KSFT_SKIP);
+ }
+
+ if (kvm_get_cpuid_max_basic() >= 0xd) {
+ entry = kvm_get_supported_cpuid_index(0xd, 0);
+ amx_supported = entry && !!(entry->eax & XFEATURE_MASK_XTILE);
+ if (!amx_supported) {
+ print_skip("AMX is not supported by the vCPU (eax=0x%x)", entry->eax);
+ exit(KSFT_SKIP);
+ }
+ /* Get xsave/restore max size */
+ xsave_restore_size = entry->ecx;
+ }
+
+ run = vcpu_state(vm, VCPU_ID);
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ /* Register #NM handler */
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
+
+ /* amx cfg for guest_code */
+ amx_cfg = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
+
+ /* amx tiledata for guest_code */
+ tiledata = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
+
+ /* xsave data for guest_code */
+ xsavedata = vm_vaddr_alloc_pages(vm, 3);
+ memset(addr_gva2hva(vm, xsavedata), 0, 3 * getpagesize());
+ vcpu_args_set(vm, VCPU_ID, 3, amx_cfg, tiledata, xsavedata);
+
+ for (stage = 1; ; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s)\n",
+ stage, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ switch (uc.args[1]) {
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
+ break;
+ case 4:
+ case 10:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+
+				/*
+				 * In compacted mode the AMX tile data sits at
+				 * the end of the XSAVE area, so its offset is
+				 * the total save area size minus the 8K of
+				 * tile data.
+				 */
+				amx_offset = xsave_restore_size - NUM_TILES * TILE_SIZE;
+ state = vcpu_save_state(vm, VCPU_ID);
+ void *amx_start = (void *)state->xsave + amx_offset;
+ void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
+ /* Only check TMM0 register, 1 tile */
+ ret = memcmp(amx_start, tiles_data, TILE_SIZE);
+ TEST_ASSERT(ret == 0, "memcmp failed, ret=%d\n", ret);
+ kvm_x86_state_cleanup(state);
+ break;
+ case 9:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ fprintf(stderr, "UCALL_DONE\n");
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_load_state(vm, VCPU_ID, state);
+ run = vcpu_state(vm, VCPU_ID);
+ kvm_x86_state_cleanup(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vm, VCPU_ID, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
index a711f83749ea..16d2465c5634 100644
--- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
@@ -154,6 +154,34 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct
return guest_cpuids;
}
+static void set_cpuid_after_run(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid)
+{
+ struct kvm_cpuid_entry2 *ent;
+ int rc;
+ u32 eax, ebx, x;
+
+ /* Setting unmodified CPUID is allowed */
+ rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
+ /* Changing CPU features is forbidden */
+ ent = get_cpuid(cpuid, 0x7, 0);
+ ebx = ent->ebx;
+ ent->ebx--;
+ rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ TEST_ASSERT(rc, "Changing CPU features should fail");
+ ent->ebx = ebx;
+
+ /* Changing MAXPHYADDR is forbidden */
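+	/* CPUID.80000008H:EAX[7:0] enumerates MAXPHYADDR. */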
+ ent = get_cpuid(cpuid, 0x80000008, 0);
+ eax = ent->eax;
+ x = eax & 0xff;
+ ent->eax = (eax & ~0xffu) | (x - 1);
+ rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
+ ent->eax = eax;
+}
+
int main(void)
{
struct kvm_cpuid2 *supp_cpuid, *cpuid2;
@@ -175,5 +203,7 @@ int main(void)
for (stage = 0; stage < 3; stage++)
run_vcpu(vm, VCPU_ID, stage);
+ set_cpuid_after_run(vm, cpuid2);
+
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 2b46dcca86a8..4c7841dfd481 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -129,7 +129,7 @@ static void save_restore_vm(struct kvm_vm *vm)
vcpu_set_hv_cpuid(vm, VCPU_ID);
vcpu_enable_evmcs(vm, VCPU_ID);
vcpu_load_state(vm, VCPU_ID, state);
- free(state);
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
vcpu_regs_get(vm, VCPU_ID, &regs2);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 91d88aaa9899..672915ce73d8 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -165,10 +165,10 @@ static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid,
vcpu_set_cpuid(vm, VCPU_ID, cpuid);
}
-static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
- struct kvm_cpuid2 *best)
+static void guest_test_msrs_access(void)
{
struct kvm_run *run;
+ struct kvm_vm *vm;
struct ucall uc;
int stage = 0, r;
struct kvm_cpuid_entry2 feat = {
@@ -180,11 +180,34 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
struct kvm_cpuid_entry2 dbg = {
.function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
};
- struct kvm_enable_cap cap = {0};
-
- run = vcpu_state(vm, VCPU_ID);
+ struct kvm_cpuid2 *best;
+ vm_vaddr_t msr_gva;
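+	/* args[0] = 1 enables enforcement of guest-visible Hyper-V CPUID */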
+ struct kvm_enable_cap cap = {
+ .cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
+ .args = {1}
+ };
+ struct msr_data *msr;
while (true) {
+ vm = vm_create_default(VCPU_ID, 0, guest_msr);
+
+ msr_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+ msr = addr_gva2hva(vm, msr_gva);
+
+ vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
+ vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+ best = kvm_get_supported_hv_cpuid();
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+ run = vcpu_state(vm, VCPU_ID);
+
switch (stage) {
case 0:
/*
@@ -315,6 +338,7 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
* capability enabled and guest visible CPUID bit unset.
*/
cap.cap = KVM_CAP_HYPERV_SYNIC2;
+ cap.args[0] = 0;
vcpu_enable_cap(vm, VCPU_ID, &cap);
break;
case 22:
@@ -461,9 +485,9 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_SYNC:
- TEST_ASSERT(uc.args[1] == stage,
- "Unexpected stage: %ld (%d expected)\n",
- uc.args[1], stage);
+ TEST_ASSERT(uc.args[1] == 0,
+ "Unexpected stage: %ld (0 expected)\n",
+ uc.args[1]);
break;
case UCALL_ABORT:
TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
@@ -474,13 +498,14 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
}
stage++;
+ kvm_vm_free(vm);
}
}
-static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall,
- void *input, void *output, struct kvm_cpuid2 *best)
+static void guest_test_hcalls_access(void)
{
struct kvm_run *run;
+ struct kvm_vm *vm;
struct ucall uc;
int stage = 0, r;
struct kvm_cpuid_entry2 feat = {
@@ -493,10 +518,38 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
struct kvm_cpuid_entry2 dbg = {
.function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
};
-
- run = vcpu_state(vm, VCPU_ID);
+ struct kvm_enable_cap cap = {
+ .cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
+ .args = {1}
+ };
+ vm_vaddr_t hcall_page, hcall_params;
+ struct hcall_data *hcall;
+ struct kvm_cpuid2 *best;
while (true) {
+ vm = vm_create_default(VCPU_ID, 0, guest_hcall);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+ /* Hypercall input/output */
+ hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ hcall = addr_gva2hva(vm, hcall_page);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+ hcall_params = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+
+ vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+ vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+ best = kvm_get_supported_hv_cpuid();
+
+ run = vcpu_state(vm, VCPU_ID);
+
switch (stage) {
case 0:
hcall->control = 0xdeadbeef;
@@ -606,9 +659,9 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_SYNC:
- TEST_ASSERT(uc.args[1] == stage,
- "Unexpected stage: %ld (%d expected)\n",
- uc.args[1], stage);
+ TEST_ASSERT(uc.args[1] == 0,
+ "Unexpected stage: %ld (0 expected)\n",
+ uc.args[1]);
break;
case UCALL_ABORT:
TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
@@ -619,66 +672,15 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
}
stage++;
+ kvm_vm_free(vm);
}
}
int main(void)
{
- struct kvm_cpuid2 *best;
- struct kvm_vm *vm;
- vm_vaddr_t msr_gva, hcall_page, hcall_params;
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
- .args = {1}
- };
-
- /* Test MSRs */
- vm = vm_create_default(VCPU_ID, 0, guest_msr);
-
- msr_gva = vm_vaddr_alloc_page(vm);
- memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
- vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
- vcpu_enable_cap(vm, VCPU_ID, &cap);
-
- vcpu_set_hv_cpuid(vm, VCPU_ID);
-
- best = kvm_get_supported_hv_cpuid();
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
-
pr_info("Testing access to Hyper-V specific MSRs\n");
- guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva),
- best);
- kvm_vm_free(vm);
-
- /* Test hypercalls */
- vm = vm_create_default(VCPU_ID, 0, guest_hcall);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
-
- /* Hypercall input/output */
- hcall_page = vm_vaddr_alloc_pages(vm, 2);
- memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
-
- hcall_params = vm_vaddr_alloc_page(vm);
- memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
-
- vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
- vcpu_enable_cap(vm, VCPU_ID, &cap);
-
- vcpu_set_hv_cpuid(vm, VCPU_ID);
-
- best = kvm_get_supported_hv_cpuid();
+ guest_test_msrs_access();
pr_info("Testing access to Hyper-V hypercalls\n");
- guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params),
- addr_gva2hva(vm, hcall_page),
- addr_gva2hva(vm, hcall_page) + getpagesize(),
- best);
-
- kvm_vm_free(vm);
+ guest_test_hcalls_access();
}
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
new file mode 100644
index 000000000000..c715adcbd487
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_SET_PMU_EVENT_FILTER.
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies the expected behavior of allow lists and deny lists for
+ * virtual PMU events.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * In lieu of copying perf_event.h into tools...
+ */
+#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
+#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
+
+union cpuid10_eax {
+ struct {
+ unsigned int version_id:8;
+ unsigned int num_counters:8;
+ unsigned int bit_width:8;
+ unsigned int mask_length:8;
+ } split;
+ unsigned int full;
+};
+
+union cpuid10_ebx {
+ struct {
+ unsigned int no_unhalted_core_cycles:1;
+ unsigned int no_instructions_retired:1;
+ unsigned int no_unhalted_reference_cycles:1;
+ unsigned int no_llc_reference:1;
+ unsigned int no_llc_misses:1;
+ unsigned int no_branch_instruction_retired:1;
+ unsigned int no_branch_misses_retired:1;
+ } split;
+ unsigned int full;
+};
+
+/* End of stuff taken from perf_event.h. */
+
+/* Oddly, this isn't in perf_event.h. */
+#define ARCH_PERFMON_BRANCHES_RETIRED 5
+
+#define VCPU_ID 0
+#define NUM_BRANCHES 42
+
+/*
+ * This is how the event selector and unit mask are stored in an AMD
+ * core performance event-select register. Intel's format is similar,
+ * but the event selector is only 8 bits.
+ */
+#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \
+ (umask & 0xff) << 8)
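+/* e.g. EVENT(0xc2, 0) == 0xc2; select bits 11:8 land in bits 35:32. */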
+
+/*
+ * "Branch instructions retired", from the Intel SDM, volume 3,
+ * "Pre-defined Architectural Performance Events."
+ */
+
+#define INTEL_BR_RETIRED EVENT(0xc4, 0)
+
+/*
+ * "Retired branch instructions", from Processor Programming Reference
+ * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
+ * Preliminary Processor Programming Reference (PPR) for AMD Family
+ * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
+ * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
+ * B1 Processors Volume 1 of 2.
+ */
+
+#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
+
+/*
+ * This event list comprises Intel's eight architectural events plus
+ * AMD's "retired branch instructions" for Zen[123] (and possibly
+ * other AMD CPUs).
+ */
+static const uint64_t event_list[] = {
+ EVENT(0x3c, 0),
+ EVENT(0xc0, 0),
+ EVENT(0x3c, 1),
+ EVENT(0x2e, 0x4f),
+ EVENT(0x2e, 0x41),
+ EVENT(0xc4, 0),
+ EVENT(0xc5, 0),
+ EVENT(0xa4, 1),
+ AMD_ZEN_BR_RETIRED,
+};
+
+/*
+ * If we encounter a #GP during the guest PMU sanity check, then the guest
+ * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(0);
+}
+
+/*
+ * Check that we can write a new value to the given MSR and read it back.
+ * The caller should provide a non-empty set of bits that are safe to flip.
+ *
+ * Return on success. GUEST_SYNC(0) on error.
+ */
+static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+{
+ uint64_t v = rdmsr(msr) ^ bits_to_flip;
+
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(0);
+
+ v ^= bits_to_flip;
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(0);
+}
+
+static void intel_guest_code(void)
+{
+ check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+ check_msr(MSR_P6_EVNTSEL0, 0xffff);
+ check_msr(MSR_IA32_PMC0, 0xffff);
+ GUEST_SYNC(1);
+
+ for (;;) {
+ uint64_t br0, br1;
+
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+ br0 = rdmsr(MSR_IA32_PMC0);
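+		/* "loop ." spins on itself, retiring NUM_BRANCHES branch instructions */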
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ br1 = rdmsr(MSR_IA32_PMC0);
+ GUEST_SYNC(br1 - br0);
+ }
+}
+
+/*
+ * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
+ * this code uses the always-available, legacy K7 PMU MSRs, which alias to
+ * the first four of the six extended core PMU MSRs.
+ */
+static void amd_guest_code(void)
+{
+ check_msr(MSR_K7_EVNTSEL0, 0xffff);
+ check_msr(MSR_K7_PERFCTR0, 0xffff);
+ GUEST_SYNC(1);
+
+ for (;;) {
+ uint64_t br0, br1;
+
+ wrmsr(MSR_K7_EVNTSEL0, 0);
+ wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
+ br0 = rdmsr(MSR_K7_PERFCTR0);
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ br1 = rdmsr(MSR_K7_PERFCTR0);
+ GUEST_SYNC(br1 - br0);
+ }
+}
+
+/*
+ * Run the VM to the next GUEST_SYNC(value), and return the value passed
+ * to the sync. Any other exit from the guest is fatal.
+ */
+static uint64_t run_vm_to_sync(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Exit reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ get_ucall(vm, VCPU_ID, &uc);
+ TEST_ASSERT(uc.cmd == UCALL_SYNC,
+ "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
+ return uc.args[1];
+}
+
+/*
+ * In a nested environment or if the vPMU is disabled, the guest PMU
+ * might not work as architected (accessing the PMU MSRs may raise
+ * #GP, or writes could simply be discarded). In those situations,
+ * there is no point in running these tests. The guest code will perform
+ * a sanity check and then GUEST_SYNC(success). In the case of failure,
+ * the behavior of the guest on resumption is undefined.
+ */
+static bool sanity_check_pmu(struct kvm_vm *vm)
+{
+ bool success;
+
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+ success = run_vm_to_sync(vm);
+ vm_install_exception_handler(vm, GP_VECTOR, NULL);
+
+ return success;
+}
+
+static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
+{
+ struct kvm_pmu_event_filter *f;
+ int size = sizeof(*f) + nevents * sizeof(f->events[0]);
+
+ f = malloc(size);
+ TEST_ASSERT(f, "Out of memory");
+ memset(f, 0, size);
+ f->nevents = nevents;
+ return f;
+}
+
+static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+{
+ struct kvm_pmu_event_filter *f;
+ int i;
+
+ f = make_pmu_event_filter(ARRAY_SIZE(event_list));
+ f->action = action;
+ for (i = 0; i < ARRAY_SIZE(event_list); i++)
+ f->events[i] = event_list[i];
+
+ return f;
+}
+
+/*
+ * Remove the first occurrence of 'event' (if any) from the filter's
+ * event list.
+ */
+static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
+ uint64_t event)
+{
+ bool found = false;
+ int i;
+
+ for (i = 0; i < f->nevents; i++) {
+ if (found)
+ f->events[i - 1] = f->events[i];
+ else
+ found = f->events[i] == event;
+ }
+ if (found)
+ f->nevents--;
+ return f;
+}
+
+static void test_without_filter(struct kvm_vm *vm)
+{
+ uint64_t count = run_vm_to_sync(vm);
+
+ if (count != NUM_BRANCHES)
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+ __func__, count, NUM_BRANCHES);
+ TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static uint64_t test_with_filter(struct kvm_vm *vm,
+ struct kvm_pmu_event_filter *f)
+{
+ vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f);
+ return run_vm_to_sync(vm);
+}
+
+static void test_member_deny_list(struct kvm_vm *vm)
+{
+ struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+ uint64_t count = test_with_filter(vm, f);
+
+ free(f);
+ if (count)
+ pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
+ __func__, count);
+ TEST_ASSERT(!count, "Disallowed PMU Event is counting");
+}
+
+static void test_member_allow_list(struct kvm_vm *vm)
+{
+ struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+ uint64_t count = test_with_filter(vm, f);
+
+ free(f);
+ if (count != NUM_BRANCHES)
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+ __func__, count, NUM_BRANCHES);
+ TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static void test_not_member_deny_list(struct kvm_vm *vm)
+{
+ struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+ uint64_t count;
+
+ remove_event(f, INTEL_BR_RETIRED);
+ remove_event(f, AMD_ZEN_BR_RETIRED);
+ count = test_with_filter(vm, f);
+ free(f);
+ if (count != NUM_BRANCHES)
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+ __func__, count, NUM_BRANCHES);
+ TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static void test_not_member_allow_list(struct kvm_vm *vm)
+{
+ struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+ uint64_t count;
+
+ remove_event(f, INTEL_BR_RETIRED);
+ remove_event(f, AMD_ZEN_BR_RETIRED);
+ count = test_with_filter(vm, f);
+ free(f);
+ if (count)
+ pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
+ __func__, count);
+ TEST_ASSERT(!count, "Disallowed PMU Event is counting");
+}
+
+/*
+ * Check for a non-zero PMU version, at least one general-purpose
+ * counter per logical processor, an EBX bit vector of length greater
+ * than 5, and EBX[5] clear.
+ */
+static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry)
+{
+ union cpuid10_eax eax = { .full = entry->eax };
+ union cpuid10_ebx ebx = { .full = entry->ebx };
+
+ return eax.split.version_id && eax.split.num_counters > 0 &&
+ eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED &&
+ !ebx.split.no_branch_instruction_retired;
+}
+
+/*
+ * Note that CPUID leaf 0xa is Intel-specific. This leaf should be
+ * clear on AMD hardware.
+ */
+static bool use_intel_pmu(void)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = kvm_get_supported_cpuid_index(0xa, 0);
+ return is_intel_cpu() && entry && check_intel_pmu_leaf(entry);
+}
+
+static bool is_zen1(uint32_t eax)
+{
+ return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f;
+}
+
+static bool is_zen2(uint32_t eax)
+{
+ return x86_family(eax) == 0x17 &&
+ x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f;
+}
+
+static bool is_zen3(uint32_t eax)
+{
+ return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f;
+}
+
+/*
+ * Determining AMD support for a PMU event requires consulting the AMD
+ * PPR for the CPU or reference material derived therefrom. The AMD
+ * test code herein has been verified to work on Zen1, Zen2, and Zen3.
+ *
+ * Feel free to add more AMD CPUs that are documented to support event
+ * select 0xc2 umask 0 as "retired branch instructions."
+ */
+static bool use_amd_pmu(void)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = kvm_get_supported_cpuid_index(1, 0);
+ return is_amd_cpu() && entry &&
+ (is_zen1(entry->eax) ||
+ is_zen2(entry->eax) ||
+ is_zen3(entry->eax));
+}
+
+int main(int argc, char *argv[])
+{
+ void (*guest_code)(void) = NULL;
+ struct kvm_vm *vm;
+ int r;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER);
+ if (!r) {
+ print_skip("KVM_CAP_PMU_EVENT_FILTER not supported");
+ exit(KSFT_SKIP);
+ }
+
+ if (use_intel_pmu())
+ guest_code = intel_guest_code;
+ else if (use_amd_pmu())
+ guest_code = amd_guest_code;
+
+ if (!guest_code) {
+ print_skip("Don't know how to test this guest PMU");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ if (!sanity_check_pmu(vm)) {
+ print_skip("Guest PMU is not functional");
+ exit(KSFT_SKIP);
+ }
+
+ test_without_filter(vm);
+ test_member_deny_list(vm);
+ test_member_allow_list(vm);
+ test_not_member_deny_list(vm);
+ test_not_member_allow_list(vm);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
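
For reference, a minimal userspace sketch of installing such a filter on an
open VM fd, mirroring event_filter() and test_with_filter() above
(allow_branches_only is a hypothetical helper; error checking elided):

	#include <linux/kvm.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>

	/* Allow only "branch instructions retired", Intel encoding 0xc4. */
	static int allow_branches_only(int vm_fd)
	{
		struct kvm_pmu_event_filter *f;
		size_t size = sizeof(*f) + sizeof(f->events[0]);
		int ret;

		f = calloc(1, size);	/* zeroes flags and padding */
		f->action = KVM_PMU_EVENT_ALLOW;
		f->nevents = 1;
		f->events[0] = 0xc4;	/* EVENT(0xc4, 0) */

		ret = ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, f);
		free(f);
		return ret;
	}
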
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
index 5ba325cd64bf..80056bbbb003 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -21,7 +21,7 @@
#define NR_LOCK_TESTING_THREADS 3
#define NR_LOCK_TESTING_ITERATIONS 10000
-static void sev_ioctl(int vm_fd, int cmd_id, void *data)
+static int __sev_ioctl(int vm_fd, int cmd_id, void *data, __u32 *fw_error)
{
struct kvm_sev_cmd cmd = {
.id = cmd_id,
@@ -31,9 +31,19 @@ static void sev_ioctl(int vm_fd, int cmd_id, void *data)
int ret;
ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
- TEST_ASSERT((ret == 0 || cmd.error == SEV_RET_SUCCESS),
+ *fw_error = cmd.error;
+ return ret;
+}
+
+static void sev_ioctl(int vm_fd, int cmd_id, void *data)
+{
+ int ret;
+ __u32 fw_error;
+
+ ret = __sev_ioctl(vm_fd, cmd_id, data, &fw_error);
+ TEST_ASSERT(ret == 0 && fw_error == SEV_RET_SUCCESS,
"%d failed: return code: %d, errno: %d, fw error: %d",
- cmd_id, ret, errno, cmd.error);
+ cmd_id, ret, errno, fw_error);
}
static struct kvm_vm *sev_vm_create(bool es)
@@ -54,12 +64,15 @@ static struct kvm_vm *sev_vm_create(bool es)
return vm;
}
-static struct kvm_vm *__vm_create(void)
+static struct kvm_vm *aux_vm_create(bool with_vcpus)
{
struct kvm_vm *vm;
int i;
vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+ if (!with_vcpus)
+ return vm;
+
for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
vm_vcpu_add(vm, i);
@@ -89,11 +102,11 @@ static void test_sev_migrate_from(bool es)
{
struct kvm_vm *src_vm;
struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
- int i;
+ int i, ret;
src_vm = sev_vm_create(es);
for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
- dst_vms[i] = __vm_create();
+ dst_vms[i] = aux_vm_create(true);
/* Initial migration from the src to the first dst. */
sev_migrate_from(dst_vms[0]->fd, src_vm->fd);
@@ -102,7 +115,10 @@ static void test_sev_migrate_from(bool es)
sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd);
/* Migrate the guest back to the original VM. */
- sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd);
+ ret = __sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd);
+ TEST_ASSERT(ret == -1 && errno == EIO,
+ "VM that was migrated from should be dead. ret %d, errno: %d\n", ret,
+ errno);
kvm_vm_free(src_vm);
for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
@@ -146,6 +162,8 @@ static void test_sev_migrate_locking(void)
for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
pthread_join(pt[i], NULL);
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ kvm_vm_free(input[i].vm);
}
static void test_sev_migrate_parameters(void)
@@ -157,12 +175,11 @@ static void test_sev_migrate_parameters(void)
sev_vm = sev_vm_create(/* es= */ false);
sev_es_vm = sev_vm_create(/* es= */ true);
vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
- vm_no_sev = __vm_create();
+ vm_no_sev = aux_vm_create(true);
sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
vm_vcpu_add(sev_es_vm_no_vmsa, 1);
-
ret = __sev_migrate_from(sev_vm->fd, sev_es_vm->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
@@ -191,13 +208,186 @@ static void test_sev_migrate_parameters(void)
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Migrations require SEV enabled. ret %d, errno: %d\n", ret,
errno);
+
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(sev_es_vm);
+ kvm_vm_free(sev_es_vm_no_vmsa);
+ kvm_vm_free(vm_no_vcpu);
+ kvm_vm_free(vm_no_sev);
+}
+
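+/*
+ * KVM_CAP_VM_COPY_ENC_CONTEXT_FROM copies (rather than moves) the source
+ * VM's SEV encryption context, making the destination a "mirror" VM.
+ */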
+static int __sev_mirror_create(int dst_fd, int src_fd)
+{
+ struct kvm_enable_cap cap = {
+ .cap = KVM_CAP_VM_COPY_ENC_CONTEXT_FROM,
+ .args = { src_fd }
+ };
+
+ return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
+}
+
+static void sev_mirror_create(int dst_fd, int src_fd)
+{
+ int ret;
+
+ ret = __sev_mirror_create(dst_fd, src_fd);
+ TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno);
+}
+
+static void verify_mirror_allowed_cmds(int vm_fd)
+{
+ struct kvm_sev_guest_status status;
+
+ for (int cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
+ int ret;
+ __u32 fw_error;
+
+ /*
+ * These commands are allowed for mirror VMs, all others are
+ * not.
+ */
+ switch (cmd_id) {
+ case KVM_SEV_LAUNCH_UPDATE_VMSA:
+ case KVM_SEV_GUEST_STATUS:
+ case KVM_SEV_DBG_DECRYPT:
+ case KVM_SEV_DBG_ENCRYPT:
+ continue;
+ default:
+ break;
+ }
+
+ /*
+ * These commands should be disallowed before the data
+ * parameter is examined so NULL is OK here.
+ */
+ ret = __sev_ioctl(vm_fd, cmd_id, NULL, &fw_error);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+			"Should not be able to call command: %d. ret: %d, errno: %d\n",
+ cmd_id, ret, errno);
+ }
+
+ sev_ioctl(vm_fd, KVM_SEV_GUEST_STATUS, &status);
+}
+
+static void test_sev_mirror(bool es)
+{
+ struct kvm_vm *src_vm, *dst_vm;
+ int i;
+
+ src_vm = sev_vm_create(es);
+ dst_vm = aux_vm_create(false);
+
+ sev_mirror_create(dst_vm->fd, src_vm->fd);
+
+ /* Check that we can complete creation of the mirror VM. */
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ vm_vcpu_add(dst_vm, i);
+
+ if (es)
+ sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+ verify_mirror_allowed_cmds(dst_vm->fd);
+
+ kvm_vm_free(src_vm);
+ kvm_vm_free(dst_vm);
+}
+
+static void test_sev_mirror_parameters(void)
+{
+ struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
+ int ret;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ sev_es_vm = sev_vm_create(/* es= */ true);
+ vm_with_vcpu = aux_vm_create(true);
+ vm_no_vcpu = aux_vm_create(false);
+
+ ret = __sev_mirror_create(sev_vm->fd, sev_vm->fd);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+		"Should not be able to copy context to self. ret: %d, errno: %d\n",
+ ret, errno);
+
+ ret = __sev_mirror_create(sev_vm->fd, sev_es_vm->fd);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+		"Should not be able to copy context to an SEV enabled VM. ret: %d, errno: %d\n",
+ ret, errno);
+
+ ret = __sev_mirror_create(sev_es_vm->fd, sev_vm->fd);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+		"Should not be able to copy context to an SEV-ES enabled VM. ret: %d, errno: %d\n",
+ ret, errno);
+
+ ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "Copy context requires SEV enabled. ret %d, errno: %d\n", ret,
+ errno);
+
+ ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+		"SEV copy context requires no vCPUs on the destination. ret: %d, errno: %d\n",
+ ret, errno);
+
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(sev_es_vm);
+ kvm_vm_free(vm_with_vcpu);
+ kvm_vm_free(vm_no_vcpu);
+}
+
+static void test_sev_move_copy(void)
+{
+ struct kvm_vm *dst_vm, *sev_vm, *mirror_vm, *dst_mirror_vm;
+ int ret;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ dst_vm = aux_vm_create(true);
+ mirror_vm = aux_vm_create(false);
+ dst_mirror_vm = aux_vm_create(false);
+
+ sev_mirror_create(mirror_vm->fd, sev_vm->fd);
+ ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd);
+ TEST_ASSERT(ret == -1 && errno == EBUSY,
+ "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret,
+ errno);
+
+ /* The mirror itself can be migrated. */
+ sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd);
+ ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd);
+ TEST_ASSERT(ret == -1 && errno == EBUSY,
+ "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret,
+ errno);
+
+ /*
+ * mirror_vm is not a mirror anymore, dst_mirror_vm is. Thus,
+	 * the owner can be migrated as soon as dst_mirror_vm is gone.
+ */
+ kvm_vm_free(dst_mirror_vm);
+ sev_migrate_from(dst_vm->fd, sev_vm->fd);
+
+ kvm_vm_free(mirror_vm);
+ kvm_vm_free(dst_vm);
+ kvm_vm_free(sev_vm);
}
int main(int argc, char *argv[])
{
- test_sev_migrate_from(/* es= */ false);
- test_sev_migrate_from(/* es= */ true);
- test_sev_migrate_locking();
- test_sev_migrate_parameters();
+ if (kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
+ test_sev_migrate_from(/* es= */ false);
+ test_sev_migrate_from(/* es= */ true);
+ test_sev_migrate_locking();
+ test_sev_migrate_parameters();
+ if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
+ test_sev_move_copy();
+ }
+ if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
+ test_sev_mirror(/* es= */ false);
+ test_sev_mirror(/* es= */ true);
+ test_sev_mirror_parameters();
+ }
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index d0fe2fdce58c..a626d40fdb48 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -105,7 +105,6 @@ static void guest_code(void *arg)
if (cpu_has_svm()) {
run_guest(svm->vmcb, svm->vmcb_gpa);
- svm->vmcb->save.rip += 3;
run_guest(svm->vmcb, svm->vmcb_gpa);
} else {
vmlaunch();
@@ -212,7 +211,7 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
- free(state);
+ kvm_x86_state_cleanup(state);
}
done:
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 32854c1462ad..2e0a92da8ff5 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -218,7 +218,7 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
- free(state);
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
vcpu_regs_get(vm, VCPU_ID, &regs2);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
index df04f56ce859..30a81038df46 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -75,7 +75,7 @@ static void l1_guest_code(struct svm_test_data *svm)
vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
/* No intercepts for real and virtual interrupts */
- vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR | INTERCEPT_VINTR);
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
/* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
index 5a6a662f2e59..a426078b16a3 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -77,8 +77,8 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
switch (get_ucall(vm, vcpuid, &uc)) {
case UCALL_SYNC:
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
- stage + 1, (ulong)uc.args[1]);
+ uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
return;
case UCALL_DONE:
return;
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
new file mode 100644
index 000000000000..e4bef2e05686
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 1
+
+static void guest_ins_port80(uint8_t *buffer, unsigned int count)
+{
+ unsigned long end;
+
+ if (count == 2)
+ end = (unsigned long)buffer + 1;
+ else
+ end = (unsigned long)buffer + 8192;
+
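+	/* Read 'count' bytes from port 0x80; cld makes rep insb increment RDI */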
+ asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
+ GUEST_ASSERT_1(count == 0, count);
+ GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end);
+}
+
+static void guest_code(void)
+{
+ uint8_t buffer[8192];
+ int i;
+
+ /*
+ * Special case tests. main() will adjust RCX 2 => 1 and 3 => 8192 to
+ * test that KVM doesn't explode when userspace modifies the "count" on
+ * a userspace I/O exit. KVM isn't required to play nice with the I/O
+ * itself as KVM doesn't support manipulating the count, it just needs
+ * to not explode or overflow a buffer.
+ */
+ guest_ins_port80(buffer, 2);
+ guest_ins_port80(buffer, 3);
+
+ /* Verify KVM fills the buffer correctly when not stuffing RCX. */
+ memset(buffer, 0, sizeof(buffer));
+ guest_ins_port80(buffer, 8192);
+ for (i = 0; i < 8192; i++)
+ GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_regs regs;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int rc;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ run = vcpu_state(vm, VCPU_ID);
+
+ memset(&regs, 0, sizeof(regs));
+
+ while (1) {
+ rc = _vcpu_run(vm, VCPU_ID);
+
+ TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Unexpected exit reason: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ if (get_ucall(vm, VCPU_ID, &uc))
+ break;
+
+ TEST_ASSERT(run->io.port == 0x80,
+ "Expected I/O at port 0x80, got port 0x%x\n", run->io.port);
+
+ /*
+ * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
+ * Note, this abuses KVM's batching of rep string I/O to avoid
+ * getting stuck in an infinite loop. That behavior isn't in
+ * scope from a testing perspective as it's not ABI in any way,
+ * i.e. it really is abusing internal KVM knowledge.
+ */
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ if (regs.rcx == 2)
+ regs.rcx = 1;
+ if (regs.rcx == 3)
+ regs.rcx = 8192;
+ memset((void *)run + run->io.data_offset, 0xaa, 4096);
+ vcpu_regs_set(vm, VCPU_ID, &regs);
+ }
+
+ switch (uc.cmd) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld : argN+1 = 0x%lx, argN+2 = 0x%lx",
+ (const char *)uc.args[0], __FILE__, uc.args[1],
+ uc.args[2], uc.args[3]);
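+		/* NOT REACHED */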
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index 2835a17f1b7a..edac8839e717 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -30,8 +30,8 @@ static struct kvm_vm *vm;
static void l2_guest_code(void)
{
/* Exit to L0 */
- asm volatile("inb %%dx, %%al"
- : : [port] "d" (PORT_L0_EXIT) : "rax");
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (PORT_L0_EXIT) : "rax");
}
static void l1_guest_code(struct vmx_pages *vmx_pages)
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
new file mode 100644
index 000000000000..27a850f3d7ce
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <signal.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "kselftest.h"
+
+#define VCPU_ID 0
+
+static struct kvm_vm *vm;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ /* Loop on the ud2 until guest state is made invalid. */
+}
+
+static void guest_code(void)
+{
+ asm volatile("ud2");
+}
+
+static void __run_vcpu_with_invalid_state(void)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+ vcpu_run(vm, VCPU_ID);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+ "Expected KVM_EXIT_INTERNAL_ERROR, got %d (%s)\n",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Expected emulation failure, got %d\n",
+ run->emulation_failure.suberror);
+}
+
+static void run_vcpu_with_invalid_state(void)
+{
+ /*
+ * Always run twice to verify KVM handles the case where _KVM_ queues
+ * an exception with invalid state and then exits to userspace, i.e.
+ * that KVM doesn't explode if userspace ignores the initial error.
+ */
+ __run_vcpu_with_invalid_state();
+ __run_vcpu_with_invalid_state();
+}
+
+static void set_timer(void)
+{
+ struct itimerval timer;
+
+ timer.it_value.tv_sec = 0;
+ timer.it_value.tv_usec = 200;
+ timer.it_interval = timer.it_value;
+ ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+}
+
+static void set_or_clear_invalid_guest_state(bool set)
+{
+ static struct kvm_sregs sregs;
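+	/* Fetch sregs only once; CR0 != 0 for this test's vCPU, so it doubles as an init flag */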
+
+ if (!sregs.cr0)
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ sregs.tr.unusable = !!set;
+ vcpu_sregs_set(vm, VCPU_ID, &sregs);
+}
+
+static void set_invalid_guest_state(void)
+{
+ set_or_clear_invalid_guest_state(true);
+}
+
+static void clear_invalid_guest_state(void)
+{
+ set_or_clear_invalid_guest_state(false);
+}
+
+static void sigalrm_handler(int sig)
+{
+ struct kvm_vcpu_events events;
+
+ TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+
+ /*
+	 * If an exception is pending, attempt KVM_RUN with invalid guest state,
+ * otherwise rearm the timer and keep doing so until the timer fires
+ * between KVM queueing an exception and re-entering the guest.
+ */
+ if (events.exception.pending) {
+ set_invalid_guest_state();
+ run_vcpu_with_invalid_state();
+ } else {
+ set_timer();
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ if (!is_intel_cpu() || vm_is_unrestricted_guest(NULL)) {
+ print_skip("Must be run with kvm_intel.unrestricted_guest=0");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, (void *)guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+	/*
+	 * Stuff invalid guest state by making TR unusable. The next KVM_RUN
+	 * should exit to userspace with an emulation error, as KVM doesn't
+	 * support emulating invalid guest state.
+	 */
+ set_invalid_guest_state();
+ run_vcpu_with_invalid_state();
+
+ /*
+ * Verify KVM also handles the case where userspace gains control while
+ * an exception is pending and stuffs invalid state. Run with valid
+ * guest state and a timer firing every 200us, and attempt to enter the
+ * guest with invalid state when the handler interrupts KVM with an
+ * exception pending.
+ */
+ clear_invalid_guest_state();
+ TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
+ "Failed to register SIGALRM handler, errno = %d (%s)",
+ errno, strerror(errno));
+
+ set_timer();
+ run_vcpu_with_invalid_state();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
new file mode 100644
index 000000000000..489fbed4ca6f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define VCPU_ID 0
+#define ARBITRARY_IO_PORT 0x2000
+
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+ /*
+ * Generate an exit to L0 userspace, i.e. main(), via I/O to an
+ * arbitrary port.
+ */
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * L2 must be run without unrestricted guest, verify that the selftests
+ * library hasn't enabled it. Because KVM selftests jump directly to
+ * 64-bit mode, unrestricted guest support isn't required.
+ */
+ GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
+ !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
+
+ GUEST_ASSERT(!vmlaunch());
+
+ /* L2 should triple fault after main() stuffs invalid guest state. */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva;
+ struct kvm_sregs sregs;
+ struct kvm_run *run;
+ struct ucall uc;
+
+ nested_vmx_check_supported();
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+
+ /* Allocate VMX pages and shared descriptors (vmx_pages). */
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+ vcpu_run(vm, VCPU_ID);
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ /*
+ * The first exit to L0 userspace should be an I/O access from L2.
+ * Running L1 should launch L2 without triggering an exit to userspace.
+ */
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Expected KVM_EXIT_IO, got: %u (%s)\n",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+ "Expected IN from port %d from L2, got port %d",
+ ARBITRARY_IO_PORT, run->io.port);
+
+ /*
+	 * Stuff invalid guest state for L2 by making TR unusable. The next
+ * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+ * emulating invalid guest state for L2.
+ */
+ memset(&sregs, 0, sizeof(sregs));
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ sregs.tr.unusable = 1;
+ vcpu_sregs_set(vm, VCPU_ID, &sregs);
+
+ vcpu_run(vm, VCPU_ID);
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s", (const char *)uc.args[0]);
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c
index 23051d84b907..2454a1f2ca0c 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c
@@ -110,22 +110,5 @@ int main(int argc, char *argv[])
ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
- /* testcase 4, set capabilities when we don't have PDCM bit */
- entry_1_0->ecx &= ~X86_FEATURE_PDCM;
- vcpu_set_cpuid(vm, VCPU_ID, cpuid);
- ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
- TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
-
- /* testcase 5, set capabilities when we don't have PMU version bits */
- entry_1_0->ecx |= X86_FEATURE_PDCM;
- eax.split.version_id = 0;
- entry_1_0->ecx = eax.full;
- vcpu_set_cpuid(vm, VCPU_ID, cpuid);
- ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
- TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
-
- vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0);
- ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0);
-
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
index a07480aed397..ff92e25b6f1e 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -244,7 +244,7 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
- free(state);
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
vcpu_regs_get(vm, VCPU_ID, &regs2);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index a0699f00b3d6..865e17146815 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -14,6 +14,9 @@
#include <stdint.h>
#include <time.h>
#include <sched.h>
+#include <signal.h>
+
+#include <sys/eventfd.h>
#define VCPU_ID 5
@@ -22,10 +25,15 @@
#define SHINFO_REGION_SLOT 10
#define PAGE_SIZE 4096
+#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT 11
+
+#define SHINFO_ADDR (SHINFO_REGION_GPA)
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
+#define SHINFO_VADDR (SHINFO_REGION_GVA)
#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
@@ -38,20 +46,20 @@ static struct kvm_vm *vm;
#define MIN_STEAL_TIME 50000
struct pvclock_vcpu_time_info {
- u32 version;
- u32 pad0;
- u64 tsc_timestamp;
- u64 system_time;
- u32 tsc_to_system_mul;
- s8 tsc_shift;
- u8 flags;
- u8 pad[2];
+ u32 version;
+ u32 pad0;
+ u64 tsc_timestamp;
+ u64 system_time;
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+ u8 flags;
+ u8 pad[2];
} __attribute__((__packed__)); /* 32 bytes */
struct pvclock_wall_clock {
- u32 version;
- u32 sec;
- u32 nsec;
+ u32 version;
+ u32 sec;
+ u32 nsec;
} __attribute__((__packed__));
struct vcpu_runstate_info {
@@ -66,22 +74,44 @@ struct arch_vcpu_info {
};
struct vcpu_info {
- uint8_t evtchn_upcall_pending;
- uint8_t evtchn_upcall_mask;
- unsigned long evtchn_pending_sel;
- struct arch_vcpu_info arch;
- struct pvclock_vcpu_time_info time;
+ uint8_t evtchn_upcall_pending;
+ uint8_t evtchn_upcall_mask;
+ unsigned long evtchn_pending_sel;
+ struct arch_vcpu_info arch;
+ struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */
+struct shared_info {
+ struct vcpu_info vcpu_info[32];
+ unsigned long evtchn_pending[64];
+ unsigned long evtchn_mask[64];
+ struct pvclock_wall_clock wc;
+ uint32_t wc_sec_hi;
+ /* arch_shared_info here */
+};
+
#define RUNSTATE_running 0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked 2
#define RUNSTATE_offline 3
+static const char *runstate_names[] = {
+ "running",
+ "runnable",
+ "blocked",
+ "offline"
+};
+
+struct {
+ struct kvm_irq_routing info;
+ struct kvm_irq_routing_entry entries[2];
+} irq_routes;
+
static void evtchn_handler(struct ex_regs *regs)
{
struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
vi->evtchn_upcall_pending = 0;
+ vi->evtchn_pending_sel = 0;
GUEST_SYNC(0x20);
}
@@ -127,7 +157,25 @@ static void guest_code(void)
GUEST_SYNC(6);
GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
- GUEST_DONE();
+ /* Attempt to deliver a *masked* interrupt */
+ GUEST_SYNC(7);
+
+ /* Wait until we see the bit set */
+ struct shared_info *si = (void *)SHINFO_VADDR;
+ while (!si->evtchn_pending[0])
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+
+ /* Now deliver an *unmasked* interrupt */
+ GUEST_SYNC(8);
+
+ while (!si->evtchn_pending[1])
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+
+ /* Change memslots and deliver an interrupt */
+ GUEST_SYNC(9);
+
+ for (;;)
+ __asm__ __volatile__ ("rep nop" : : : "memory");
}
static int cmp_timespec(struct timespec *a, struct timespec *b)
@@ -144,9 +192,18 @@ static int cmp_timespec(struct timespec *a, struct timespec *b)
return 0;
}
+static void handle_alrm(int sig)
+{
+ TEST_FAIL("IRQ delivery timed out");
+}
+
int main(int argc, char *argv[])
{
struct timespec min_ts, max_ts, vm_ts;
+ bool verbose;
+
+ verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
+ !strncmp(argv[1], "--verbose", 10));
int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
@@ -155,6 +212,7 @@ int main(int argc, char *argv[])
}
bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+ bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
clock_gettime(CLOCK_REALTIME, &min_ts);
@@ -166,6 +224,11 @@ int main(int argc, char *argv[])
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
+ struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
+
+ int zero_fd = open("/dev/zero", O_RDONLY);
+ TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
+
struct kvm_xen_hvm_config hvmc = {
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
.msr = XEN_HYPERCALL_MSR,
@@ -184,6 +247,16 @@ int main(int argc, char *argv[])
};
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
+ /*
+ * Test what happens when the HVA of the shinfo page is remapped after
+ * the kernel has a reference to it. But make sure we copy the clock
+ * info over since that's only set at setup time, and we test it later.
+ */
+ struct pvclock_wall_clock wc_copy = shinfo->wc;
+ void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
+ TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
+ shinfo->wc = wc_copy;
+
struct kvm_xen_vcpu_attr vi = {
.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
.u.gpa = VCPU_INFO_ADDR,
@@ -214,6 +287,49 @@ int main(int argc, char *argv[])
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
}
+ int irq_fd[2] = { -1, -1 };
+
+ if (do_eventfd_tests) {
+ irq_fd[0] = eventfd(0, 0);
+ irq_fd[1] = eventfd(0, 0);
+
+ /* Unexpected, but not a KVM failure */
+ if (irq_fd[0] == -1 || irq_fd[1] == -1)
+ do_eventfd_tests = false;
+ }
+
+ if (do_eventfd_tests) {
+ irq_routes.info.nr = 2;
+
+ irq_routes.entries[0].gsi = 32;
+ irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[0].u.xen_evtchn.port = 15;
+ irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID;
+ irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ irq_routes.entries[1].gsi = 33;
+ irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[1].u.xen_evtchn.port = 66;
+ irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID;
+ irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes);
+
+ struct kvm_irqfd ifd = { };
+
+ ifd.fd = irq_fd[0];
+ ifd.gsi = 32;
+ vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+ ifd.fd = irq_fd[1];
+ ifd.gsi = 33;
+ vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+ struct sigaction sa = { };
+ sa.sa_handler = handle_alrm;
+ sigaction(SIGALRM, &sa, NULL);
+ }
+
struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
vinfo->evtchn_upcall_pending = 0;
@@ -248,6 +364,8 @@ int main(int argc, char *argv[])
switch (uc.args[1]) {
case 0:
+ if (verbose)
+ printf("Delivering evtchn upcall\n");
evtchn_irq_expected = true;
vinfo->evtchn_upcall_pending = 1;
break;
@@ -256,11 +374,16 @@ int main(int argc, char *argv[])
TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
if (!do_runstate_tests)
goto done;
+ if (verbose)
+ printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
rst.u.runstate.state = uc.args[1];
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
break;
+
case 4:
+ if (verbose)
+ printf("Testing RUNSTATE_ADJUST\n");
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
memset(&rst.u, 0, sizeof(rst.u));
rst.u.runstate.state = (uint64_t)-1;
@@ -274,6 +397,8 @@ int main(int argc, char *argv[])
break;
case 5:
+ if (verbose)
+ printf("Testing RUNSTATE_DATA\n");
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
memset(&rst.u, 0, sizeof(rst.u));
rst.u.runstate.state = RUNSTATE_running;
@@ -282,16 +407,54 @@ int main(int argc, char *argv[])
rst.u.runstate.time_offline = 0x5a;
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
break;
+
case 6:
+ if (verbose)
+ printf("Testing steal time\n");
/* Yield until scheduler delay exceeds target */
rundelay = get_run_delay() + MIN_STEAL_TIME;
do {
sched_yield();
} while (get_run_delay() < rundelay);
break;
+
+ case 7:
+ if (!do_eventfd_tests)
+ goto done;
+ if (verbose)
+ printf("Testing masked event channel\n");
+ shinfo->evtchn_mask[0] = 0x8000;
+ eventfd_write(irq_fd[0], 1UL);
+ alarm(1);
+ break;
+
+ case 8:
+ if (verbose)
+ printf("Testing unmasked event channel\n");
+ /* Unmask that, but deliver the other one */
+ shinfo->evtchn_pending[0] = 0;
+ shinfo->evtchn_mask[0] = 0;
+ eventfd_write(irq_fd[1], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case 9:
+ if (verbose)
+ printf("Testing event channel after memslot change\n");
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
+ eventfd_write(irq_fd[0], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
case 0x20:
TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
evtchn_irq_expected = false;
+ if (shinfo->evtchn_pending[1] &&
+ shinfo->evtchn_pending[0])
+ goto done;
break;
}
break;
@@ -318,9 +481,19 @@ int main(int argc, char *argv[])
ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
+ if (verbose) {
+ printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
+ printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
+ ti->tsc_shift, ti->flags);
+ printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
+ ti2->tsc_shift, ti2->flags);
+ }
+
vm_ts.tv_sec = wc->sec;
vm_ts.tv_nsec = wc->nsec;
- TEST_ASSERT(wc->version && !(wc->version & 1),
+ TEST_ASSERT(wc->version && !(wc->version & 1),
"Bad wallclock version %x", wc->version);
TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
@@ -341,6 +514,15 @@ int main(int argc, char *argv[])
};
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
+ if (verbose) {
+ printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
+ rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
+ rs->state, rs->state_entry_time);
+ for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
+ printf("State %s: %" PRIu64 " ns\n",
+ runstate_names[i], rs->time[i]);
+ }
+ }
TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
"State entry time mismatch");
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index 20e2a9286d71..183b7e8e1b95 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -17,10 +17,6 @@
#include "../kselftest_harness.h"
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
/*
* TEST_F_FORK() is useful when a test drop privileges but the corresponding
* FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index fe7ee2b0f29c..a40add31a2e3 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -141,7 +141,7 @@ endif
# Selftest makefiles can override those targets by setting
# OVERRIDE_TARGETS = 1.
ifeq ($(OVERRIDE_TARGETS),)
-LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
+LOCAL_HDRS += $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
$(OUTPUT)/%:%.c $(LOCAL_HDRS)
$(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh
index 1b4d95d575f8..14fedeef762e 100755
--- a/tools/testing/selftests/lkdtm/stack-entropy.sh
+++ b/tools/testing/selftests/lkdtm/stack-entropy.sh
@@ -4,13 +4,27 @@
# Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test.
set -e
samples="${1:-1000}"
+TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT
+KSELFTEST_SKIP_TEST=4
+
+# Verify we have LKDTM available in the kernel.
+if [ ! -r $TRIGGER ] ; then
+ /sbin/modprobe -q lkdtm || true
+ if [ ! -r $TRIGGER ] ; then
+ echo "Cannot find $TRIGGER (missing CONFIG_LKDTM?)"
+ else
+ echo "Cannot write $TRIGGER (need to run as root?)"
+ fi
+ # Skip this test
+ exit $KSELFTEST_SKIP_TEST
+fi
# Capture dmesg continuously since it may fill up depending on sample size.
log=$(mktemp -t stack-entropy-XXXXXX)
dmesg --follow >"$log" & pid=$!
report=-1
for i in $(seq 1 $samples); do
- echo "REPORT_STACK" >/sys/kernel/debug/provoke-crash/DIRECT
+ echo "REPORT_STACK" > $TRIGGER
if [ -t 1 ]; then
percent=$(( 100 * $i / $samples ))
if [ "$percent" -ne "$report" ]; then
diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
index 584dc6bc3b06..d2917054fe3a 100644
--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -204,7 +204,7 @@ bool test_unpriv_remount(const char *fstype, const char *mount_options,
if (!WIFEXITED(status)) {
die("child did not terminate cleanly\n");
}
- return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ return WEXITSTATUS(status) == EXIT_SUCCESS;
}
create_and_enter_userns();
@@ -282,7 +282,7 @@ static bool test_priv_mount_unpriv_remount(void)
if (!WIFEXITED(status)) {
die("child did not terminate cleanly\n");
}
- return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ return WEXITSTATUS(status) == EXIT_SUCCESS;
}
orig_mnt_flags = read_mnt_flags(orig_path);
diff --git a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
index 860198f83a53..50ed5d475dd1 100644
--- a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
+++ b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
@@ -191,7 +191,7 @@ static bool is_shared_mount(const char *path)
#define SET_GROUP_FROM "/tmp/move_mount_set_group_supported_from"
#define SET_GROUP_TO "/tmp/move_mount_set_group_supported_to"
-static int move_mount_set_group_supported(void)
+static bool move_mount_set_group_supported(void)
{
int ret;
@@ -222,7 +222,7 @@ static int move_mount_set_group_supported(void)
AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP);
umount2("/tmp", MNT_DETACH);
- return ret < 0 ? false : true;
+ return ret >= 0;
}
FIXTURE(move_mount_set_group) {
@@ -232,7 +232,7 @@ FIXTURE(move_mount_set_group) {
FIXTURE_SETUP(move_mount_set_group)
{
- int ret;
+ bool ret;
ASSERT_EQ(prepare_unpriv_mountns(), 0);
@@ -254,7 +254,7 @@ FIXTURE_SETUP(move_mount_set_group)
FIXTURE_TEARDOWN(move_mount_set_group)
{
- int ret;
+ bool ret;
ret = move_mount_set_group_supported();
ASSERT_GE(ret, 0);
@@ -348,7 +348,7 @@ TEST_F(move_mount_set_group, complex_sharing_copying)
.shared = false,
};
pid_t pid;
- int ret;
+ bool ret;
ret = move_mount_set_group_supported();
ASSERT_GE(ret, 0);
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 7615f29831eb..9897fa9ab953 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -34,6 +34,7 @@ TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
TEST_PROGS += vrf_strict_mode_test.sh
+TEST_PROGS += arp_ndisc_evict_nocarrier.sh
TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
TEST_GEN_FILES = socket nettest
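With arp_ndisc_evict_nocarrier.sh added to TEST_PROGS, the script is picked up by the kselftest runner. For example:

    # Run the net selftests, including the newly added script:
    make -C tools/testing/selftests TARGETS=net run_tests
    # Or invoke it directly:
    ./tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh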
diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
index 75528788cb95..75528788cb95 100644..100755
--- a/tools/testing/selftests/net/amt.sh
+++ b/tools/testing/selftests/net/amt.sh
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 3313566ce906..3f4c8cfe7aca 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -66,6 +66,10 @@ NSB_LO_IP=172.16.2.2
NSA_LO_IP6=2001:db8:2::1
NSB_LO_IP6=2001:db8:2::2
+# non-local addresses for freebind tests
+NL_IP=172.17.1.1
+NL_IP6=2001:db8:4::1
+
MD5_PW=abc123
MD5_WRONG_PW=abc1234
@@ -316,6 +320,9 @@ addr2str()
${NSB_LO_IP6}) echo "ns-B loopback IPv6";;
${NSB_LINKIP6}|${NSB_LINKIP6}%*) echo "ns-B IPv6 LLA";;
+ ${NL_IP}) echo "nonlocal IP";;
+ ${NL_IP6}) echo "nonlocal IPv6";;
+
${VRF_IP}) echo "VRF IP";;
${VRF_IP6}) echo "VRF IPv6";;
@@ -455,6 +462,22 @@ cleanup()
ip netns del ${NSC} >/dev/null 2>&1
}
+cleanup_vrf_dup()
+{
+ ip link del ${NSA_DEV2} >/dev/null 2>&1
+ ip netns pids ${NSC} | xargs kill 2>/dev/null
+ ip netns del ${NSC} >/dev/null 2>&1
+}
+
+setup_vrf_dup()
+{
+ # some VRF tests use ns-C which has the same config as
+ # ns-B but for a device NOT in the VRF
+ create_ns ${NSC} "-" "-"
+ connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \
+ ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64
+}
+
setup()
{
local with_vrf=${1}
@@ -484,12 +507,6 @@ setup()
ip -netns ${NSB} ro add ${VRF_IP}/32 via ${NSA_IP} dev ${NSB_DEV}
ip -netns ${NSB} -6 ro add ${VRF_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV}
-
- # some VRF tests use ns-C which has the same config as
- # ns-B but for a device NOT in the VRF
- create_ns ${NSC} "-" "-"
- connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \
- ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64
else
ip -netns ${NSA} ro add ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV}
ip -netns ${NSA} ro add ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV}
@@ -1240,7 +1257,9 @@ ipv4_tcp_vrf()
log_test_addr ${a} $? 1 "Global server, local connection"
# run MD5 tests
+ setup_vrf_dup
ipv4_tcp_md5
+ cleanup_vrf_dup
#
# enable VRF global server
@@ -1768,6 +1787,14 @@ ipv4_addr_bind_novrf()
done
#
+ # raw socket with nonlocal bind
+ #
+ a=${NL_IP}
+ log_start
+ run_cmd nettest -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after device bind"
+
+ #
# tcp sockets
#
a=${NSA_IP}
@@ -1798,8 +1825,9 @@ ipv4_addr_bind_vrf()
for a in ${NSA_IP} ${VRF_IP}
do
log_start
+ show_hint "Socket not bound to VRF, but address is in VRF"
run_cmd nettest -s -R -P icmp -l ${a} -b
- log_test_addr ${a} $? 0 "Raw socket bind to local address"
+ log_test_addr ${a} $? 1 "Raw socket bind to local address"
log_start
run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b
@@ -1816,6 +1844,14 @@ ipv4_addr_bind_vrf()
log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind"
#
+ # raw socket with nonlocal bind
+ #
+ a=${NL_IP}
+ log_start
+ run_cmd nettest -s -R -P icmp -f -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind"
+
+ #
# tcp sockets
#
for a in ${NSA_IP} ${VRF_IP}
@@ -1965,6 +2001,7 @@ ipv4_rt()
a=${NSA_IP}
log_start
+
run_cmd nettest ${varg} -s &
sleep 1
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
@@ -2191,7 +2228,7 @@ ipv6_ping_vrf()
log_start
show_hint "Fails since VRF device does not support linklocal or multicast"
run_cmd ${ping6} -c1 -w1 ${a}
- log_test_addr ${a} $? 2 "ping out, VRF bind"
+ log_test_addr ${a} $? 1 "ping out, VRF bind"
done
for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV}
@@ -2719,7 +2756,9 @@ ipv6_tcp_vrf()
log_test_addr ${a} $? 1 "Global server, local connection"
# run MD5 tests
+ setup_vrf_dup
ipv6_tcp_md5
+ cleanup_vrf_dup
#
# enable VRF global server
@@ -3403,6 +3442,14 @@ ipv6_addr_bind_novrf()
done
#
+ # raw socket with nonlocal bind
+ #
+ a=${NL_IP6}
+ log_start
+ run_cmd nettest -6 -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address"
+
+ #
# tcp sockets
#
a=${NSA_IP6}
@@ -3414,11 +3461,14 @@ ipv6_addr_bind_novrf()
run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind"
+ # Sadly, the kernel allows binding a socket to a device and then
+ # binding to an address not on the device. So this test passes
+ # when it really should not
a=${NSA_LO_IP6}
log_start
- show_hint "Should fail with 'Cannot assign requested address'"
+ show_hint "Tecnically should fail since address is not on device but kernel allows"
run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
- log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address"
+ log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address"
}
ipv6_addr_bind_vrf()
@@ -3444,6 +3494,14 @@ ipv6_addr_bind_vrf()
log_test_addr ${a} $? 1 "Raw socket bind to invalid local address after vrf bind"
#
+ # raw socket with nonlocal bind
+ #
+ a=${NL_IP6}
+ log_start
+ run_cmd nettest -6 -s -R -P icmp -f -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind"
+
+ #
# tcp sockets
#
# address on enslaved device is valid for the VRF or device in a VRF
@@ -3459,10 +3517,15 @@ ipv6_addr_bind_vrf()
run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind"
+ # Sadly, the kernel allows binding a socket to a device and then
+ # binding to an address not on the device. The only restriction
+ # is that the address is valid in the L3 domain. So this test
+ # passes when it really should not
a=${VRF_IP6}
log_start
+ show_hint "Tecnically should fail since address is not on device but kernel allows"
run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
- log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind"
+ log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind"
a=${NSA_LO_IP6}
log_start
@@ -3996,14 +4059,17 @@ usage: ${0##*/} OPTS
-p Pause on fail
-P Pause after each test
-v Be verbose
+
+Tests:
+ $TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER
EOF
}
################################################################################
# main
-TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter"
-TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter"
+TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter"
+TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter"
TESTS_OTHER="use_cases"
PAUSE_ON_FAIL=no
@@ -4068,8 +4134,6 @@ do
# setup namespaces and config, but do not run any tests
setup) setup; exit 0;;
vrf_setup) setup "yes"; exit 0;;
-
- help) echo "Test names: $TESTS"; exit 0;;
esac
done
@@ -4077,3 +4141,11 @@ cleanup 2>/dev/null
printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}
+
+if [ $nfail -ne 0 ]; then
+ exit 1 # KSFT_FAIL
+elif [ $nsuccess -eq 0 ]; then
+ exit $ksft_skip
+fi
+
+exit 0 # KSFT_PASS
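With these exit codes in place, callers can tell pass, fail, and skip apart. A hedged consumer sketch (the wrapper and the test-selection flag are illustrative):

    ./fcnal-test.sh -t ipv4_ping
    case $? in
        0) echo PASS ;;
        4) echo SKIP ;;    # ksft_skip
        *) echo FAIL ;;
    esac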
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index b5a69ad191b0..d444ee6aa3cb 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -629,6 +629,66 @@ ipv6_fcnal()
log_test $? 0 "Nexthops removed on admin down"
}
+ipv6_grp_refs()
+{
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP link set dev veth1 up"
+ run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10"
+ run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20"
+ run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10"
+ run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20"
+ run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10"
+ run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20"
+ run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10"
+ run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20"
+ run_cmd "$IP nexthop add id 102 group 100"
+ run_cmd "$IP route add 2001:db8:101::1/128 nhid 102"
+
+ # create per-cpu dsts through nh 100
+ run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1"
+
+ # remove nh 100 from the group to delete the route potentially leaving
+ # a stale per-cpu dst which holds a reference to the nexthop's net
+ # device and to the IPv6 route
+ run_cmd "$IP nexthop replace id 102 group 101"
+ run_cmd "$IP route del 2001:db8:101::1/128"
+
+ # add both nexthops to the group so a reference is taken on them
+ run_cmd "$IP nexthop replace id 102 group 100/101"
+
+ # if the bug described in commit "net: nexthop: release IPv6 per-cpu
+ # dsts when replacing a nexthop group" exists at this point we have
+ # an unlinked IPv6 route (but not freed due to stale dst) with a
+ # reference over the group so we delete the group which will again
+ # only unlink it due to the route reference
+ run_cmd "$IP nexthop del id 102"
+
+ # delete the nexthop with stale dst, since we have an unlinked
+ # group with a ref to it and an unlinked IPv6 route with ref to the
+ # group, the nh will only be unlinked and not freed so the stale dst
+ # remains forever and we get a net device refcount imbalance
+ run_cmd "$IP nexthop del id 100"
+
+ # if a reference was lost this command will hang because the net device
+ # cannot be removed
+ timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1
+
+ # we can't cleanup if the command is hung trying to delete the netdev
+ if [ $? -eq 137 ]; then
+ return 1
+ fi
+
+ # cleanup
+ run_cmd "$IP link del veth1.20"
+ run_cmd "$IP nexthop flush"
+
+ return 0
+}
+
ipv6_grp_fcnal()
{
local rc
@@ -734,6 +794,9 @@ ipv6_grp_fcnal()
run_cmd "$IP nexthop add id 108 group 31/24"
log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+
+ ipv6_grp_refs
+ log_test $? 0 "Nexthop group replace refcounts"
}
ipv6_res_grp_fcnal()
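The refcount check above leans on timeout(1): with -s KILL, a command that hangs is killed and the exit status becomes 128 + 9 = 137. A standalone sketch of the idiom, with a placeholder command:

    # Detect a hung command via timeout's 128+signal exit status.
    timeout -s KILL 5 some_possibly_hanging_cmd
    if [ $? -eq 137 ]; then
        echo "command hung and was SIGKILLed"
        exit 1
    fi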
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 5abe92d55b69..996af1ae3d3d 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -444,24 +444,63 @@ fib_rp_filter_test()
setup
set -e
+ ip netns add ns2
+ ip netns set ns2 auto
+
+ ip -netns ns2 link set dev lo up
+
+ $IP link add name veth1 type veth peer name veth2
+ $IP link set dev veth2 netns ns2
+ $IP address add 192.0.2.1/24 dev veth1
+ ip -netns ns2 address add 192.0.2.1/24 dev veth2
+ $IP link set dev veth1 up
+ ip -netns ns2 link set dev veth2 up
+
$IP link set dev lo address 52:54:00:6a:c7:5e
- $IP link set dummy0 address 52:54:00:6a:c7:5e
- $IP link add dummy1 type dummy
- $IP link set dummy1 address 52:54:00:6a:c7:5e
- $IP link set dev dummy1 up
+ $IP link set dev veth1 address 52:54:00:6a:c7:5e
+ ip -netns ns2 link set dev lo address 52:54:00:6a:c7:5e
+ ip -netns ns2 link set dev veth2 address 52:54:00:6a:c7:5e
+
+ # 1. (ns2) redirect lo's egress to veth2's egress
+ ip netns exec ns2 tc qdisc add dev lo parent root handle 1: fq_codel
+ ip netns exec ns2 tc filter add dev lo parent 1: protocol arp basic \
+ action mirred egress redirect dev veth2
+ ip netns exec ns2 tc filter add dev lo parent 1: protocol ip basic \
+ action mirred egress redirect dev veth2
+
+ # 2. (ns1) redirect veth1's ingress to lo's ingress
+ $NS_EXEC tc qdisc add dev veth1 ingress
+ $NS_EXEC tc filter add dev veth1 ingress protocol arp basic \
+ action mirred ingress redirect dev lo
+ $NS_EXEC tc filter add dev veth1 ingress protocol ip basic \
+ action mirred ingress redirect dev lo
+
+ # 3. (ns1) redirect lo's egress to veth1's egress
+ $NS_EXEC tc qdisc add dev lo parent root handle 1: fq_codel
+ $NS_EXEC tc filter add dev lo parent 1: protocol arp basic \
+ action mirred egress redirect dev veth1
+ $NS_EXEC tc filter add dev lo parent 1: protocol ip basic \
+ action mirred egress redirect dev veth1
+
+ # 4. (ns2) redirect veth2's ingress to lo's ingress
+ ip netns exec ns2 tc qdisc add dev veth2 ingress
+ ip netns exec ns2 tc filter add dev veth2 ingress protocol arp basic \
+ action mirred ingress redirect dev lo
+ ip netns exec ns2 tc filter add dev veth2 ingress protocol ip basic \
+ action mirred ingress redirect dev lo
+
$NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1
$NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1
$NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1
-
- $NS_EXEC tc qd add dev dummy1 parent root handle 1: fq_codel
- $NS_EXEC tc filter add dev dummy1 parent 1: protocol arp basic action mirred egress redirect dev lo
- $NS_EXEC tc filter add dev dummy1 parent 1: protocol ip basic action mirred egress redirect dev lo
+ ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1
+ ip netns exec ns2 sysctl -qw net.ipv4.conf.all.accept_local=1
+ ip netns exec ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1
set +e
- run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 198.51.100.1"
+ run_cmd "ip netns exec ns2 ping -w1 -c1 192.0.2.1"
log_test $? 0 "rp_filter passes local packets"
- run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 127.0.0.1"
+ run_cmd "ip netns exec ns2 ping -w1 -c1 127.0.0.1"
log_test $? 0 "rp_filter passes loopback packets"
cleanup
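The tc rules above use veth1/veth2 plus mirred redirects as a stand-in for a looped-back physical cable, so locally generated packets re-enter through a real device where rp_filter applies. A reduced sketch of one direction of that plumbing (device names illustrative):

    ip link add name vethA type veth peer name vethB
    # lo egress -> vethA egress
    tc qdisc add dev lo parent root handle 1: fq_codel
    tc filter add dev lo parent 1: protocol ip basic \
        action mirred egress redirect dev vethA
    # vethB ingress -> lo ingress
    tc qdisc add dev vethB ingress
    tc filter add dev vethB ingress protocol ip basic \
        action mirred ingress redirect dev lo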
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh
new file mode 100755
index 000000000000..8748d1b1d95b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh
@@ -0,0 +1,543 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="vlmc_control_test vlmc_querier_test vlmc_igmp_mld_version_test \
+ vlmc_last_member_test vlmc_startup_query_test vlmc_membership_test \
+ vlmc_querier_intvl_test vlmc_query_intvl_test vlmc_query_response_intvl_test \
+ vlmc_router_port_test vlmc_filtering_test"
+NUM_NETIFS=4
+CHECK_TC="yes"
+TEST_GROUP="239.10.10.10"
+
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+ ip link add l $h1 $h1.10 up type vlan id 10
+}
+
+h1_destroy()
+{
+ ip link del $h1.10
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+ ip link add l $h2 $h2.10 up type vlan id 10
+}
+
+h2_destroy()
+{
+ ip link del $h2.10
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+
+ bridge vlan add vid 10-11 dev $swp1 master
+ bridge vlan add vid 10-11 dev $swp2 master
+
+ ip link set dev br0 type bridge mcast_vlan_snooping 1
+ check_err $? "Could not enable global vlan multicast snooping"
+ log_test "Vlan multicast snooping enable"
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+vlmc_v2join_test()
+{
+ local expect=$1
+
+ RET=0
+ ip address add dev $h2.10 $TEST_GROUP/32 autojoin
+ check_err $? "Could not join $TEST_GROUP"
+
+ sleep 5
+ bridge -j mdb show dev br0 |
+ jq -e ".[].mdb[] | select(.grp == \"$TEST_GROUP\" and .vid == 10)" &>/dev/null
+ if [ $expect -eq 0 ]; then
+ check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP"
+ else
+ check_fail $? "IGMPv2 report shouldn't have created mdb entry for $TEST_GROUP"
+ fi
+
+ # check if we need to cleanup
+ if [ $RET -eq 0 ]; then
+ ip address del dev $h2.10 $TEST_GROUP/32 2>&1 1>/dev/null
+ sleep 5
+ bridge -j mdb show dev br0 |
+ jq -e ".[].mdb[] | select(.grp == \"$TEST_GROUP\" and \
+ .vid == 10)" &>/dev/null
+ check_fail $? "IGMPv2 leave didn't remove mdb entry for $TEST_GROUP"
+ fi
+}
+
+vlmc_control_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+ log_test "Vlan global options existence"
+
+ RET=0
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_snooping == 1) " &>/dev/null
+ check_err $? "Wrong default mcast_snooping global option value"
+ log_test "Vlan mcast_snooping global option default value"
+
+ RET=0
+ vlmc_v2join_test 0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 0
+ check_err $? "Could not disable multicast snooping in vlan 10"
+ vlmc_v2join_test 1
+ log_test "Vlan 10 multicast snooping control"
+}
+
+# setup for general query counting
+vlmc_query_cnt_xstats()
+{
+ local type=$1
+ local version=$2
+ local dev=$3
+
+ ip -j link xstats type bridge_slave dev $dev | \
+ jq -e ".[].multicast.${type}_queries.tx_v${version}"
+}
+
+vlmc_query_cnt_setup()
+{
+ local type=$1
+ local dev=$2
+
+ if [[ $type == "igmp" ]]; then
+ tc filter add dev $dev egress pref 10 prot 802.1Q \
+ flower vlan_id 10 vlan_ethtype ipv4 dst_ip 224.0.0.1 ip_proto 2 \
+ action pass
+ else
+ tc filter add dev $dev egress pref 10 prot 802.1Q \
+ flower vlan_id 10 vlan_ethtype ipv6 dst_ip ff02::1 ip_proto icmpv6 \
+ action pass
+ fi
+
+ ip link set dev br0 type bridge mcast_stats_enabled 1
+}
+
+vlmc_query_cnt_cleanup()
+{
+ local dev=$1
+
+ ip link set dev br0 type bridge mcast_stats_enabled 0
+ tc filter del dev $dev egress pref 10
+}
+
+vlmc_check_query()
+{
+ local type=$1
+ local version=$2
+ local dev=$3
+ local expect=$4
+ local time=$5
+ local ret=0
+
+ vlmc_query_cnt_setup $type $dev
+
+ local pre_tx_xstats=$(vlmc_query_cnt_xstats $type $version $dev)
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1
+ ret=$?
+ if [[ $ret -eq 0 ]]; then
+ sleep $time
+
+ local tcstats=$(tc_rule_stats_get $dev 10 egress)
+ local post_tx_xstats=$(vlmc_query_cnt_xstats $type $version $dev)
+
+ if [[ $tcstats != $expect || \
+ $(($post_tx_xstats-$pre_tx_xstats)) != $expect || \
+ $tcstats != $(($post_tx_xstats-$pre_tx_xstats)) ]]; then
+ ret=1
+ fi
+ fi
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 0
+ vlmc_query_cnt_cleanup $dev
+
+ return $ret
+}
+
+vlmc_querier_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_querier == 0) " &>/dev/null
+ check_err $? "Wrong default mcast_querier global vlan option value"
+ log_test "Vlan mcast_querier global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1
+ check_err $? "Could not enable querier in vlan 10"
+ log_test "Vlan 10 multicast querier enable"
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 0
+
+ RET=0
+ vlmc_check_query igmp 2 $swp1 1 1
+ check_err $? "No vlan tagged IGMPv2 general query packets sent"
+ log_test "Vlan 10 tagged IGMPv2 general query sent"
+
+ RET=0
+ vlmc_check_query mld 1 $swp1 1 1
+ check_err $? "No vlan tagged MLD general query packets sent"
+ log_test "Vlan 10 tagged MLD general query sent"
+}
+
+vlmc_igmp_mld_version_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_igmp_version == 2) " &>/dev/null
+ check_err $? "Wrong default mcast_igmp_version global vlan option value"
+ log_test "Vlan mcast_igmp_version global option default value"
+
+ RET=0
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_mld_version == 1) " &>/dev/null
+ check_err $? "Wrong default mcast_mld_version global vlan option value"
+ log_test "Vlan mcast_mld_version global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 3
+ check_err $? "Could not set mcast_igmp_version in vlan 10"
+ log_test "Vlan 10 mcast_igmp_version option changed to 3"
+
+ RET=0
+ vlmc_check_query igmp 3 $swp1 1 1
+ check_err $? "No vlan tagged IGMPv3 general query packets sent"
+ log_test "Vlan 10 tagged IGMPv3 general query sent"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 2
+ check_err $? "Could not set mcast_mld_version in vlan 10"
+ log_test "Vlan 10 mcast_mld_version option changed to 2"
+
+ RET=0
+ vlmc_check_query mld 2 $swp1 1 1
+ check_err $? "No vlan tagged MLDv2 general query packets sent"
+ log_test "Vlan 10 tagged MLDv2 general query sent"
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 2
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 1
+}
+
+vlmc_last_member_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_last_member_count == 2) " &>/dev/null
+ check_err $? "Wrong default mcast_last_member_count global vlan option value"
+ log_test "Vlan mcast_last_member_count global option default value"
+
+ RET=0
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_last_member_interval == 100) " &>/dev/null
+ check_err $? "Wrong default mcast_last_member_interval global vlan option value"
+ log_test "Vlan mcast_last_member_interval global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_count 3
+ check_err $? "Could not set mcast_last_member_count in vlan 10"
+ log_test "Vlan 10 mcast_last_member_count option changed to 3"
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_count 2
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_interval 200
+ check_err $? "Could not set mcast_last_member_interval in vlan 10"
+ log_test "Vlan 10 mcast_last_member_interval option changed to 200"
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_interval 100
+}
+
+vlmc_startup_query_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_startup_query_interval == 3125) " &>/dev/null
+ check_err $? "Wrong default mcast_startup_query_interval global vlan option value"
+ log_test "Vlan mcast_startup_query_interval global option default value"
+
+ RET=0
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_startup_query_count == 2) " &>/dev/null
+ check_err $? "Wrong default mcast_startup_query_count global vlan option value"
+ log_test "Vlan mcast_startup_query_count global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_interval 100
+ check_err $? "Could not set mcast_startup_query_interval in vlan 10"
+ vlmc_check_query igmp 2 $swp1 2 3
+ check_err $? "Wrong number of tagged IGMPv2 general queries sent"
+ log_test "Vlan 10 mcast_startup_query_interval option changed to 100"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 3
+ check_err $? "Could not set mcast_startup_query_count in vlan 10"
+ vlmc_check_query igmp 2 $swp1 3 4
+ check_err $? "Wrong number of tagged IGMPv2 general queries sent"
+ log_test "Vlan 10 mcast_startup_query_count option changed to 3"
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_interval 3125
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 2
+}
+
+vlmc_membership_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_membership_interval == 26000) " &>/dev/null
+ check_err $? "Wrong default mcast_membership_interval global vlan option value"
+ log_test "Vlan mcast_membership_interval global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_membership_interval 200
+ check_err $? "Could not set mcast_membership_interval in vlan 10"
+ log_test "Vlan 10 mcast_membership_interval option changed to 200"
+
+ RET=0
+ vlmc_v2join_test 1
+ log_test "Vlan 10 mcast_membership_interval mdb entry expire"
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_membership_interval 26000
+}
+
+vlmc_querier_intvl_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_querier_interval == 25500) " &>/dev/null
+ check_err $? "Wrong default mcast_querier_interval global vlan option value"
+ log_test "Vlan mcast_querier_interval global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier_interval 100
+ check_err $? "Could not set mcast_querier_interval in vlan 10"
+ log_test "Vlan 10 mcast_querier_interval option changed to 100"
+
+ RET=0
+ ip link add dev br1 type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1 \
+ mcast_vlan_snooping 1
+ bridge vlan add vid 10 dev br1 self pvid untagged
+ ip link set dev $h1 master br1
+ ip link set dev br1 up
+ bridge vlan add vid 10 dev $h1 master
+ bridge vlan global set vid 10 dev br1 mcast_snooping 1 mcast_querier 1
+ sleep 2
+ ip link del dev br1
+ ip addr replace 2001:db8:1::1/64 dev $h1
+ vlmc_check_query igmp 2 $swp1 1 1
+ check_err $? "Wrong number of IGMPv2 general queries after querier interval"
+ log_test "Vlan 10 mcast_querier_interval expire after outside query"
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier_interval 25500
+}
+
+vlmc_query_intvl_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_query_interval == 12500) " &>/dev/null
+ check_err $? "Wrong default mcast_query_interval global vlan option value"
+ log_test "Vlan mcast_query_interval global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 200
+ check_err $? "Could not set mcast_query_interval in vlan 10"
+ # 1 is sent immediately, then 2 more in the next 5 seconds
+ vlmc_check_query igmp 2 $swp1 3 5
+ check_err $? "Wrong number of tagged IGMPv2 general queries sent"
+ log_test "Vlan 10 mcast_query_interval option changed to 200"
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 2
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 12500
+}
+
+vlmc_query_response_intvl_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_query_response_interval == 1000) " &>/dev/null
+ check_err $? "Wrong default mcast_query_response_interval global vlan option value"
+ log_test "Vlan mcast_query_response_interval global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 200
+ check_err $? "Could not set mcast_query_response_interval in vlan 10"
+ log_test "Vlan 10 mcast_query_response_interval option changed to 200"
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 1000
+}
+
+vlmc_router_port_test()
+{
+ RET=0
+ local goutput=`bridge -j -d vlan show`
+ echo -n $goutput |
+ jq -e ".[] | select(.ifname == \"$swp1\" and \
+ .vlans[].vlan == 10)" &>/dev/null
+ check_err $? "Could not find port vlan 10's options"
+
+ echo -n $goutput |
+ jq -e ".[] | select(.ifname == \"$swp1\" and \
+ .vlans[].vlan == 10 and \
+ .vlans[].mcast_router == 1)" &>/dev/null
+ check_err $? "Wrong default port mcast_router option value"
+ log_test "Port vlan 10 option mcast_router default value"
+
+ RET=0
+ bridge vlan set vid 10 dev $swp1 mcast_router 2
+ check_err $? "Could not set port vlan 10's mcast_router option"
+ log_test "Port vlan 10 mcast_router option changed to 2"
+
+ RET=0
+ tc filter add dev $swp1 egress pref 10 prot 802.1Q \
+ flower vlan_id 10 vlan_ethtype ipv4 dst_ip 239.1.1.1 ip_proto udp action pass
+ tc filter add dev $swp2 egress pref 10 prot 802.1Q \
+ flower vlan_id 10 vlan_ethtype ipv4 dst_ip 239.1.1.1 ip_proto udp action pass
+ bridge vlan set vid 10 dev $swp2 mcast_router 0
+ # we need to enable querier and disable query response interval to
+ # make sure packets are flooded only to router ports
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1 \
+ mcast_query_response_interval 0
+ bridge vlan add vid 10 dev br0 self
+ sleep 1
+ mausezahn br0 -Q 10 -c 10 -p 128 -b 01:00:5e:01:01:01 -B 239.1.1.1 \
+ -t udp "dp=1024" &>/dev/null
+ local swp1_tcstats=$(tc_rule_stats_get $swp1 10 egress)
+ if [[ $swp1_tcstats != 10 ]]; then
+ check_err 1 "Wrong number of vlan 10 multicast packets flooded"
+ fi
+ local swp2_tcstats=$(tc_rule_stats_get $swp2 10 egress)
+ check_err $swp2_tcstats "Vlan 10 multicast packets flooded to non-router port"
+ log_test "Flood unknown vlan multicast packets to router port only"
+
+ tc filter del dev $swp2 egress pref 10
+ tc filter del dev $swp1 egress pref 10
+ bridge vlan del vid 10 dev br0 self
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 1000
+ bridge vlan set vid 10 dev $swp2 mcast_router 1
+ bridge vlan set vid 10 dev $swp1 mcast_router 1
+}
+
+vlmc_filtering_test()
+{
+ RET=0
+ ip link set dev br0 type bridge vlan_filtering 0
+ ip -j -d link show dev br0 | \
+ jq -e "select(.[0].linkinfo.info_data.mcast_vlan_snooping == 1)" &>/dev/null
+ check_fail $? "Vlan filtering is disabled but multicast vlan snooping is still enabled"
+ log_test "Disable multicast vlan snooping when vlan filtering is disabled"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
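Most checks in this script share one pattern: dump the per-vlan global options as JSON and let jq -e set the exit status. For example, to verify by hand that the querier is enabled on vlan 10:

    bridge -j vlan global show |
        jq -e '.[].vlans[] | select(.vlan == 10 and .mcast_querier == 1)'
    echo $?    # 0 if found, non-zero otherwise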
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index bf17e485684f..b0980a2efa31 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -13,6 +13,8 @@ NETIFS[p5]=veth4
NETIFS[p6]=veth5
NETIFS[p7]=veth6
NETIFS[p8]=veth7
+NETIFS[p9]=veth8
+NETIFS[p10]=veth9
# Port that does not have a cable connected.
NETIF_NO_CABLE=eth8
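Topologies that reference NETIFS[p9]/NETIFS[p10] need the matching interfaces to exist. If NETIF_CREATE is disabled, the extra veth pair can be created manually, e.g.:

    ip link add name veth8 type veth peer name veth9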
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index dfd827b7a9f9..7da783d6f453 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -20,6 +20,7 @@ NETIF_TYPE=${NETIF_TYPE:=veth}
NETIF_CREATE=${NETIF_CREATE:=yes}
MCD=${MCD:=smcrouted}
MC_CLI=${MC_CLI:=smcroutectl}
+PING_COUNT=${PING_COUNT:=10}
PING_TIMEOUT=${PING_TIMEOUT:=5}
WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
@@ -1111,7 +1112,8 @@ ping_do()
vrf_name=$(master_name_get $if_name)
ip vrf exec $vrf_name \
- $PING $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null
+ $PING $args $dip -c $PING_COUNT -i 0.1 \
+ -w $PING_TIMEOUT &> /dev/null
}
ping_test()
@@ -1132,7 +1134,8 @@ ping6_do()
vrf_name=$(master_name_get $if_name)
ip vrf exec $vrf_name \
- $PING6 $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null
+ $PING6 $args $dip -c $PING_COUNT -i 0.1 \
+ -w $PING_TIMEOUT &> /dev/null
}
ping6_test()
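Like the other knobs at the top of lib.sh, PING_COUNT can now be overridden per run via the environment or a forwarding.config. For example, against any forwarding selftest:

    # Send 50 pings per ping_test() instead of the default 10:
    PING_COUNT=50 ./router.sh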
diff --git a/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh b/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh
new file mode 100755
index 000000000000..0548b2b0d416
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh
@@ -0,0 +1,347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.10 | | + $h2.10 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 |
+# | | | | | |
+# | | + $h1.20 | | | + $h2.20 |
+# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 |
+# | \| | | \| |
+# | + $h1 | | + $h2 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR1 (802.1ad) + $swp2 | |
+# | | vid 100 pvid untagged vid 100 pvid | |
+# | | untagged | |
+# | | + vx100 (vxlan) | |
+# | | local 2001:db8:3::1 | |
+# | | remote 2001:db8:4::1 2001:db8:5::1 | |
+# | | id 1000 dstport $VXPORT | |
+# | | vid 100 pvid untagged | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 2001:db8:4::0/64 via 2001:db8:3::2 |
+# | 2001:db8:5::0/64 via 2001:db8:3::2 |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|----------------------------------------------------------+
+# | | VRP2 (vrf) |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | + v1 (veth) + v3 (veth) |
+# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 |
+# +----|---------------------------------------|------------------+
+# | |
+# +----|--------------------------------+ +----|-------------------------------+
+# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) |
+# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 |
+# | | | |
+# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 |
+# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via |
+# | | | 2001:db8:5::2 |
+# | +-------------------------------+ | | +-------------------------------+ |
+# | | BR2 (802.1ad) | | | | BR2 (802.1ad) | |
+# | | + vx100 (vxlan) | | | | + vx100 (vxlan) | |
+# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | |
+# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | |
+# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | vid 100 pvid untagged | | | | vid 100 pvid untagged | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 100 pvid untagged | | | | | vid 100 pvid untagged | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | VW2 (vrf) | | | | | VW2 (vrf) | |
+# | | + w2 (veth) | | | | + w2 (veth) | |
+# | | |\ | | | | |\ | |
+# | | | + w2.10 | | | | | + w2.10 | |
+# | | | 2001:db8:1::3/64 | | | | | 2001:db8:1::4/64 | |
+# | | | | | | | | | |
+# | | + w2.20 | | | | + w2.20 | |
+# | | 2001:db8:2::3/64 | | | | 2001:db8:2::4/64 | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# +-------------------------------------+ +------------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+ ping_ipv6
+ "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ tc qdisc add dev $h1 clsact
+ vlan_create $h1 10 v$h1 2001:db8:1::1/64
+ vlan_create $h1 20 v$h1 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 20
+ vlan_destroy $h1 10
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ tc qdisc add dev $h2 clsact
+ vlan_create $h2 10 v$h2 2001:db8:1::2/64
+ vlan_create $h2 20 v$h2 2001:db8:2::2/64
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 20
+ vlan_destroy $h2 10
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+ ip address add dev $rp1 2001:db8:3::1/64
+
+ ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+ ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+}
+
+rp1_unset_addr()
+{
+ ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+ ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+
+ ip address del dev $rp1 2001:db8:3::1/64
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+ vlan_default_pvid 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ rp1_set_addr
+
+ ip link add name vx100 type vxlan id 1000 \
+ local 2001:db8:3::1 dstport "$VXPORT" \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx100 up
+
+ ip link set dev vx100 master br1
+ bridge vlan add vid 100 dev vx100 pvid untagged
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ bridge vlan add vid 100 dev $swp1 pvid untagged
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+ bridge vlan add vid 100 dev $swp2 pvid untagged
+
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:5::1 self
+}
+
+switch_destroy()
+{
+ bridge fdb del dev vx100 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx100 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+ bridge vlan del vid 100 dev $swp2
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 100 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev vx100 nomaster
+ ip link set dev vx100 down
+ ip link del dev vx100
+
+ rp1_unset_addr
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 2001:db8:3::2/64
+ __simple_if_init v1 v$rp2 2001:db8:4::2/64
+ __simple_if_init v3 v$rp2 2001:db8:5::2/64
+ tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+ tc qdisc del dev v1 clsact
+ __simple_if_fini v3 2001:db8:5::2/64
+ __simple_if_fini v1 2001:db8:4::2/64
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+ns_init_common()
+{
+ local in_if=$1; shift
+ local in_addr=$1; shift
+ local other_in_addr=$1; shift
+ local nh_addr=$1; shift
+ local host_addr1=$1; shift
+ local host_addr2=$1; shift
+
+ ip link set dev $in_if up
+ ip address add dev $in_if $in_addr/64
+ tc qdisc add dev $in_if clsact
+
+ ip link add name br2 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+ vlan_default_pvid 0
+ ip link set dev br2 up
+
+ ip link add name w1 type veth peer name w2
+
+ ip link set dev w1 master br2
+ ip link set dev w1 up
+ bridge vlan add vid 100 dev w1 pvid untagged
+
+ ip link add name vx100 type vxlan id 1000 local $in_addr \
+ dstport "$VXPORT" udp6zerocsumrx
+ ip link set dev vx100 up
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:3::1 self
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst $other_in_addr self
+
+ ip link set dev vx100 master br2
+ tc qdisc add dev vx100 clsact
+
+ bridge vlan add vid 100 dev vx100 pvid untagged
+
+ simple_if_init w2
+ vlan_create w2 10 vw2 $host_addr1/64
+ vlan_create w2 20 vw2 $host_addr2/64
+
+ ip route add 2001:db8:3::0/64 nexthop via $nh_addr
+ ip route add $other_in_addr/128 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 \
+ ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \
+ 2001:db8:1::3 2001:db8:2::3
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+ns2_create()
+{
+ ip netns add ns2
+ ip link set dev v4 netns ns2
+ in_ns ns2 \
+ ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \
+ 2001:db8:1::4 2001:db8:2::4
+}
+
+ns2_destroy()
+{
+ ip netns exec ns2 ip link set dev v4 netns 1
+ ip netns del ns2
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ ip link add name v3 type veth peer name v4
+ vrp2_create
+ ns1_create
+ ns2_create
+
+ r1_mac=$(in_ns ns1 mac_get w2)
+ r2_mac=$(in_ns ns2 mac_get w2)
+ h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns2_destroy
+ ns1_destroy
+ vrp2_destroy
+ ip link del dev v3
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2 ": local->local"
+ ping6_test $h1 2001:db8:1::3 ": local->remote 1"
+ ping6_test $h1 2001:db8:1::4 ": local->remote 2"
+}
+
+test_all()
+{
+ echo "Running tests with UDP port $VXPORT"
+ tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
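As in the IPv4 variant of this test, ${VXPORT:=4789} lets the environment choose the VXLAN destination port. For example, to exercise both the default and a non-standard port:

    ./q_in_vni_ipv6.sh              # default port 4789
    VXPORT=8472 ./q_in_vni_ipv6.sh  # alternate port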
diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh
new file mode 100755
index 000000000000..f4930098974f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh
@@ -0,0 +1,504 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------------------+ +-----------------------------+
+# | vrf-h1 | | vrf-h2 |
+# | + $h1 | | + $h2 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:2::1/64 |
+# | | default via 2001:db8:1::3 | | | default via 2001:db8:2::3 |
+# +----|---------------------------+ +-|---------------------------+
+# | |
+# +----|------------------------------------------|---------------------------+
+# | SW | | |
+# | +--|------------------------------------------|-------------------------+ |
+# | | + $swp1 br1 + $swp2 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | | | |
+# | | + vx10 + vx20 | |
+# | | local 2001:db8:3::1 local 2001:db8:3::1 | |
+# | | remote 2001:db8:3::2 remote 2001:db8:3::2 | |
+# | | id 1000 id 2000 | |
+# | | dstport 4789 dstport 4789 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | | | |
+# | +-----------------------------------+-----------------------------------+ |
+# | | |
+# | +-----------------------------------|-----------------------------------+ |
+# | | | | |
+# | | +--------------------------------+--------------------------------+ | |
+# | | | | | |
+# | | + vlan10 vlan20 + | |
+# | | | 2001:db8:1::2/64 2001:db8:2::2/64 | | |
+# | | | | | |
+# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | |
+# | | 2001:db8:1::3/64 2001:db8:2::3/64 | |
+# | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | |
+# | | vrf-green | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | + $rp1 +lo |
+# | | 2001:db8:4::1/64 2001:db8:3::1/128 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | vrf-spine |
+# | + $rp2 |
+# | 2001:db8:4::2/64 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | |
+# | + v1 (veth) |
+# | | 2001:db8:5::2/64 |
+# +----|--------------------------------------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | + v2 (veth) +lo NS1 (netns) |
+# | 2001:db8:5::1/64 2001:db8:3::2/128 |
+# | |
+# | +-----------------------------------------------------------------------+ |
+# | | vrf-green | |
+# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | |
+# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | |
+# | | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | |
+# | | | | | |
+# | | + vlan10 vlan20 + | |
+# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | |
+# | | | | | |
+# | | +--------------------------------+--------------------------------+ | |
+# | | | | |
+# | +-----------------------------------|-----------------------------------+ |
+# | | |
+# | +-----------------------------------+-----------------------------------+ |
+# | | | |
+# | | + vx10 + vx20 | |
+# | | local 2001:db8:3::2 local 2001:db8:3::2 | |
+# | | remote 2001:db8:3::1 remote 2001:db8:3::1 | |
+# | | id 1000 id 2000 | |
+# | | dstport 4789 dstport 4789 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | | | |
+# | | + w1 (veth) + w3 (veth) | |
+# | | | vid 10 pvid untagged br1 | vid 20 pvid untagged | |
+# | +--|------------------------------------------|-------------------------+ |
+# | | | |
+# | | | |
+# | +--|----------------------+ +--|-------------------------+ |
+# | | | vrf-h1 | | | vrf-h2 | |
+# | | + w2 (veth) | | + w4 (veth) | |
+# | | 2001:db8:1::4/64 | | 2001:db8:2::4/64 | |
+# | | default via | | default via | |
+# | | 2001:db8:1::3/64 | | 2001:db8:2::3/64 | |
+# | +-------------------------+ +----------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv6
+ arp_decap
+"
+NUM_NETIFS=6
+source lib.sh
+
+require_command $ARPING
+
+hx_create()
+{
+ local vrf_name=$1; shift
+ local if_name=$1; shift
+ local ip_addr=$1; shift
+ local gw_ip=$1; shift
+
+ vrf_create $vrf_name
+ ip link set dev $if_name master $vrf_name
+ ip link set dev $vrf_name up
+ ip link set dev $if_name up
+
+ ip address add $ip_addr/64 dev $if_name
+ ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \
+ dev $if_name
+ ip route add default vrf $vrf_name nexthop via $gw_ip
+}
+export -f hx_create
+
+hx_destroy()
+{
+ local vrf_name=$1; shift
+ local if_name=$1; shift
+ local ip_addr=$1; shift
+ local gw_ip=$1; shift
+
+ ip route del default vrf $vrf_name nexthop via $gw_ip
+ ip neigh del $gw_ip dev $if_name
+ ip address del $ip_addr/64 dev $if_name
+
+ ip link set dev $if_name down
+ vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+ hx_create "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3
+}
+
+h1_destroy()
+{
+ hx_destroy "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3
+}
+
+h2_create()
+{
+ hx_create "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3
+}
+
+h2_destroy()
+{
+ hx_destroy "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3
+}
+
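+# The SVIs (vlan10, vlan20) each carry a private-mode macvlan upper with
+# the shared MAC 00:00:5e:00:01:01 (taken from the IANA VRRP range),
+# acting as a distributed anycast gateway: this switch and the one
+# emulated in ns1 both answer for the same gateway MAC and addresses.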
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+	# that of the VXLAN device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ ip address add dev $rp1 2001:db8:4::1/64
+ ip route add 2001:db8:3::2/128 nexthop via 2001:db8:4::2
+
+ ip link add name vx10 type vxlan id 1000 \
+ local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx10 up
+
+ ip link set dev vx10 master br1
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link add name vx20 type vxlan id 2000 \
+ local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx20 up
+
+ ip link set dev vx20 master br1
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ bridge vlan add vid 10 dev $swp1 pvid untagged
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+ bridge vlan add vid 20 dev $swp2 pvid untagged
+
+ ip address add 2001:db8:3::1/128 dev lo
+
+ # Create SVIs
+ vrf_create "vrf-green"
+ ip link set dev vrf-green up
+
+ ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+ ip address add 2001:db8:1::2/64 dev vlan10
+ ip link add link vlan10 name vlan10-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:1::3/64 dev vlan10-v
+
+ ip link add link br1 name vlan20 up master vrf-green type vlan id 20
+ ip address add 2001:db8:2::2/64 dev vlan20
+ ip link add link vlan20 name vlan20-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:2::3/64 dev vlan20-v
+
+ bridge vlan add vid 10 dev br1 self
+ bridge vlan add vid 20 dev br1 self
+
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+}
+
+switch_destroy()
+{
+ bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
+ bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
+
+ bridge vlan del vid 20 dev br1 self
+ bridge vlan del vid 10 dev br1 self
+
+ ip link del dev vlan20
+
+ ip link del dev vlan10
+
+ vrf_destroy "vrf-green"
+
+ ip address del 2001:db8:3::1/128 dev lo
+
+ bridge vlan del vid 20 dev $swp2
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ bridge vlan del vid 20 dev vx20
+ ip link set dev vx20 nomaster
+
+ ip link set dev vx20 down
+ ip link del dev vx20
+
+ bridge vlan del vid 10 dev vx10
+ ip link set dev vx10 nomaster
+
+ ip link set dev vx10 down
+ ip link del dev vx10
+
+	ip route del 2001:db8:3::2/128 nexthop via 2001:db8:4::2
+ ip address del dev $rp1 2001:db8:4::1/64
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
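+# vrf-spine emulates the underlay: it routes between the local VTEP
+# loopback (2001:db8:3::1) behind $rp2 and the ns1 VTEP loopback
+# (2001:db8:3::2) behind v1, standing in for a leaf-spine fabric.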
+spine_create()
+{
+ vrf_create "vrf-spine"
+ ip link set dev $rp2 master vrf-spine
+ ip link set dev v1 master vrf-spine
+ ip link set dev vrf-spine up
+ ip link set dev $rp2 up
+ ip link set dev v1 up
+
+ ip address add 2001:db8:4::2/64 dev $rp2
+ ip address add 2001:db8:5::2/64 dev v1
+
+ ip route add 2001:db8:3::1/128 vrf vrf-spine nexthop via \
+ 2001:db8:4::1
+ ip route add 2001:db8:3::2/128 vrf vrf-spine nexthop via \
+ 2001:db8:5::1
+}
+
+spine_destroy()
+{
+ ip route del 2001:db8:3::2/128 vrf vrf-spine nexthop via \
+ 2001:db8:5::1
+ ip route del 2001:db8:3::1/128 vrf vrf-spine nexthop via \
+ 2001:db8:4::1
+
+ ip address del 2001:db8:5::2/64 dev v1
+ ip address del 2001:db8:4::2/64 dev $rp2
+
+ ip link set dev v1 down
+ ip link set dev $rp2 down
+ vrf_destroy "vrf-spine"
+}
+
+ns_h1_create()
+{
+ hx_create "vrf-h1" w2 2001:db8:1::4 2001:db8:1::3
+}
+export -f ns_h1_create
+
+ns_h2_create()
+{
+ hx_create "vrf-h2" w4 2001:db8:2::4 2001:db8:2::3
+}
+export -f ns_h2_create
+
+ns_switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ ip link set dev br1 up
+
+ ip link set dev v2 up
+ ip address add dev v2 2001:db8:5::1/64
+ ip route add 2001:db8:3::1 nexthop via 2001:db8:5::2
+
+ ip link add name vx10 type vxlan id 1000 \
+ local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx10 up
+
+ ip link set dev vx10 master br1
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link add name vx20 type vxlan id 2000 \
+ local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx20 up
+
+ ip link set dev vx20 master br1
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ ip link set dev w1 master br1
+ ip link set dev w1 up
+ bridge vlan add vid 10 dev w1 pvid untagged
+
+ ip link set dev w3 master br1
+ ip link set dev w3 up
+ bridge vlan add vid 20 dev w3 pvid untagged
+
+ ip address add 2001:db8:3::2/128 dev lo
+
+ # Create SVIs
+ vrf_create "vrf-green"
+ ip link set dev vrf-green up
+
+ ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+ ip address add 2001:db8:1::3/64 dev vlan10
+ ip link add link vlan10 name vlan10-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:1::3/64 dev vlan10-v
+
+ ip link add link br1 name vlan20 up master vrf-green type vlan id 20
+ ip address add 2001:db8:2::3/64 dev vlan20
+ ip link add link vlan20 name vlan20-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:2::3/64 dev vlan20-v
+
+ bridge vlan add vid 10 dev br1 self
+ bridge vlan add vid 20 dev br1 self
+
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+}
+export -f ns_switch_create
+
+ns_init()
+{
+ ip link add name w1 type veth peer name w2
+ ip link add name w3 type veth peer name w4
+
+ ip link set dev lo up
+
+ ns_h1_create
+ ns_h2_create
+ ns_switch_create
+}
+export -f ns_init
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 ns_init
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
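+# Install the remote hosts' FDB and neighbour entries statically, marked
+# extern_learn the way a control-plane agent (e.g. EVPN) would, so that
+# routing between the overlay subnets works without dynamic learning.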
+macs_populate()
+{
+ local mac1=$1; shift
+ local mac2=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+ local dst=$1; shift
+
+ bridge fdb add $mac1 dev vx10 self master extern_learn static \
+ dst $dst vlan 10
+ bridge fdb add $mac2 dev vx20 self master extern_learn static \
+ dst $dst vlan 20
+
+ ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \
+ extern_learn
+ ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \
+ extern_learn
+}
+export -f macs_populate
+
+macs_initialize()
+{
+ local h1_ns_mac=$(in_ns ns1 mac_get w2)
+ local h2_ns_mac=$(in_ns ns1 mac_get w4)
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+
+ macs_populate $h1_ns_mac $h2_ns_mac 2001:db8:1::4 2001:db8:2::4 \
+ 2001:db8:3::2
+ in_ns ns1 macs_populate $h1_mac $h2_mac 2001:db8:1::1 2001:db8:2::1 \
+ 2001:db8:3::1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ spine_create
+ ns1_create
+ in_ns ns1 forwarding_enable
+
+ macs_initialize
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns1_destroy
+ spine_destroy
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::1 ": local->local vid 10->vid 20"
+ ping6_test $h1 2001:db8:1::4 ": local->remote vid 10->vid 10"
+ ping6_test $h2 2001:db8:2::4 ": local->remote vid 20->vid 20"
+ ping6_test $h1 2001:db8:2::4 ": local->remote vid 10->vid 20"
+ ping6_test $h2 2001:db8:1::4 ": local->remote vid 20->vid 10"
+}
+
+arp_decap()
+{
+	# Repeat the ping tests, but without populating the neighbours. This
+	# makes sure we correctly decapsulate neighbour discovery (the IPv6
+	# counterpart of ARP) packets.
WRONG_ANCHOR
+ log_info "deleting neighbours from vlan interfaces"
+
+ ip neigh del 2001:db8:1::4 dev vlan10
+ ip neigh del 2001:db8:2::4 dev vlan20
+
+ ping_ipv6
+
+ ip neigh replace 2001:db8:1::4 lladdr $(in_ns ns1 mac_get w2) \
+ nud noarp dev vlan10 extern_learn
+ ip neigh replace 2001:db8:2::4 lladdr $(in_ns ns1 mac_get w4) \
+ nud noarp dev vlan20 extern_learn
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
new file mode 100755
index 000000000000..ac97f07e5ce8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
@@ -0,0 +1,804 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1 | | + $h2 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR1 (802.1d) + $swp2 | |
+# | | | |
+# | | + vx1 (vxlan) | |
+# | | local 2001:db8:3::1 | |
+# | | remote 2001:db8:4::1 2001:db8:5::1 | |
+# | | id 1000 dstport $VXPORT | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 2001:db8:4::0/64 via 2001:db8:3::2 |
+# | 2001:db8:5::0/64 via 2001:db8:3::2 |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|----------------------------------------------------------+
+# | | VRP2 (vrf) |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | + v1 (veth) + v3 (veth) |
+# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 |
+# +----|---------------------------------------|------------------+
+# | |
+# +----|--------------------------------+ +----|-------------------------------+
+# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) |
+# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 |
+# | | | |
+# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 |
+# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via |
+# | | | 2001:db8:5::2 |
+# | | | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# | | BR2 (802.1d) | | | | BR2 (802.1d) | |
+# | | + vx2 (vxlan) | | | | + vx2 (vxlan) | |
+# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | |
+# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | |
+# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | |
+# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | |
+# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# +-------------------------------------+ +------------------------------------+
+
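+# ${VXPORT:=4789} keeps an externally provided port (the *_port_8472
+# wrapper presets VXPORT=8472 before sourcing this file) and falls back
+# to the IANA-assigned VXLAN port otherwise; wrappers may likewise
+# preset ALL_TESTS below to run only a subset of the tests.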
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+ ping_ipv4
+ ping_ipv6
+ test_flood
+ test_unicast
+ test_ttl
+ test_tos
+ test_ecn_encap
+ test_ecn_decap
+ reapply_config
+ ping_ipv4
+ ping_ipv6
+ test_flood
+ test_unicast
+"}
+
+NUM_NETIFS=6
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+ tc qdisc add dev $h1 clsact
+}
+
+h1_destroy()
+{
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+rp1_set_addr()
+{
+ ip address add dev $rp1 2001:db8:3::1/64
+
+ ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+ ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+}
+
+rp1_unset_addr()
+{
+ ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+ ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+
+ ip address del dev $rp1 2001:db8:3::1/64
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+	# that of the VXLAN device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ rp1_set_addr
+ tc qdisc add dev $rp1 clsact
+
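+	# udp6zerocsumrx/udp6zerocsumtx allow a zero UDP checksum on the
+	# IPv6 underlay (per RFC 6935/6936); IPv6 otherwise mandates UDP
+	# checksums, unlike IPv4.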
+ ip link add name vx1 type vxlan id 1000 local 2001:db8:3::1 \
+ dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+ tos inherit ttl 100
+ ip link set dev vx1 up
+
+ ip link set dev vx1 master br1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self
+}
+
+switch_destroy()
+{
+ bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev vx1 nomaster
+ ip link set dev vx1 down
+ ip link del dev vx1
+
+ tc qdisc del dev $rp1 clsact
+ rp1_unset_addr
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 2001:db8:3::2/64
+ __simple_if_init v1 v$rp2 2001:db8:4::2/64
+ __simple_if_init v3 v$rp2 2001:db8:5::2/64
+ tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+ tc qdisc del dev v1 clsact
+ __simple_if_fini v3 2001:db8:5::2/64
+ __simple_if_fini v1 2001:db8:4::2/64
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+ns_init_common()
+{
+ local in_if=$1; shift
+ local in_addr=$1; shift
+ local other_in_addr=$1; shift
+ local nh_addr=$1; shift
+ local host_addr_ipv4=$1; shift
+ local host_addr_ipv6=$1; shift
+
+ ip link set dev $in_if up
+ ip address add dev $in_if $in_addr/64
+ tc qdisc add dev $in_if clsact
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev br2 up
+
+ ip link add name w1 type veth peer name w2
+
+ ip link set dev w1 master br2
+ ip link set dev w1 up
+
+ ip link add name vx2 type vxlan id 1000 local $in_addr \
+ dstport "$VXPORT" udp6zerocsumrx
+ ip link set dev vx2 up
+ bridge fdb append dev vx2 00:00:00:00:00:00 dst 2001:db8:3::1 self
+ bridge fdb append dev vx2 00:00:00:00:00:00 dst $other_in_addr self
+
+ ip link set dev vx2 master br2
+ tc qdisc add dev vx2 clsact
+
+ simple_if_init w2 $host_addr_ipv4/28 $host_addr_ipv6/64
+
+ ip route add 2001:db8:3::0/64 nexthop via $nh_addr
+ ip route add $other_in_addr/128 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 \
+ ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \
+ 192.0.2.3 2001:db8:1::3
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+ns2_create()
+{
+ ip netns add ns2
+ ip link set dev v4 netns ns2
+ in_ns ns2 \
+ ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \
+ 192.0.2.4 2001:db8:1::4
+}
+
+ns2_destroy()
+{
+ ip netns exec ns2 ip link set dev v4 netns 1
+ ip netns del ns2
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ ip link add name v3 type veth peer name v4
+ vrp2_create
+ ns1_create
+ ns2_create
+
+ r1_mac=$(in_ns ns1 mac_get w2)
+ r2_mac=$(in_ns ns2 mac_get w2)
+ h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns2_destroy
+ ns1_destroy
+ vrp2_destroy
+ ip link del dev v3
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+# For the first round of tests, vx1 is the first device to get
+# attached to the bridge, and at that point the local IP is already
+# configured. Try the other scenario of attaching the device to an
+# already-offloaded bridge, and only then assign the local IP.
+reapply_config()
+{
+ log_info "Reapplying configuration"
+
+ bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ ip link set dev vx1 nomaster
+ rp1_unset_addr
+ sleep 5
+
+ ip link set dev vx1 master br1
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ sleep 1
+ rp1_set_addr
+ sleep 5
+}
+
+__ping_ipv4()
+{
+ local vxlan_local_ip=$1; shift
+ local vxlan_remote_ip=$1; shift
+ local src_ip=$1; shift
+ local dst_ip=$1; shift
+ local dev=$1; shift
+ local info=$1; shift
+
+ RET=0
+
+ tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \
+ flower ip_proto udp src_ip $vxlan_local_ip \
+ dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass
+ # Match ICMP-reply packets after decapsulation, so source IP is
+ # destination IP of the ping and destination IP is source IP of the
+ # ping.
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower src_ip $dst_ip dst_ip $src_ip \
+ $TC_FLAG action pass
+
+ # Send 100 packets and verify that at least 100 packets hit the rule,
+ # to overcome ARP noise.
+ PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100
+ check_err $? "Encapsulated packets did not go through router"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10 100
+ check_err $? "Decapsulated packets did not go through switch"
+
+ log_test "ping: $info"
+
+ tc filter del dev $swp1 egress
+ tc filter del dev $rp1 egress
+}
+
+ping_ipv4()
+{
+ RET=0
+
+ local local_sw_ip=2001:db8:3::1
+ local remote_ns1_ip=2001:db8:4::1
+ local remote_ns2_ip=2001:db8:5::1
+ local h1_ip=192.0.2.1
+ local w2_ns1_ip=192.0.2.3
+ local w2_ns2_ip=192.0.2.4
+
+ ping_test $h1 192.0.2.2 ": local->local"
+
+ __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_ip $w2_ns1_ip $h1 \
+ "local->remote 1"
+ __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_ip $w2_ns2_ip $h1 \
+ "local->remote 2"
+}
+
+__ping_ipv6()
+{
+ local vxlan_local_ip=$1; shift
+ local vxlan_remote_ip=$1; shift
+ local src_ip=$1; shift
+ local dst_ip=$1; shift
+ local dev=$1; shift
+ local info=$1; shift
+
+ RET=0
+
+ tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \
+ flower ip_proto udp src_ip $vxlan_local_ip \
+ dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass
+ # Match ICMP-reply packets after decapsulation, so source IP is
+ # destination IP of the ping and destination IP is source IP of the
+ # ping.
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower src_ip $dst_ip dst_ip $src_ip $TC_FLAG action pass
+
+ # Send 100 packets and verify that at least 100 packets hit the rule,
+ # to overcome neighbor discovery noise.
+ PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $rp1 egress" 101 100
+ check_err $? "Encapsulated packets did not go through router"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 100
+ check_err $? "Decapsulated packets did not go through switch"
+
+ log_test "ping6: $info"
+
+ tc filter del dev $swp1 egress
+ tc filter del dev $rp1 egress
+}
+
+ping_ipv6()
+{
+ RET=0
+
+ local local_sw_ip=2001:db8:3::1
+ local remote_ns1_ip=2001:db8:4::1
+ local remote_ns2_ip=2001:db8:5::1
+ local h1_ip=2001:db8:1::1
+ local w2_ns1_ip=2001:db8:1::3
+ local w2_ns2_ip=2001:db8:1::4
+
+ ping6_test $h1 2001:db8:1::2 ": local->local"
+
+ __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_ip $w2_ns1_ip $h1 \
+ "local->remote 1"
+ __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_ip $w2_ns2_ip $h1 \
+ "local->remote 2"
+}
+
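+# Expand to an "in_ns <ns>" prefix when a namespace is given, and to
+# nothing otherwise, e.g.:
+#
+#   $(maybe_in_ns ns1) tc filter ...   # runs inside ns1
+#   $(maybe_in_ns "") tc filter ...    # runs in the current namespace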
+maybe_in_ns()
+{
+ echo ${1:+in_ns} $1
+}
+
+__flood_counter_add_del()
+{
+ local add_del=$1; shift
+ local dst_ip=$1; shift
+ local dev=$1; shift
+ local ns=$1; shift
+
+ # Putting the ICMP capture both to HW and to SW will end up
+ # double-counting the packets that are trapped to slow path, such as for
+ # the unicast test. Adding either skip_hw or skip_sw fixes this problem,
+ # but with skip_hw, the flooded packets are not counted at all, because
+ # those are dropped due to MAC address mismatch; and skip_sw is a no-go
+ # for veth-based topologies.
+ #
+	# So try to install with skip_sw and fall back to skip_hw if that fails.
+
+ $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \
+ proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \
+ icmpv6 skip_sw action pass 2>/dev/null || \
+ $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \
+ proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \
+ icmpv6 skip_hw action pass
+}
+
+flood_counter_install()
+{
+ __flood_counter_add_del add "$@"
+}
+
+flood_counter_uninstall()
+{
+ __flood_counter_add_del del "$@"
+}
+
+flood_fetch_stat()
+{
+ local dev=$1; shift
+ local ns=$1; shift
+
+ $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress
+}
+
+flood_fetch_stats()
+{
+ local counters=("${@}")
+ local counter
+
+ for counter in "${counters[@]}"; do
+ flood_fetch_stat $counter
+ done
+}
+
+vxlan_flood_test()
+{
+ local mac=$1; shift
+ local dst=$1; shift
+ local -a expects=("${@}")
+
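+	# expects[] is matched to counters[] by index: 0 is $h2, 1 is vx2
+	# in ns1, 2 is vx2 in ns2 (cf. the hit_idx argument of
+	# __test_unicast()).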
+ local -a counters=($h2 "vx2 ns1" "vx2 ns2")
+ local counter
+ local key
+
+ for counter in "${counters[@]}"; do
+ flood_counter_install $dst $counter
+ done
+
+ local -a t0s=($(flood_fetch_stats "${counters[@]}"))
+ $MZ -6 $h1 -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp6 type=128 -q
+ sleep 1
+ local -a t1s=($(flood_fetch_stats "${counters[@]}"))
+
+ for key in ${!t0s[@]}; do
+ local delta=$((t1s[$key] - t0s[$key]))
+ local expect=${expects[$key]}
+
+ ((expect == delta))
+ check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta."
+ done
+
+ for counter in "${counters[@]}"; do
+ flood_counter_uninstall $dst $counter
+ done
+}
+
+__test_flood()
+{
+ local mac=$1; shift
+ local dst=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ vxlan_flood_test $mac $dst 10 10 10
+
+ log_test "VXLAN: $what"
+}
+
+test_flood()
+{
+ __test_flood de:ad:be:ef:13:37 2001:db8:1::100 "flood"
+}
+
+vxlan_fdb_add_del()
+{
+ local add_del=$1; shift
+ local mac=$1; shift
+ local dev=$1; shift
+ local dst=$1; shift
+
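+	# ${dst:+dst} emits the "dst" keyword only when a destination VTEP
+	# IP was given, so the same helper covers the local $h2 entry (no
+	# dst) and the remote entries.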
+ bridge fdb $add_del dev $dev $mac self static permanent \
+ ${dst:+dst} $dst 2>/dev/null
+ bridge fdb $add_del dev $dev $mac master static 2>/dev/null
+}
+
+__test_unicast()
+{
+ local mac=$1; shift
+ local dst=$1; shift
+ local hit_idx=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ local -a expects=(0 0 0)
+ expects[$hit_idx]=10
+
+ vxlan_flood_test $mac $dst "${expects[@]}"
+
+ log_test "VXLAN: $what"
+}
+
+test_unicast()
+{
+ local -a targets=("$h2_mac $h2"
+ "$r1_mac vx1 2001:db8:4::1"
+ "$r2_mac vx1 2001:db8:5::1")
+ local target
+
+ for target in "${targets[@]}"; do
+ vxlan_fdb_add_del add $target
+ done
+
+ __test_unicast $h2_mac 2001:db8:1::2 0 "local MAC unicast"
+ __test_unicast $r1_mac 2001:db8:1::3 1 "remote MAC 1 unicast"
+ __test_unicast $r2_mac 2001:db8:1::4 2 "remote MAC 2 unicast"
+
+ for target in "${targets[@]}"; do
+ vxlan_fdb_add_del del $target
+ done
+}
+
+vxlan_ping_test()
+{
+ local ping_dev=$1; shift
+ local ping_dip=$1; shift
+ local ping_args=$1; shift
+ local capture_dev=$1; shift
+ local capture_dir=$1; shift
+ local capture_pref=$1; shift
+ local expect=$1; shift
+
+ local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
+ ping6_do $ping_dev $ping_dip "$ping_args"
+ local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
+ local delta=$((t1 - t0))
+
+ # Tolerate a couple stray extra packets.
+ ((expect <= delta && delta <= expect + 2))
+ check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
+}
+
+test_ttl()
+{
+ RET=0
+
+ tc filter add dev v1 egress pref 77 protocol ipv6 \
+ flower ip_ttl 99 action pass
+ vxlan_ping_test $h1 2001:db8:1::3 "" v1 egress 77 10
+ tc filter del dev v1 egress pref 77 protocol ipv6
+
+ log_test "VXLAN: envelope TTL"
+}
+
+test_tos()
+{
+ RET=0
+
+ tc filter add dev v1 egress pref 77 protocol ipv6 \
+ flower ip_tos 0x14 action pass
+ vxlan_ping_test $h1 2001:db8:1::3 "-Q 0x14" v1 egress 77 10
+ vxlan_ping_test $h1 2001:db8:1::3 "-Q 0x18" v1 egress 77 0
+ tc filter del dev v1 egress pref 77 protocol ipv6
+
+ log_test "VXLAN: envelope TOS inheritance"
+}
+
+__test_ecn_encap()
+{
+ local q=$1; shift
+ local tos=$1; shift
+
+ RET=0
+
+ tc filter add dev v1 egress pref 77 protocol ipv6 \
+ flower ip_tos $tos action pass
+ sleep 1
+ vxlan_ping_test $h1 2001:db8:1::3 "-Q $q" v1 egress 77 10
+ tc filter del dev v1 egress pref 77 protocol ipv6
+
+ log_test "VXLAN: ECN encap: $q->$tos"
+}
+
+test_ecn_encap()
+{
+ # In accordance with INET_ECN_encapsulate()
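+	# (outer ECN := inner ECN, except inner CE 0x03, which is
+	# encapsulated as ECT(0) 0x02; hence the last mapping below).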
+ __test_ecn_encap 0x00 0x00
+ __test_ecn_encap 0x01 0x01
+ __test_ecn_encap 0x02 0x02
+ __test_ecn_encap 0x03 0x02
+}
+
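+# Send pre-encapsulated VXLAN packets with mausezahn: a UDP datagram to
+# $VXPORT whose payload is the VXLAN header (flags 0x08 = VNI valid,
+# VNI 1000 = 0x0003e8) followed by an inner Ethernet/IPv6 ICMPv6 echo
+# request. $inner_tos is passed as two hex digits split by a colon
+# (e.g. "0:1" for traffic class 0x01) because the IPv6 traffic class
+# straddles a byte boundary after the version nibble.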
+vxlan_encapped_ping_do()
+{
+ local count=$1; shift
+ local dev=$1; shift
+ local next_hop_mac=$1; shift
+ local dest_ip=$1; shift
+ local dest_mac=$1; shift
+ local inner_tos=$1; shift
+ local outer_tos=$1; shift
+ local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+
+ $MZ -6 $dev -c $count -d 100msec -q \
+ -b $next_hop_mac -B $dest_ip \
+ -t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"$(mac_get w2):"$( : ETH saddr
+ )"86:dd:"$( : ETH type
+ )"6"$( : IP version
+ )"$inner_tos"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:08:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )"80:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+}
+export -f vxlan_encapped_ping_do
+
+vxlan_encapped_ping_test()
+{
+ local ping_dev=$1; shift
+ local nh_dev=$1; shift
+ local ping_dip=$1; shift
+ local inner_tos=$1; shift
+ local outer_tos=$1; shift
+ local stat_get=$1; shift
+ local expect=$1; shift
+
+ local t0=$($stat_get)
+
+ in_ns ns1 \
+ vxlan_encapped_ping_do 10 $ping_dev $(mac_get $nh_dev) \
+ $ping_dip $(mac_get $h1) \
+ $inner_tos $outer_tos
+ sleep 1
+ local t1=$($stat_get)
+ local delta=$((t1 - t0))
+
+ # Tolerate a couple stray extra packets.
+ ((expect <= delta && delta <= expect + 2))
+ check_err $? "Expected to capture $expect packets, got $delta."
+}
+export -f vxlan_encapped_ping_test
+
+__test_ecn_decap()
+{
+ local orig_inner_tos=$1; shift
+ local orig_outer_tos=$1; shift
+ local decapped_tos=$1; shift
+
+ RET=0
+
+ tc filter add dev $h1 ingress pref 77 protocol ipv6 \
+ flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 \
+ ip_tos $decapped_tos action drop
+ sleep 1
+ vxlan_encapped_ping_test v2 v1 2001:db8:3::1 \
+ $orig_inner_tos $orig_outer_tos \
+ "tc_rule_stats_get $h1 77 ingress" 10
+ tc filter del dev $h1 ingress pref 77
+
+ log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->$decapped_tos"
+}
+
+test_ecn_decap_error()
+{
+ local orig_inner_tos="0:0"
+ local orig_outer_tos=03
+
+ RET=0
+
+ vxlan_encapped_ping_test v2 v1 2001:db8:3::1 \
+ $orig_inner_tos $orig_outer_tos \
+ "link_stats_rx_errors_get vx1" 10
+
+ log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->error"
+}
+
+test_ecn_decap()
+{
+ # In accordance with INET_ECN_decapsulate()
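+	# (RFC 6040): the inner ECN field is kept, except that an outer CE
+	# marks an ECT inner packet as CE, and an outer ECT(1) upgrades an
+	# inner ECT(0); outer CE on a non-ECT inner packet is an error and
+	# the packet is dropped, which test_ecn_decap_error() observes via
+	# vx1's rx_errors.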
+ __test_ecn_decap "0:0" 00 0x00
+ __test_ecn_decap "0:0" 01 0x00
+ __test_ecn_decap "0:0" 02 0x00
+ # 00 03 is tested in test_ecn_decap_error()
+ __test_ecn_decap "0:1" 00 0x01
+ __test_ecn_decap "0:1" 01 0x01
+ __test_ecn_decap "0:1" 02 0x01
+ __test_ecn_decap "0:1" 03 0x03
+ __test_ecn_decap "0:2" 00 0x02
+ __test_ecn_decap "0:2" 01 0x01
+ __test_ecn_decap "0:2" 02 0x02
+ __test_ecn_decap "0:2" 03 0x03
+ __test_ecn_decap "0:3" 00 0x03
+ __test_ecn_decap "0:3" 01 0x03
+ __test_ecn_decap "0:3" 02 0x03
+ __test_ecn_decap "0:3" 03 0x03
+ test_ecn_decap_error
+}
+
+test_all()
+{
+ log_info "Running tests with UDP port $VXPORT"
+ tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh
new file mode 100755
index 000000000000..00540317737a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN tests with an unusual port number.
+
+VXPORT=8472
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+"
+source vxlan_bridge_1d_ipv6.sh
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
index a5789721ba92..a596bbf3ed6a 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
@@ -680,26 +680,6 @@ test_pvid()
log_test "VXLAN: flood after vlan re-add"
}
-vxlan_ping_test()
-{
- local ping_dev=$1; shift
- local ping_dip=$1; shift
- local ping_args=$1; shift
- local capture_dev=$1; shift
- local capture_dir=$1; shift
- local capture_pref=$1; shift
- local expect=$1; shift
-
- local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
- ping_do $ping_dev $ping_dip "$ping_args"
- local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
- local delta=$((t1 - t0))
-
- # Tolerate a couple stray extra packets.
- ((expect <= delta && delta <= expect + 2))
- check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
-}
-
__test_learning()
{
local -a expects=(0 0 0 0 0)
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh
new file mode 100755
index 000000000000..d880df89bc8b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh
@@ -0,0 +1,837 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.10 | | + $h2.10 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 |
+# | | | | | |
+# | | + $h1.20 | | | + $h2.20 |
+# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 |
+# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 |
+# | \| | | \| |
+# | + $h1 | | + $h2 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR1 (802.1q) + $swp2 | |
+# | | vid 10 vid 10 | |
+# | | vid 20 vid 20 | |
+# | | | |
+# | | + vx10 (vxlan) + vx20 (vxlan) | |
+# | | local: local: | |
+# | | 2001:db8:3::1 2001:db8:3::1 | |
+# | | remote: remote: | |
+# | | 2001:db8:4::1 2001:db8:5::1 2001:db8:4::1 2001:db8:5::1 | |
+# | | id 1000 dstport $VXPORT id 2000 dstport $VXPORT | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 2001:db8:4::0/64 via 2001:db8:3::2 |
+# | 2001:db8:5::0/64 via 2001:db8:3::2 |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|----------------------------------------------------------+
+# | | VRP2 (vrf) |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | + v1 (veth) + v3 (veth) |
+# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 |
+# +----|---------------------------------------|------------------+
+# | |
+# +----|--------------------------------+ +----|-------------------------------+
+# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) |
+# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 |
+# | | | |
+# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 |
+# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via |
+# | | | 2001:db8:5::2 |
+# | | | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# | | BR2 (802.1q) | | | | BR2 (802.1q) | |
+# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | |
+# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | |
+# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | |
+# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | |
+# | | | | | | | |
+# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | |
+# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | |
+# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | |
+# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | |
+# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | |
+# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 10 | | | | | vid 10 | |
+# | | | vid 20 | | | | | vid 20 | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | |
+# | | |\ | | | | |\ | |
+# | | | + w2.10 | | | | | + w2.10 | |
+# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | |
+# | | | 2001:db8:1::3/64 | | | | | 2001:db8:1::4/64 | |
+# | | | | | | | | | |
+# | | + w2.20 | | | | + w2.20 | |
+# | | 198.51.100.3/24 | | | | 198.51.100.4/24 | |
+# | | 2001:db8:2::3/64 | | | | 2001:db8:2::4/64 | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# +-------------------------------------+ +------------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+ ping_ipv4
+ ping_ipv6
+ test_flood
+ test_unicast
+ reapply_config
+ ping_ipv4
+ ping_ipv6
+ test_flood
+ test_unicast
+ test_pvid
+ ping_ipv4
+ ping_ipv6
+ test_flood
+ test_pvid
+"}
+
+NUM_NETIFS=6
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ tc qdisc add dev $h1 clsact
+ vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 20
+ vlan_destroy $h1 10
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ tc qdisc add dev $h2 clsact
+ vlan_create $h2 10 v$h2 192.0.2.2/28 2001:db8:1::2/64
+ vlan_create $h2 20 v$h2 198.51.100.2/24 2001:db8:2::2/64
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 20
+ vlan_destroy $h2 10
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+ ip address add dev $rp1 2001:db8:3::1/64
+
+ ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+ ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+}
+
+rp1_unset_addr()
+{
+ ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+ ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+
+ ip address del dev $rp1 2001:db8:3::1/64
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+	# that of the VXLAN device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ rp1_set_addr
+ tc qdisc add dev $rp1 clsact
+
+ ip link add name vx10 type vxlan id 1000 local 2001:db8:3::1 \
+ dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+ tos inherit ttl 100
+ ip link set dev vx10 up
+
+ ip link set dev vx10 master br1
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link add name vx20 type vxlan id 2000 local 2001:db8:3::1 \
+ dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+ tos inherit ttl 100
+ ip link set dev vx20 up
+
+ ip link set dev vx20 master br1
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+ bridge vlan add vid 10 dev $swp1
+ bridge vlan add vid 20 dev $swp1
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+ bridge vlan add vid 10 dev $swp2
+ bridge vlan add vid 20 dev $swp2
+
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self
+
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self
+}
+
+switch_destroy()
+{
+ bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+ bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+ bridge vlan del vid 20 dev $swp2
+ bridge vlan del vid 10 dev $swp2
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 20 dev $swp1
+ bridge vlan del vid 10 dev $swp1
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ bridge vlan del vid 20 dev vx20
+ ip link set dev vx20 nomaster
+
+ ip link set dev vx20 down
+ ip link del dev vx20
+
+ bridge vlan del vid 10 dev vx10
+ ip link set dev vx10 nomaster
+
+ ip link set dev vx10 down
+ ip link del dev vx10
+
+ tc qdisc del dev $rp1 clsact
+ rp1_unset_addr
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 2001:db8:3::2/64
+ __simple_if_init v1 v$rp2 2001:db8:4::2/64
+ __simple_if_init v3 v$rp2 2001:db8:5::2/64
+ tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+ tc qdisc del dev v1 clsact
+ __simple_if_fini v3 2001:db8:5::2/64
+ __simple_if_fini v1 2001:db8:4::2/64
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+ns_init_common()
+{
+ local in_if=$1; shift
+ local in_addr=$1; shift
+ local other_in_addr=$1; shift
+ local nh_addr=$1; shift
+ local host_addr1_ipv4=$1; shift
+ local host_addr1_ipv6=$1; shift
+ local host_addr2_ipv4=$1; shift
+ local host_addr2_ipv6=$1; shift
+
+ ip link set dev $in_if up
+ ip address add dev $in_if $in_addr/64
+ tc qdisc add dev $in_if clsact
+
+ ip link add name br2 type bridge vlan_filtering 1 vlan_default_pvid 0
+ ip link set dev br2 up
+
+ ip link add name w1 type veth peer name w2
+
+ ip link set dev w1 master br2
+ ip link set dev w1 up
+
+ bridge vlan add vid 10 dev w1
+ bridge vlan add vid 20 dev w1
+
+ ip link add name vx10 type vxlan id 1000 local $in_addr \
+ dstport "$VXPORT" udp6zerocsumrx
+ ip link set dev vx10 up
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:3::1 self
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst $other_in_addr self
+
+ ip link set dev vx10 master br2
+ tc qdisc add dev vx10 clsact
+
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link add name vx20 type vxlan id 2000 local $in_addr \
+ dstport "$VXPORT" udp6zerocsumrx
+ ip link set dev vx20 up
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:3::1 self
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst $other_in_addr self
+
+ ip link set dev vx20 master br2
+ tc qdisc add dev vx20 clsact
+
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ simple_if_init w2
+ vlan_create w2 10 vw2 $host_addr1_ipv4/28 $host_addr1_ipv6/64
+ vlan_create w2 20 vw2 $host_addr2_ipv4/24 $host_addr2_ipv6/64
+
+ ip route add 2001:db8:3::0/64 nexthop via $nh_addr
+ ip route add $other_in_addr/128 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 \
+ ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \
+ 192.0.2.3 2001:db8:1::3 198.51.100.3 2001:db8:2::3
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+ns2_create()
+{
+ ip netns add ns2
+ ip link set dev v4 netns ns2
+ in_ns ns2 \
+ ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \
+ 192.0.2.4 2001:db8:1::4 198.51.100.4 2001:db8:2::4
+}
+
+ns2_destroy()
+{
+ ip netns exec ns2 ip link set dev v4 netns 1
+ ip netns del ns2
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ ip link add name v3 type veth peer name v4
+ vrp2_create
+ ns1_create
+ ns2_create
+
+ r1_mac=$(in_ns ns1 mac_get w2)
+ r2_mac=$(in_ns ns2 mac_get w2)
+ h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns2_destroy
+ ns1_destroy
+ vrp2_destroy
+ ip link del dev v3
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+# For the first round of tests, vx10 and vx20 were the first devices to get
+# attached to the bridge, and at that point the local IP was already
+# configured. Try the other scenario of attaching these devices to a bridge
+# that already has local ports as members, and only then assign the local IP.
+reapply_config()
+{
+ log_info "Reapplying configuration"
+
+ bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+ bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+ ip link set dev vx20 nomaster
+ ip link set dev vx10 nomaster
+
+ rp1_unset_addr
+ sleep 5
+
+ ip link set dev vx10 master br1
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link set dev vx20 master br1
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self
+
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self
+
+ rp1_set_addr
+ sleep 5
+}
+
+__ping_ipv4()
+{
+ local vxlan_local_ip=$1; shift
+ local vxlan_remote_ip=$1; shift
+ local src_ip=$1; shift
+ local dst_ip=$1; shift
+ local dev=$1; shift
+ local info=$1; shift
+
+ RET=0
+
+ tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \
+ flower ip_proto udp src_ip $vxlan_local_ip \
+ dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass
+ # Match ICMP-reply packets after decapsulation, so source IP is
+ # destination IP of the ping and destination IP is source IP of the
+ # ping.
+ tc filter add dev $swp1 egress protocol 802.1q pref 1 handle 101 \
+ flower vlan_ethtype ipv4 src_ip $dst_ip dst_ip $src_ip \
+ $TC_FLAG action pass
+
+ # Send 100 packets and verify that at least 100 packets hit the rule,
+ # to overcome ARP noise.
+ PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100
+ check_err $? "Encapsulated packets did not go through router"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10 100
+ check_err $? "Decapsulated packets did not go through switch"
+
+ log_test "ping: $info"
+
+ tc filter del dev $swp1 egress
+ tc filter del dev $rp1 egress
+}
+
+ping_ipv4()
+{
+ RET=0
+
+ local local_sw_ip=2001:db8:3::1
+ local remote_ns1_ip=2001:db8:4::1
+ local remote_ns2_ip=2001:db8:5::1
+ local h1_10_ip=192.0.2.1
+ local h1_20_ip=198.51.100.1
+ local w2_10_ns1_ip=192.0.2.3
+ local w2_10_ns2_ip=192.0.2.4
+ local w2_20_ns1_ip=198.51.100.3
+ local w2_20_ns2_ip=198.51.100.4
+
+ ping_test $h1.10 192.0.2.2 ": local->local vid 10"
+ ping_test $h1.20 198.51.100.2 ": local->local vid 20"
+
+ __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_10_ip $w2_10_ns1_ip $h1.10 \
+ "local->remote 1 vid 10"
+ __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_10_ip $w2_10_ns2_ip $h1.10 \
+ "local->remote 2 vid 10"
+ __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_20_ip $w2_20_ns1_ip $h1.20 \
+ "local->remote 1 vid 20"
+ __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_20_ip $w2_20_ns2_ip $h1.20 \
+ "local->remote 2 vid 20"
+}
+
+__ping_ipv6()
+{
+ local vxlan_local_ip=$1; shift
+ local vxlan_remote_ip=$1; shift
+ local src_ip=$1; shift
+ local dst_ip=$1; shift
+ local dev=$1; shift
+ local info=$1; shift
+
+ RET=0
+
+ tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \
+ flower ip_proto udp src_ip $vxlan_local_ip \
+ dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass
+ # Match ICMP-reply packets after decapsulation, so source IP is
+ # destination IP of the ping and destination IP is source IP of the
+ # ping.
+ tc filter add dev $swp1 egress protocol 802.1q pref 1 handle 101 \
+ flower vlan_ethtype ipv6 src_ip $dst_ip dst_ip $src_ip \
+ $TC_FLAG action pass
+
+ # Send 100 packets and verify that at least 100 packets hit the rule,
+ # to overcome neighbor discovery noise.
+ PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $rp1 egress" 101 100
+ check_err $? "Encapsulated packets did not go through router"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 100
+ check_err $? "Decapsulated packets did not go through switch"
+
+ log_test "ping6: $info"
+
+ tc filter del dev $swp1 egress
+ tc filter del dev $rp1 egress
+}
+
+ping_ipv6()
+{
+ RET=0
+
+ local local_sw_ip=2001:db8:3::1
+ local remote_ns1_ip=2001:db8:4::1
+ local remote_ns2_ip=2001:db8:5::1
+ local h1_10_ip=2001:db8:1::1
+ local h1_20_ip=2001:db8:2::1
+ local w2_10_ns1_ip=2001:db8:1::3
+ local w2_10_ns2_ip=2001:db8:1::4
+ local w2_20_ns1_ip=2001:db8:2::3
+ local w2_20_ns2_ip=2001:db8:2::4
+
+ ping6_test $h1.10 2001:db8:1::2 ": local->local vid 10"
+ ping6_test $h1.20 2001:db8:2::2 ": local->local vid 20"
+
+ __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_10_ip $w2_10_ns1_ip $h1.10 \
+ "local->remote 1 vid 10"
+ __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_10_ip $w2_10_ns2_ip $h1.10 \
+ "local->remote 2 vid 10"
+ __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_20_ip $w2_20_ns1_ip $h1.20 \
+ "local->remote 1 vid 20"
+ __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_20_ip $w2_20_ns2_ip $h1.20 \
+ "local->remote 2 vid 20"
+}
+
+maybe_in_ns()
+{
+ echo ${1:+in_ns} $1
+}
+
+__flood_counter_add_del()
+{
+ local add_del=$1; shift
+ local dst_ip=$1; shift
+ local dev=$1; shift
+ local ns=$1; shift
+
+ # Putting the ICMP capture both to HW and to SW will end up
+ # double-counting the packets that are trapped to slow path, such as for
+ # the unicast test. Adding either skip_hw or skip_sw fixes this problem,
+ # but with skip_hw, the flooded packets are not counted at all, because
+ # those are dropped due to MAC address mismatch; and skip_sw is a no-go
+ # for veth-based topologies.
+ #
+	# So try to install with skip_sw and fall back to skip_hw if that fails.
+
+ $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \
+ proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \
+ icmpv6 skip_sw action pass 2>/dev/null || \
+ $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \
+ proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \
+ icmpv6 skip_hw action pass
+}
+
+flood_counter_install()
+{
+ __flood_counter_add_del add "$@"
+}
+
+flood_counter_uninstall()
+{
+ __flood_counter_add_del del "$@"
+}
+
+flood_fetch_stat()
+{
+ local dev=$1; shift
+ local ns=$1; shift
+
+ $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress
+}
+
+flood_fetch_stats()
+{
+ local counters=("${@}")
+ local counter
+
+ for counter in "${counters[@]}"; do
+ flood_fetch_stat $counter
+ done
+}
+
+vxlan_flood_test()
+{
+ local mac=$1; shift
+ local dst=$1; shift
+ local vid=$1; shift
+ local -a expects=("${@}")
+
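+	# expects[] indices map to counters[]: 0 is $h2, 1/2 are vx10/vx20
+	# in ns1, 3/4 are vx10/vx20 in ns2.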
+ local -a counters=($h2 "vx10 ns1" "vx20 ns1" "vx10 ns2" "vx20 ns2")
+ local counter
+ local key
+
+	# Packets reach the local host tagged whereas they reach the VxLAN
+	# devices untagged. In order to be able to use the same filter for
+	# all counters, make sure the packets also reach the local host
+	# untagged; the VLAN is restored as tagged once the counters are
+	# removed.
+ bridge vlan add vid $vid dev $swp2 untagged
+ for counter in "${counters[@]}"; do
+ flood_counter_install $dst $counter
+ done
+
+ local -a t0s=($(flood_fetch_stats "${counters[@]}"))
+ $MZ -6 $h1 -Q $vid -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp6 type=128 -q
+ sleep 1
+ local -a t1s=($(flood_fetch_stats "${counters[@]}"))
+
+ for key in ${!t0s[@]}; do
+ local delta=$((t1s[$key] - t0s[$key]))
+ local expect=${expects[$key]}
+
+ ((expect == delta))
+ check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta."
+ done
+
+ for counter in "${counters[@]}"; do
+ flood_counter_uninstall $dst $counter
+ done
+ bridge vlan add vid $vid dev $swp2
+}
+
+__test_flood()
+{
+ local mac=$1; shift
+ local dst=$1; shift
+ local vid=$1; shift
+ local what=$1; shift
+ local -a expects=("${@}")
+
+ RET=0
+
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: $what"
+}
+
+test_flood()
+{
+ __test_flood de:ad:be:ef:13:37 2001:db8:1::100 10 "flood vlan 10" \
+ 10 10 0 10 0
+ __test_flood ca:fe:be:ef:13:37 2001:db8:2::100 20 "flood vlan 20" \
+ 10 0 10 0 10
+}
+
+vxlan_fdb_add_del()
+{
+ local add_del=$1; shift
+ local vid=$1; shift
+ local mac=$1; shift
+ local dev=$1; shift
+ local dst=$1; shift
+
+ bridge fdb $add_del dev $dev $mac self static permanent \
+ ${dst:+dst} $dst 2>/dev/null
+ bridge fdb $add_del dev $dev $mac master static vlan $vid 2>/dev/null
+}
+
+__test_unicast()
+{
+ local mac=$1; shift
+ local dst=$1; shift
+ local hit_idx=$1; shift
+ local vid=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ local -a expects=(0 0 0 0 0)
+ expects[$hit_idx]=10
+
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: $what"
+}
+
+test_unicast()
+{
+ local -a targets=("$h2_mac $h2"
+ "$r1_mac vx10 2001:db8:4::1"
+ "$r2_mac vx10 2001:db8:5::1")
+ local target
+
+ log_info "unicast vlan 10"
+
+ for target in "${targets[@]}"; do
+ vxlan_fdb_add_del add 10 $target
+ done
+
+ __test_unicast $h2_mac 2001:db8:1::2 0 10 "local MAC unicast"
+ __test_unicast $r1_mac 2001:db8:1::3 1 10 "remote MAC 1 unicast"
+ __test_unicast $r2_mac 2001:db8:1::4 3 10 "remote MAC 2 unicast"
+
+ for target in "${targets[@]}"; do
+ vxlan_fdb_add_del del 10 $target
+ done
+
+ log_info "unicast vlan 20"
+
+ targets=("$h2_mac $h2" "$r1_mac vx20 2001:db8:4::1" \
+ "$r2_mac vx20 2001:db8:5::1")
+
+ for target in "${targets[@]}"; do
+ vxlan_fdb_add_del add 20 $target
+ done
+
+ __test_unicast $h2_mac 2001:db8:2::2 0 20 "local MAC unicast"
+ __test_unicast $r1_mac 2001:db8:2::3 2 20 "remote MAC 1 unicast"
+ __test_unicast $r2_mac 2001:db8:2::4 4 20 "remote MAC 2 unicast"
+
+ for target in "${targets[@]}"; do
+ vxlan_fdb_add_del del 20 $target
+ done
+}
+
+test_pvid()
+{
+ local -a expects=(0 0 0 0 0)
+ local mac=de:ad:be:ef:13:37
+ local dst=2001:db8:1::100
+ local vid=10
+
+ # Check that flooding works
+ RET=0
+
+ expects[0]=10; expects[1]=10; expects[3]=10
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: flood before pvid off"
+
+ # Toggle PVID off and test that flood to remote hosts does not work
+ RET=0
+
+ bridge vlan add vid 10 dev vx10
+
+ expects[0]=10; expects[1]=0; expects[3]=0
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: flood after pvid off"
+
+ # Toggle PVID on and test that flood to remote hosts does work
+ RET=0
+
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ expects[0]=10; expects[1]=10; expects[3]=10
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: flood after pvid on"
+
+ # Add a new VLAN and test that it does not affect flooding
+ RET=0
+
+ bridge vlan add vid 30 dev vx10
+
+ expects[0]=10; expects[1]=10; expects[3]=10
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ bridge vlan del vid 30 dev vx10
+
+ log_test "VXLAN: flood after vlan add"
+
+ # Remove currently mapped VLAN and test that flood to remote hosts does
+ # not work
+ RET=0
+
+ bridge vlan del vid 10 dev vx10
+
+ expects[0]=10; expects[1]=0; expects[3]=0
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: flood after vlan delete"
+
+ # Re-add the VLAN and test that flood to remote hosts does work
+ RET=0
+
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ expects[0]=10; expects[1]=10; expects[3]=10
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: flood after vlan re-add"
+}
+
+test_all()
+{
+ log_info "Running tests with UDP port $VXPORT"
+ tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh
new file mode 100755
index 000000000000..344f43ccb755
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN tests with an unusual port number.
+
+VXPORT=8472
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+"
+source vxlan_bridge_1q_ipv6.sh
diff --git a/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh
new file mode 100755
index 000000000000..904633427fd0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh
@@ -0,0 +1,563 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------------------+ +-----------------------------+
+# | vrf-h1 | | vrf-h2 |
+# | + $h1 | | + $h2 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:2::1/64 |
+# | | default via 2001:db8:1::3 | | | default via 2001:db8:2::3 |
+# +----|---------------------------+ +-|---------------------------+
+# | |
+# +----|------------------------------------------|---------------------------+
+# | SW | | |
+# | +--|------------------------------------------|-------------------------+ |
+# | | + $swp1 br1 + $swp2 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | | | |
+# | | + vx10 + vx20 | |
+# | | local 2001:db8:3::1 local 2001:db8:3::1 | |
+# | | remote 2001:db8:3::2 remote 2001:db8:3::2 | |
+# | | id 1010 id 1020 | |
+# | | dstport 4789 dstport 4789 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | | | |
+# | | + vx4001 | |
+# | | local 2001:db8:3::1 | |
+# | | remote 2001:db8:3::2 | |
+# | | id 104001 | |
+# | | dstport 4789 | |
+# | | vid 4001 pvid untagged | |
+# | | | |
+# | +-----------------------------------+-----------------------------------+ |
+# | | |
+# | +-----------------------------------|-----------------------------------+ |
+# | | | | |
+# | | +--------------------------------+--------------------------------+ | |
+# | | | | | | |
+# | | + vlan10 | vlan20 + | |
+# | | | 2001:db8:1::2/64 | 2001:db8:2::2/64 | | |
+# | | | | | | |
+# | | + vlan10-v (macvlan) + vlan20-v (macvlan) + | |
+# | | 2001:db8:1::3/64 vlan4001 2001:db8:2::3/64 | |
+# | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | |
+# | | vrf-green | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | + $rp1 +lo |
+# | | 2001:db8:4::1/64 2001:db8:3::1 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | vrf-spine |
+# | + $rp2 |
+# | 2001:db8:4::2/64 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | |
+# | + v1 (veth) |
+# | | 2001:db8:5::2/64 |
+# +----|--------------------------------------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | + v2 (veth) +lo NS1 (netns) |
+# | 2001:db8:5::1/64 2001:db8:3::2/128 |
+# | |
+# | +-----------------------------------------------------------------------+ |
+# | | vrf-green | |
+# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | |
+# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | |
+# | | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | |
+# | | | vlan4001 | | |
+# | | + vlan10 + vlan20 + | |
+# | | | 2001:db8:1::3/64 | 2001:db8:2::3/64 | | |
+# | | | | | | |
+# | | +--------------------------------+--------------------------------+ | |
+# | | | | |
+# | +-----------------------------------|-----------------------------------+ |
+# | | |
+# | +-----------------------------------+-----------------------------------+ |
+# | | | |
+# | | + vx10 + vx20 | |
+# | | local 2001:db8:3::2 local 2001:db8:3::2 | |
+# | | remote 2001:db8:3::1 remote 2001:db8:3::1 | |
+# | | id 1010 id 1020 | |
+# | | dstport 4789 dstport 4789 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | | | |
+# | | + vx4001 | |
+# | | local 2001:db8:3::2 | |
+# | | remote 2001:db8:3::1 | |
+# | | id 104001 | |
+# | | dstport 4789 | |
+# | | vid 4001 pvid untagged | |
+# | | | |
+# | | + w1 (veth) + w3 (veth) | |
+# | | | vid 10 pvid untagged br1 | vid 20 pvid untagged | |
+# | +--|------------------------------------------|-------------------------+ |
+# | | | |
+# | | | |
+# | +--|----------------------+ +--|-------------------------+ |
+# | | | vrf-h1 | | | vrf-h2 | |
+# | | + w2 (veth) | | + w4 (veth) | |
+# | | 2001:db8:1::4/64 | | 2001:db8:2::4/64 | |
+# | | default via | | default via | |
+# | | 2001:db8:1::3/64 | | 2001:db8:2::3/64 | |
+# | +-------------------------+ +----------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv6
+"
+NUM_NETIFS=6
+source lib.sh
+
+hx_create()
+{
+ local vrf_name=$1; shift
+ local if_name=$1; shift
+ local ip_addr=$1; shift
+ local gw_ip=$1; shift
+
+ vrf_create $vrf_name
+ ip link set dev $if_name master $vrf_name
+ ip link set dev $vrf_name up
+ ip link set dev $if_name up
+
+ ip address add $ip_addr/64 dev $if_name
+ ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \
+ dev $if_name
+ ip route add default vrf $vrf_name nexthop via $gw_ip
+}
+export -f hx_create
+
+hx_destroy()
+{
+ local vrf_name=$1; shift
+ local if_name=$1; shift
+ local ip_addr=$1; shift
+ local gw_ip=$1; shift
+
+ ip route del default vrf $vrf_name nexthop via $gw_ip
+ ip neigh del $gw_ip dev $if_name
+ ip address del $ip_addr/64 dev $if_name
+
+ ip link set dev $if_name down
+ vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+ hx_create "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3
+}
+
+h1_destroy()
+{
+ hx_destroy "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3
+}
+
+h2_create()
+{
+ hx_create "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3
+}
+
+h2_destroy()
+{
+ hx_destroy "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ ip address add dev $rp1 2001:db8:4::1/64
+ ip route add 2001:db8:3::2/128 nexthop via 2001:db8:4::2
+
+ ip link add name vx10 type vxlan id 1010 \
+ local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx10 up
+
+ ip link set dev vx10 master br1
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link add name vx20 type vxlan id 1020 \
+ local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx20 up
+
+ ip link set dev vx20 master br1
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ ip link add name vx4001 type vxlan id 104001 \
+ local 2001:db8:3::1 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx4001 up
+
+ ip link set dev vx4001 master br1
+ bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+ ip address add 2001:db8:3::1/128 dev lo
+
+ # Create SVIs
+ vrf_create "vrf-green"
+ ip link set dev vrf-green up
+
+ ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+ ip address add 2001:db8:1::2/64 dev vlan10
+ ip link add link vlan10 name vlan10-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:1::3/64 dev vlan10-v
+
+ ip link add link br1 name vlan20 up master vrf-green type vlan id 20
+ ip address add 2001:db8:2::2/64 dev vlan20
+ ip link add link vlan20 name vlan20-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:2::3/64 dev vlan20-v
+
+ ip link add link br1 name vlan4001 up master vrf-green \
+ type vlan id 4001
+
+ bridge vlan add vid 10 dev br1 self
+ bridge vlan add vid 20 dev br1 self
+ bridge vlan add vid 4001 dev br1 self
+
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ bridge vlan add vid 10 dev $swp1 pvid untagged
+ bridge vlan add vid 20 dev $swp2 pvid untagged
+}
+
+switch_destroy()
+{
+ bridge vlan del vid 20 dev br1 self
+ bridge vlan del vid 10 dev br1 self
+
+ bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
+ bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
+
+ bridge vlan del vid 4001 dev br1 self
+ ip link del dev vlan4001
+
+ ip link del dev vlan20
+
+ ip link del dev vlan10
+
+ vrf_destroy "vrf-green"
+
+ ip address del 2001:db8:3::1/128 dev lo
+
+ bridge vlan del vid 20 dev $swp2
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ bridge vlan del vid 4001 dev vx4001
+ ip link set dev vx4001 nomaster
+
+ ip link set dev vx4001 down
+ ip link del dev vx4001
+
+ bridge vlan del vid 20 dev vx20
+ ip link set dev vx20 nomaster
+
+ ip link set dev vx20 down
+ ip link del dev vx20
+
+ bridge vlan del vid 10 dev vx10
+ ip link set dev vx10 nomaster
+
+ ip link set dev vx10 down
+ ip link del dev vx10
+
+ ip route del 2001:db8:3::2 nexthop via 2001:db8:4::2
+ ip address del dev $rp1 2001:db8:4::1/64
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+spine_create()
+{
+ vrf_create "vrf-spine"
+ ip link set dev $rp2 master vrf-spine
+ ip link set dev v1 master vrf-spine
+ ip link set dev vrf-spine up
+ ip link set dev $rp2 up
+ ip link set dev v1 up
+
+ ip address add 2001:db8:4::2/64 dev $rp2
+ ip address add 2001:db8:5::2/64 dev v1
+
+ ip route add 2001:db8:3::1/128 vrf vrf-spine nexthop via \
+ 2001:db8:4::1
+ ip route add 2001:db8:3::2/128 vrf vrf-spine nexthop via \
+ 2001:db8:5::1
+}
+
+spine_destroy()
+{
+ ip route del 2001:db8:3::2/128 vrf vrf-spine nexthop via \
+ 2001:db8:5::1
+ ip route del 2001:db8:3::1/128 vrf vrf-spine nexthop via \
+ 2001:db8:4::1
+
+ ip address del 2001:db8:5::2/64 dev v1
+ ip address del 2001:db8:4::2/64 dev $rp2
+
+ ip link set dev v1 down
+ ip link set dev $rp2 down
+ vrf_destroy "vrf-spine"
+}
+
+ns_h1_create()
+{
+ hx_create "vrf-h1" w2 2001:db8:1::4 2001:db8:1::3
+}
+export -f ns_h1_create
+
+ns_h2_create()
+{
+ hx_create "vrf-h2" w4 2001:db8:2::4 2001:db8:2::3
+}
+export -f ns_h2_create
+
+ns_switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ ip link set dev br1 up
+
+ ip link set dev v2 up
+ ip address add dev v2 2001:db8:5::1/64
+ ip route add 2001:db8:3::1 nexthop via 2001:db8:5::2
+
+ ip link add name vx10 type vxlan id 1010 \
+ local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx10 up
+
+ ip link set dev vx10 master br1
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link add name vx20 type vxlan id 1020 \
+ local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx20 up
+
+ ip link set dev vx20 master br1
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ ip link add name vx4001 type vxlan id 104001 \
+ local 2001:db8:3::2 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx4001 up
+
+ ip link set dev vx4001 master br1
+ bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+ ip link set dev w1 master br1
+ ip link set dev w1 up
+ bridge vlan add vid 10 dev w1 pvid untagged
+
+ ip link set dev w3 master br1
+ ip link set dev w3 up
+ bridge vlan add vid 20 dev w3 pvid untagged
+
+ ip address add 2001:db8:3::2/128 dev lo
+
+ # Create SVIs
+ vrf_create "vrf-green"
+ ip link set dev vrf-green up
+
+ ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+ ip address add 2001:db8:1::3/64 dev vlan10
+ ip link add link vlan10 name vlan10-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:1::3/64 dev vlan10-v
+
+ ip link add link br1 name vlan20 up master vrf-green type vlan id 20
+ ip address add 2001:db8:2::3/64 dev vlan20
+ ip link add link vlan20 name vlan20-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:2::3/64 dev vlan20-v
+
+ ip link add link br1 name vlan4001 up master vrf-green \
+ type vlan id 4001
+
+ bridge vlan add vid 10 dev br1 self
+ bridge vlan add vid 20 dev br1 self
+ bridge vlan add vid 4001 dev br1 self
+
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+}
+export -f ns_switch_create
+
+ns_init()
+{
+ ip link add name w1 type veth peer name w2
+ ip link add name w3 type veth peer name w4
+
+ ip link set dev lo up
+
+ ns_h1_create
+ ns_h2_create
+ ns_switch_create
+}
+export -f ns_init
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 ns_init
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+__l2_vni_init()
+{
+ local mac1=$1; shift
+ local mac2=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+ local dst=$1; shift
+
+ bridge fdb add $mac1 dev vx10 self master extern_learn static \
+ dst $dst vlan 10
+ bridge fdb add $mac2 dev vx20 self master extern_learn static \
+ dst $dst vlan 20
+
+ ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \
+ extern_learn
+ ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \
+ extern_learn
+}
+export -f __l2_vni_init
+
+l2_vni_init()
+{
+ local h1_ns_mac=$(in_ns ns1 mac_get w2)
+ local h2_ns_mac=$(in_ns ns1 mac_get w4)
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+
+ __l2_vni_init $h1_ns_mac $h2_ns_mac 2001:db8:1::4 2001:db8:2::4 \
+ 2001:db8:3::2
+ in_ns ns1 __l2_vni_init $h1_mac $h2_mac 2001:db8:1::1 2001:db8:2::1 \
+ 2001:db8:3::1
+}
+
+__l3_vni_init()
+{
+ local mac=$1; shift
+ local vtep_ip=$1; shift
+ local host1_ip=$1; shift
+ local host2_ip=$1; shift
+
+ bridge fdb add $mac dev vx4001 self master extern_learn static \
+ dst $vtep_ip vlan 4001
+
+ ip neigh add $vtep_ip lladdr $mac nud noarp dev vlan4001 extern_learn
+
+ ip route add $host1_ip/128 vrf vrf-green nexthop via $vtep_ip \
+ dev vlan4001 onlink
+ ip route add $host2_ip/128 vrf vrf-green nexthop via $vtep_ip \
+ dev vlan4001 onlink
+}
+export -f __l3_vni_init
+
+l3_vni_init()
+{
+ local vlan4001_ns_mac=$(in_ns ns1 mac_get vlan4001)
+ local vlan4001_mac=$(mac_get vlan4001)
+
+ __l3_vni_init $vlan4001_ns_mac 2001:db8:3::2 2001:db8:1::4 \
+ 2001:db8:2::4
+ in_ns ns1 __l3_vni_init $vlan4001_mac 2001:db8:3::1 2001:db8:1::1 \
+ 2001:db8:2::1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ spine_create
+ ns1_create
+ in_ns ns1 forwarding_enable
+
+ l2_vni_init
+ l3_vni_init
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns1_destroy
+ spine_destroy
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::1 ": local->local vid 10->vid 20"
+ ping6_test $h1 2001:db8:1::4 ": local->remote vid 10->vid 10"
+ ping6_test $h2 2001:db8:2::4 ": local->remote vid 20->vid 20"
+ ping6_test $h1 2001:db8:2::4 ": local->remote vid 10->vid 20"
+ ping6_test $h2 2001:db8:1::4 ": local->remote vid 20->vid 10"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
index cf37ce86b0fd..30024d0ed373 100644
--- a/tools/testing/selftests/net/gro.c
+++ b/tools/testing/selftests/net/gro.c
@@ -57,17 +57,14 @@
#include <string.h>
#include <unistd.h>
+#include "../kselftest.h"
+
#define DPORT 8000
#define SPORT 1500
#define PAYLOAD_LEN 100
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define NUM_PACKETS 4
#define START_SEQ 100
#define START_ACK 100
-#define SIP6 "fdaa::2"
-#define DIP6 "fdaa::1"
-#define SIP4 "192.168.1.200"
-#define DIP4 "192.168.1.100"
#define ETH_P_NONE 0
#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
@@ -75,6 +72,10 @@
#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+static const char *addr6_src = "fdaa::2";
+static const char *addr6_dst = "fdaa::1";
+static const char *addr4_src = "192.168.1.200";
+static const char *addr4_dst = "192.168.1.100";
static int proto = -1;
static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
static char *testname = "data";
@@ -178,18 +179,18 @@ static uint16_t tcp_checksum(void *buf, int payload_len)
uint32_t sum = 0;
if (proto == PF_INET6) {
- if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1)
+ if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1)
error(1, errno, "inet_pton6 source ip pseudo");
- if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1)
+ if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1)
error(1, errno, "inet_pton6 dest ip pseudo");
ph6.protocol = htons(IPPROTO_TCP);
ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
sum = checksum_nofold(&ph6, sizeof(ph6), 0);
} else if (proto == PF_INET) {
- if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1)
+ if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1)
error(1, errno, "inet_pton source ip pseudo");
- if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1)
+ if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1)
error(1, errno, "inet_pton dest ip pseudo");
ph4.protocol = htons(IPPROTO_TCP);
ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
@@ -229,9 +230,9 @@ static void fill_networklayer(void *buf, int payload_len)
ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
ip6h->nexthdr = IPPROTO_TCP;
ip6h->hop_limit = 8;
- if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1)
+ if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1)
error(1, errno, "inet_pton source ip6");
- if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1)
+ if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1)
error(1, errno, "inet_pton dest ip6");
} else if (proto == PF_INET) {
memset(iph, 0, sizeof(*iph));
@@ -243,9 +244,9 @@ static void fill_networklayer(void *buf, int payload_len)
iph->tot_len = htons(sizeof(struct tcphdr) +
payload_len + sizeof(struct iphdr));
iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
- if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1)
+ if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1)
error(1, errno, "inet_pton source ip");
- if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1)
+ if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1)
error(1, errno, "inet_pton dest ip");
iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
}
@@ -731,7 +732,7 @@ static void set_timeout(int fd)
{
struct timeval timeout;
- timeout.tv_sec = 120;
+ timeout.tv_sec = 3;
timeout.tv_usec = 0;
if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
sizeof(timeout)) < 0)
@@ -1023,11 +1024,13 @@ static void gro_receiver(void)
static void parse_args(int argc, char **argv)
{
static const struct option opts[] = {
+ { "daddr", required_argument, NULL, 'd' },
{ "dmac", required_argument, NULL, 'D' },
{ "iface", required_argument, NULL, 'i' },
{ "ipv4", no_argument, NULL, '4' },
{ "ipv6", no_argument, NULL, '6' },
{ "rx", no_argument, NULL, 'r' },
+ { "saddr", required_argument, NULL, 's' },
{ "smac", required_argument, NULL, 'S' },
{ "test", required_argument, NULL, 't' },
{ "verbose", no_argument, NULL, 'v' },
@@ -1035,7 +1038,7 @@ static void parse_args(int argc, char **argv)
};
int c;
- while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) {
+ while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) {
switch (c) {
case '4':
proto = PF_INET;
@@ -1045,6 +1048,9 @@ static void parse_args(int argc, char **argv)
proto = PF_INET6;
ethhdr_proto = htons(ETH_P_IPV6);
break;
+ case 'd':
+ addr4_dst = addr6_dst = optarg;
+ break;
case 'D':
dmac = optarg;
break;
@@ -1054,6 +1060,9 @@ static void parse_args(int argc, char **argv)
case 'r':
tx_socket = false;
break;
+ case 's':
+ addr4_src = addr6_src = optarg;
+ break;
case 'S':
smac = optarg;
break;
@@ -1091,5 +1100,7 @@ int main(int argc, char **argv)
gro_sender();
else
gro_receiver();
+
+ fprintf(stderr, "Gro::%s test passed.\n", testname);
return 0;
}
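
The gro.c hunks above replace the fixed SIP*/DIP* macros with run-time
defaults that the new --saddr/--daddr options can override. A minimal
stand-alone sketch of that pattern (illustrative only, not part of the
patch; the option letters mirror the ones added here):

	#include <getopt.h>
	#include <stdio.h>

	static const char *addr4_src = "192.168.1.200";	/* compiled-in default */
	static const char *addr4_dst = "192.168.1.100";

	int main(int argc, char **argv)
	{
		int c;

		/* -s/-d override the defaults at run time */
		while ((c = getopt(argc, argv, "s:d:")) != -1) {
			switch (c) {
			case 's':
				addr4_src = optarg;
				break;
			case 'd':
				addr4_dst = optarg;
				break;
			}
		}

		printf("sending %s -> %s\n", addr4_src, addr4_dst);
		return 0;
	}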
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index ecbf57f264ed..7b9d6e31b8e7 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -311,7 +311,7 @@ check_exception()
ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
grep -E -v 'mtu|redirected' | grep -q "cache"
fi
- log_test $? 0 "IPv4: ${desc}"
+ log_test $? 0 "IPv4: ${desc}" 0
# No PMTU info for test "redirect" and "mtu exception plus redirect"
if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
index 8f6997d35816..d9d1d4190126 100644
--- a/tools/testing/selftests/net/ioam6_parser.c
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -240,11 +240,8 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
*p += sizeof(__u32);
}
- if (ioam6h->type.bit6) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
- return 1;
+ if (ioam6h->type.bit6)
*p += sizeof(__u32);
- }
if (ioam6h->type.bit7) {
if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index 3d7dde2c321b..cc10c10c5ed9 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -41,7 +41,6 @@
#define pr_err(fmt, ...) printk(fmt ": %m", ##__VA_ARGS__)
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
#define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
index 7569d892967a..49daae73c41e 100644
--- a/tools/testing/selftests/net/mptcp/.gitignore
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
mptcp_connect
+mptcp_inq
mptcp_sockopt
pm_nl_ctl
*.pcap
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index bbf4e448bad9..0356c4501c99 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -8,7 +8,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
simult_flows.sh mptcp_sockopt.sh
-TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt
+TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 0faaccd21447..d36b7da5082a 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -9,9 +9,13 @@ CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NETFILTER_NETLINK=m
CONFIG_NF_TABLES=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_COMPAT=m
CONFIG_NETFILTER_XTABLES=m
CONFIG_NETFILTER_XT_MATCH_BPF=m
-CONFIG_NF_TABLES_IPV4=y
-CONFIG_NF_TABLES_IPV6=y
+CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_TPROXY=m
+CONFIG_NFT_SOCKET=m
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 95e81d557b08..8628aa61b763 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -16,6 +16,7 @@
#include <unistd.h>
#include <time.h>
+#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/sendfile.h>
#include <sys/stat.h>
@@ -28,6 +29,7 @@
#include <linux/tcp.h>
#include <linux/time_types.h>
+#include <linux/sockios.h>
extern int optind;
@@ -59,7 +61,6 @@ static enum cfg_peek cfg_peek = CFG_NONE_PEEK;
static const char *cfg_host;
static const char *cfg_port = "12000";
static int cfg_sock_proto = IPPROTO_MPTCP;
-static bool tcpulp_audit;
static int pf = AF_INET;
static int cfg_sndbuf;
static int cfg_rcvbuf;
@@ -69,32 +70,56 @@ static unsigned int cfg_time;
static unsigned int cfg_do_w;
static int cfg_wait;
static uint32_t cfg_mark;
+static char *cfg_input;
+static int cfg_repeat = 1;
struct cfg_cmsg_types {
unsigned int cmsg_enabled:1;
unsigned int timestampns:1;
+ unsigned int tcp_inq:1;
};
+struct cfg_sockopt_types {
+ unsigned int transparent:1;
+};
+
+struct tcp_inq_state {
+ unsigned int last;
+ bool expect_eof;
+};
+
+static struct tcp_inq_state tcp_inq;
+
static struct cfg_cmsg_types cfg_cmsg_types;
+static struct cfg_sockopt_types cfg_sockopt_types;
static void die_usage(void)
{
- fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
- "[-l] [-w sec] [-t num] [-T num] connect_address\n");
+ fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-i file] [-I num] [-j] [-l] "
+ "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-j] [-l] [-r num] "
+ "[-s MPTCP|TCP] [-S num] [-r num] [-t num] [-T num] [-u] [-w sec] connect_address\n");
fprintf(stderr, "\t-6 use ipv6\n");
+ fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
+ fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin");
+ fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num "
+ "incoming connections, in client mode, disconnect and reconnect to the server\n");
+ fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down "
+ "-- for MPJ tests\n");
+ fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n");
+ fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
+ fprintf(stderr, "\t-M mark -- set socket packet mark\n");
+ fprintf(stderr, "\t-o option -- test sockopt <option>\n");
+ fprintf(stderr, "\t-p num -- use port num\n");
+ fprintf(stderr,
+ "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
fprintf(stderr, "\t-t num -- set poll timeout to num\n");
fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
- fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
+ fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes "
+ "-- for remove addr tests\n");
fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
- fprintf(stderr, "\t-p num -- use port num\n");
fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
- fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
- fprintf(stderr, "\t-M mark -- set socket packet mark\n");
- fprintf(stderr, "\t-u -- check mptcp ulp\n");
+ fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
- fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
- fprintf(stderr,
- "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
exit(1);
}
@@ -185,6 +210,58 @@ static void set_mark(int fd, uint32_t mark)
}
}
+static void set_transparent(int fd, int pf)
+{
+ int one = 1;
+
+ switch (pf) {
+ case AF_INET:
+ if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)))
+ perror("IP_TRANSPARENT");
+ break;
+ case AF_INET6:
+ if (-1 == setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)))
+ perror("IPV6_TRANSPARENT");
+ break;
+ }
+}
+
+static int do_ulp_so(int sock, const char *name)
+{
+ return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name));
+}
+
+#define X(m) xerror("%s:%u: %s: failed for proto %d at line %u", __FILE__, __LINE__, (m), proto, line)
+static void sock_test_tcpulp(int sock, int proto, unsigned int line)
+{
+ socklen_t buflen = 8;
+ char buf[8] = "";
+ int ret = getsockopt(sock, IPPROTO_TCP, TCP_ULP, buf, &buflen);
+
+ if (ret != 0)
+ X("getsockopt");
+
+ if (buflen > 0) {
+ if (strcmp(buf, "mptcp") != 0)
+ xerror("unexpected ULP '%s' for proto %d at line %u", buf, proto, line);
+ ret = do_ulp_so(sock, "tls");
+ if (ret == 0)
+ X("setsockopt");
+ } else if (proto == IPPROTO_MPTCP) {
+ ret = do_ulp_so(sock, "tls");
+ if (ret != -1)
+ X("setsockopt");
+ }
+
+ ret = do_ulp_so(sock, "mptcp");
+ if (ret != -1)
+ X("setsockopt");
+
+#undef X
+}
+
+#define SOCK_TEST_TCPULP(s, p) sock_test_tcpulp((s), (p), __LINE__)
+
static int sock_listen_mptcp(const char * const listenaddr,
const char * const port)
{
@@ -208,10 +285,15 @@ static int sock_listen_mptcp(const char * const listenaddr,
if (sock < 0)
continue;
+ SOCK_TEST_TCPULP(sock, cfg_sock_proto);
+
if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one,
sizeof(one)))
perror("setsockopt");
+ if (cfg_sockopt_types.transparent)
+ set_transparent(sock, pf);
+
if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
break; /* success */
@@ -227,54 +309,22 @@ static int sock_listen_mptcp(const char * const listenaddr,
return sock;
}
+ SOCK_TEST_TCPULP(sock, cfg_sock_proto);
+
if (listen(sock, 20)) {
perror("listen");
close(sock);
return -1;
}
- return sock;
-}
+ SOCK_TEST_TCPULP(sock, cfg_sock_proto);
-static bool sock_test_tcpulp(const char * const remoteaddr,
- const char * const port)
-{
- struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
- .ai_socktype = SOCK_STREAM,
- };
- struct addrinfo *a, *addr;
- int sock = -1, ret = 0;
- bool test_pass = false;
-
- hints.ai_family = AF_INET;
-
- xgetaddrinfo(remoteaddr, port, &hints, &addr);
- for (a = addr; a; a = a->ai_next) {
- sock = socket(a->ai_family, a->ai_socktype, IPPROTO_TCP);
- if (sock < 0) {
- perror("socket");
- continue;
- }
- ret = setsockopt(sock, IPPROTO_TCP, TCP_ULP, "mptcp",
- sizeof("mptcp"));
- if (ret == -1 && errno == EOPNOTSUPP)
- test_pass = true;
- close(sock);
-
- if (test_pass)
- break;
- if (!ret)
- fprintf(stderr,
- "setsockopt(TCP_ULP) returned 0\n");
- else
- perror("setsockopt(TCP_ULP)");
- }
- return test_pass;
+ return sock;
}
static int sock_connect_mptcp(const char * const remoteaddr,
- const char * const port, int proto)
+ const char * const port, int proto,
+ struct addrinfo **peer)
{
struct addrinfo hints = {
.ai_protocol = IPPROTO_TCP,
@@ -293,11 +343,15 @@ static int sock_connect_mptcp(const char * const remoteaddr,
continue;
}
+ SOCK_TEST_TCPULP(sock, proto);
+
if (cfg_mark)
set_mark(sock, cfg_mark);
- if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
+ if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
+ *peer = a;
break; /* success */
+ }
perror("connect()");
close(sock);
@@ -305,6 +359,8 @@ static int sock_connect_mptcp(const char * const remoteaddr,
}
freeaddrinfo(addr);
+ if (sock != -1)
+ SOCK_TEST_TCPULP(sock, proto);
return sock;
}
@@ -364,7 +420,9 @@ static size_t do_write(const int fd, char *buf, const size_t len)
static void process_cmsg(struct msghdr *msgh)
{
struct __kernel_timespec ts;
+ bool inq_found = false;
bool ts_found = false;
+ unsigned int inq = 0;
struct cmsghdr *cmsg;
for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
@@ -373,12 +431,27 @@ static void process_cmsg(struct msghdr *msgh)
ts_found = true;
continue;
}
+ if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) {
+ memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq));
+ inq_found = true;
+ continue;
+ }
+
}
if (cfg_cmsg_types.timestampns) {
if (!ts_found)
xerror("TIMESTAMPNS not present\n");
}
+
+ if (cfg_cmsg_types.tcp_inq) {
+ if (!inq_found)
+ xerror("TCP_INQ not present\n");
+
+ if (inq > 1024)
+ xerror("tcp_inq %u is larger than one kbyte\n", inq);
+ tcp_inq.last = inq;
+ }
}
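
For context: TCP_INQ, once enabled with setsockopt(IPPROTO_TCP, TCP_INQ),
makes the kernel attach a TCP_CM_INQ control message to recvmsg() results,
carrying the amount of in-order data still queued for reading. A minimal
sketch of the receive-side plumbing the test builds on (hypothetical
helper, assuming fd is a connected TCP or MPTCP socket with the option
already enabled):

	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/tcp.h>	/* TCP_INQ, TCP_CM_INQ */

	static ssize_t recv_with_inq(int fd, void *buf, size_t len,
				     unsigned int *inq)
	{
		char cbuf[CMSG_SPACE(sizeof(*inq))];
		struct iovec iov = { .iov_base = buf, .iov_len = len };
		struct msghdr msg = {
			.msg_iov = &iov,
			.msg_iovlen = 1,
			.msg_control = cbuf,
			.msg_controllen = sizeof(cbuf),
		};
		struct cmsghdr *cmsg;
		ssize_t ret = recvmsg(fd, &msg, 0);

		if (ret < 0)
			return ret;

		/* walk the control messages for the in-queue hint */
		for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
		     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if (cmsg->cmsg_level == IPPROTO_TCP &&
			    cmsg->cmsg_type == TCP_CM_INQ)
				memcpy(inq, CMSG_DATA(cmsg), sizeof(*inq));
		}

		return ret;
	}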
static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len)
@@ -395,10 +468,23 @@ static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len)
.msg_controllen = sizeof(msg_buf),
};
int flags = 0;
+ unsigned int last_hint = tcp_inq.last;
int ret = recvmsg(fd, &msg, flags);
- if (ret <= 0)
+ if (ret <= 0) {
+ if (ret == 0 && tcp_inq.expect_eof)
+ return ret;
+
+ if (ret == 0 && cfg_cmsg_types.tcp_inq)
+ if (last_hint != 1 && last_hint != 0)
+ xerror("EOF but last tcp_inq hint was %u\n", last_hint);
+
return ret;
+ }
+
+ if (tcp_inq.expect_eof)
+ xerror("expected EOF, last_hint %u, now %u\n",
+ last_hint, tcp_inq.last);
if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled)
xerror("got %lu bytes of cmsg data, expected 0\n",
@@ -410,6 +496,19 @@ static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len)
if (msg.msg_controllen)
process_cmsg(&msg);
+ if (cfg_cmsg_types.tcp_inq) {
+ if ((size_t)ret < len && last_hint > (unsigned int)ret) {
+ if (ret + 1 != (int)last_hint) {
+ int next = read(fd, msg_buf, sizeof(msg_buf));
+
+ xerror("read %u of %u, last_hint was %u tcp_inq hint now %u next_read returned %d/%m\n",
+ ret, (unsigned int)len, last_hint, tcp_inq.last, next);
+ } else {
+ tcp_inq.expect_eof = true;
+ }
+ }
+ }
+
return ret;
}
@@ -441,14 +540,17 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
return ret;
}
-static void set_nonblock(int fd)
+static void set_nonblock(int fd, bool nonblock)
{
int flags = fcntl(fd, F_GETFL);
if (flags == -1)
return;
- fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+ if (nonblock)
+ fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+ else
+ fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
}
static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out)
@@ -460,7 +562,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
unsigned int woff = 0, wlen = 0;
char wbuf[8192];
- set_nonblock(peerfd);
+ set_nonblock(peerfd, true);
for (;;) {
char rbuf[8192];
@@ -555,7 +657,6 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
if (cfg_remove)
usleep(cfg_wait);
- close(peerfd);
return 0;
}
@@ -697,7 +798,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
return err;
}
-static int copyfd_io(int infd, int peerfd, int outfd)
+static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd)
{
bool in_closed_after_out = false;
struct timespec start, end;
@@ -736,6 +837,9 @@ static int copyfd_io(int infd, int peerfd, int outfd)
if (ret)
return ret;
+ if (close_peerfd)
+ close(peerfd);
+
if (cfg_time) {
unsigned int delta_ms;
@@ -847,7 +951,7 @@ static void maybe_close(int fd)
{
unsigned int r = rand();
- if (!(cfg_join || cfg_remove) && (r & 1))
+ if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1))
close(fd);
}
@@ -857,7 +961,9 @@ int main_loop_s(int listensock)
struct pollfd polls;
socklen_t salen;
int remotesock;
+ int fd = 0;
+again:
polls.fd = listensock;
polls.events = POLLIN;
@@ -878,12 +984,27 @@ int main_loop_s(int listensock)
check_sockaddr(pf, &ss, salen);
check_getpeername(remotesock, &ss, salen);
- return copyfd_io(0, remotesock, 1);
+ if (cfg_input) {
+ fd = open(cfg_input, O_RDONLY);
+ if (fd < 0)
+ xerror("can't open %s: %d", cfg_input, errno);
+ }
+
+ SOCK_TEST_TCPULP(remotesock, 0);
+
+ copyfd_io(fd, remotesock, 1, true);
+ } else {
+ perror("accept");
+ return 1;
}
- perror("accept");
+ if (--cfg_repeat > 0) {
+ if (cfg_input)
+ close(fd);
+ goto again;
+ }
- return 1;
+ return 0;
}
static void init_rng(void)
@@ -919,6 +1040,8 @@ static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg)
if (cmsg->timestampns)
xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on));
+ if (cmsg->tcp_inq)
+ xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, &on, sizeof(on));
}
static void parse_cmsg_types(const char *type)
@@ -940,21 +1063,81 @@ static void parse_cmsg_types(const char *type)
return;
}
+ if (strncmp(type, "TCPINQ", len) == 0) {
+ cfg_cmsg_types.tcp_inq = 1;
+ return;
+ }
+
fprintf(stderr, "Unrecognized cmsg option %s\n", type);
exit(1);
}
+static void parse_setsock_options(const char *name)
+{
+ char *next = strchr(name, ',');
+ unsigned int len = 0;
+
+ if (next) {
+ parse_setsock_options(next + 1);
+ len = next - name;
+ } else {
+ len = strlen(name);
+ }
+
+ if (strncmp(name, "TRANSPARENT", len) == 0) {
+ cfg_sockopt_types.transparent = 1;
+ return;
+ }
+
+ fprintf(stderr, "Unrecognized setsockopt option %s\n", name);
+ exit(1);
+}
+
+void xdisconnect(int fd, int addrlen)
+{
+ struct sockaddr_storage empty;
+ int msec_sleep = 10;
+ int queued = 1;
+ int i;
+
+ shutdown(fd, SHUT_WR);
+
+ /* wait until the pending data is completely flushed; the later
+ * disconnect will bypass/ignore/drop any pending data.
+ */
+ for (i = 0; ; i += msec_sleep) {
+ if (ioctl(fd, SIOCOUTQ, &queued) < 0)
+ xerror("can't query out socket queue: %d", errno);
+
+ if (!queued)
+ break;
+
+ if (i > poll_timeout)
+ xerror("timeout while waiting for spool to complete");
+ usleep(msec_sleep * 1000);
+ }
+
+ memset(&empty, 0, sizeof(empty));
+ empty.ss_family = AF_UNSPEC;
+ if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0)
+ xerror("can't disconnect: %d", errno);
+}
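
Calling connect() with an AF_UNSPEC address is the documented way to
dissolve an established association while keeping the file descriptor,
which is what makes the new -I repeat mode possible. A sketch of the
resulting reconnect cycle, reusing this file's helpers (do_one_transfer()
is a hypothetical stand-in for the copyfd_io() call; error paths trimmed):

	static void transfer_n_times(int fd, const struct sockaddr *peer,
				     socklen_t addrlen, int n)
	{
		while (n-- > 0) {
			do_one_transfer(fd);
			if (n == 0)
				break;
			xdisconnect(fd, addrlen);	/* AF_UNSPEC connect(), above */
			set_nonblock(fd, false);	/* the reconnect must block */
			if (connect(fd, peer, addrlen))
				xerror("can't reconnect: %d", errno);
		}
	}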
+
int main_loop(void)
{
- int fd;
+ int fd, ret, fd_in = 0;
+ struct addrinfo *peer;
/* listener is ready. */
- fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto);
+ fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer);
if (fd < 0)
return 2;
+again:
check_getpeername_connect(fd);
+ SOCK_TEST_TCPULP(fd, cfg_sock_proto);
+
if (cfg_rcvbuf)
set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
@@ -962,7 +1145,31 @@ int main_loop(void)
if (cfg_cmsg_types.cmsg_enabled)
apply_cmsg_types(fd, &cfg_cmsg_types);
- return copyfd_io(0, fd, 1);
+ if (cfg_input) {
+ fd_in = open(cfg_input, O_RDONLY);
+ if (fd_in < 0)
+ xerror("can't open %s: %d", cfg_input, errno);
+ }
+
+ /* close the client socket only if we are not going to reconnect */
+ ret = copyfd_io(fd_in, fd, 1, cfg_repeat == 1);
+ if (ret)
+ return ret;
+
+ if (--cfg_repeat > 0) {
+ xdisconnect(fd, peer->ai_addrlen);
+
+ /* the socket could be non-blocking at this point, we need the
+ * connect to be blocking
+ */
+ set_nonblock(fd, false);
+ if (connect(fd, peer->ai_addr, peer->ai_addrlen))
+ xerror("can't reconnect: %d", errno);
+ if (cfg_input)
+ close(fd_in);
+ goto again;
+ }
+ return 0;
}
int parse_proto(const char *proto)
@@ -1047,7 +1254,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:")) != -1) {
+ while ((c = getopt(argc, argv, "6c:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
@@ -1061,6 +1268,12 @@ static void parse_opts(int argc, char **argv)
if (cfg_do_w <= 0)
cfg_do_w = 50;
break;
+ case 'i':
+ cfg_input = optarg;
+ break;
+ case 'I':
+ cfg_repeat = atoi(optarg);
+ break;
case 'l':
listen_mode = true;
break;
@@ -1073,9 +1286,6 @@ static void parse_opts(int argc, char **argv)
case 'h':
die_usage();
break;
- case 'u':
- tcpulp_audit = true;
- break;
case '6':
pf = AF_INET6;
break;
@@ -1108,6 +1318,9 @@ static void parse_opts(int argc, char **argv)
case 'c':
parse_cmsg_types(optarg);
break;
+ case 'o':
+ parse_setsock_options(optarg);
+ break;
}
}
@@ -1126,9 +1339,6 @@ int main(int argc, char *argv[])
signal(SIGUSR1, handle_signal);
parse_opts(argc, argv);
- if (tcpulp_audit)
- return sock_test_tcpulp(cfg_host, cfg_port) ? 0 : 1;
-
if (listen_mode) {
int fd = sock_listen_mptcp(cfg_host, cfg_port);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 559173a8e387..cb5809b89081 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -7,6 +7,7 @@ optstring="S:R:d:e:l:r:h4cm:f:tC"
ret=0
sin=""
sout=""
+cin_disconnect=""
cin=""
cout=""
ksft_skip=4
@@ -24,6 +25,7 @@ options_log=true
do_tcp=0
checksum=false
filesize=0
+connect_per_transfer=1
if [ $tc_loss -eq 100 ];then
tc_loss=1%
@@ -127,6 +129,7 @@ TEST_COUNT=0
cleanup()
{
+ rm -f "$cin_disconnect" "$cout_disconnect"
rm -f "$cin" "$cout"
rm -f "$sin" "$sout"
rm -f "$capout"
@@ -149,6 +152,8 @@ sout=$(mktemp)
cin=$(mktemp)
cout=$(mktemp)
capout=$(mktemp)
+cin_disconnect="$cin".disconnect
+cout_disconnect="$cout".disconnect
trap cleanup EXIT
for i in "$ns1" "$ns2" "$ns3" "$ns4";do
@@ -296,24 +301,6 @@ check_mptcp_disabled()
return 0
}
-check_mptcp_ulp_setsockopt()
-{
- local t retval
- t="ns_ulp-$sech-$(mktemp -u XXXXXX)"
-
- ip netns add ${t} || exit $ksft_skip
- if ! ip netns exec ${t} ./mptcp_connect -u -p 10000 -s TCP 127.0.0.1 2>&1; then
- printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) allowed\t[ FAIL ]\n"
- retval=1
- ret=$retval
- else
- printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) blocked\t[ OK ]\n"
- retval=0
- fi
- ip netns del ${t}
- return $retval
-}
-
# $1: IP address
is_v6()
{
@@ -518,8 +505,8 @@ do_transfer()
cookies=${cookies##*=}
if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
- expect_synrx=$((stat_synrx_last_l+1))
- expect_ackrx=$((stat_ackrx_last_l+1))
+ expect_synrx=$((stat_synrx_last_l+$connect_per_transfer))
+ expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer))
fi
if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
@@ -671,6 +658,82 @@ run_tests()
run_tests_lo $1 $2 $3 0
}
+run_test_transparent()
+{
+ local connect_addr="$1"
+ local msg="$2"
+
+ local connector_ns="$ns1"
+ local listener_ns="$ns2"
+ local lret=0
+ local r6flag=""
+
+ # skip if we don't want v6
+ if ! $ipv6 && is_v6 "${connect_addr}"; then
+ return 0
+ fi
+
+ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
+flush ruleset
+table inet mangle {
+ chain divert {
+ type filter hook prerouting priority -150;
+
+ meta l4proto tcp socket transparent 1 meta mark set 1 accept
+ tcp dport 20000 tproxy to :20000 meta mark set 1 accept
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: $msg, could not load nft ruleset"
+ return
+ fi
+
+ local local_addr
+ if is_v6 "${connect_addr}"; then
+ local_addr="::"
+ r6flag="-6"
+ else
+ local_addr="0.0.0.0"
+ fi
+
+ ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100
+ if [ $? -ne 0 ]; then
+ ip netns exec "$listener_ns" nft flush ruleset
+ echo "SKIP: $msg, ip $r6flag rule failed"
+ return
+ fi
+
+ ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100
+ if [ $? -ne 0 ]; then
+ ip netns exec "$listener_ns" nft flush ruleset
+ ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
+ echo "SKIP: $msg, ip route add local $local_addr failed"
+ return
+ fi
+
+ echo "INFO: test $msg"
+
+ TEST_COUNT=10000
+ local extra_args="-o TRANSPARENT"
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
+ ${connect_addr} ${local_addr} "${extra_args}"
+ lret=$?
+
+ ip netns exec "$listener_ns" nft flush ruleset
+ ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
+ ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100
+
+ if [ $lret -ne 0 ]; then
+ echo "FAIL: $msg, mptcp connection error" 1>&2
+ ret=$lret
+ return 1
+ fi
+
+ echo "PASS: $msg"
+ return 0
+}
+
run_tests_peekmode()
{
local peekmode="$1"
@@ -680,6 +743,33 @@ run_tests_peekmode()
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
}
+run_tests_disconnect()
+{
+ local old_cin=$cin
+ local old_sin=$sin
+
+ cat $cin $cin $cin > "$cin".disconnect
+
+ # force do_transfer to cope with the multiple transmissions
+ sin="$cin.disconnect"
+ sin_disconnect=$old_sin
+ cin="$cin.disconnect"
+ cin_disconnect="$old_cin"
+ connect_per_transfer=3
+
+ echo "INFO: disconnect"
+ run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin"
+ run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin"
+
+ # restore previous status
+ sin=$old_sin
+ cin=$old_cin
+ cin_disconnect="$cin".disconnect
+ connect_per_transfer=1
+}
+
display_time()
{
time_end=$(date +%s)
@@ -704,8 +794,6 @@ make_file "$sin" "server"
check_mptcp_disabled
-check_mptcp_ulp_setsockopt
-
stop_if_error "The kernel configuration is not valid for MPTCP"
echo "INFO: validating network environment with pings"
@@ -794,5 +882,12 @@ run_tests_peekmode "saveWithPeek"
run_tests_peekmode "saveAfterPeek"
stop_if_error "Tests with peek mode have failed"
+# connect to ns4 ip address, ns2 should intercept/proxy
+run_test_transparent 10.0.3.1 "tproxy ipv4"
+run_test_transparent dead:beef:3::1 "tproxy ipv6"
+stop_if_error "Tests with tproxy have failed"
+
+run_tests_disconnect
+
display_time
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
new file mode 100644
index 000000000000..29f75e2a1116
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <time.h>
+
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <netdb.h>
+#include <netinet/in.h>
+
+#include <linux/tcp.h>
+#include <linux/sockios.h>
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+#ifndef SOL_MPTCP
+#define SOL_MPTCP 284
+#endif
+
+static int pf = AF_INET;
+static int proto_tx = IPPROTO_MPTCP;
+static int proto_rx = IPPROTO_MPTCP;
+
+static void die_perror(const char *msg)
+{
+ perror(msg);
+ exit(1);
+}
+
+static void die_usage(int r)
+{
+ fprintf(stderr, "Usage: mptcp_inq [-6] [ -t tcp|mptcp ] [ -r tcp|mptcp]\n");
+ exit(r);
+}
+
+static void xerror(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fputc('\n', stderr);
+ exit(1);
+}
+
+static const char *getxinfo_strerr(int err)
+{
+ if (err == EAI_SYSTEM)
+ return strerror(errno);
+
+ return gai_strerror(err);
+}
+
+static void xgetaddrinfo(const char *node, const char *service,
+ const struct addrinfo *hints,
+ struct addrinfo **res)
+{
+ int err = getaddrinfo(node, service, hints, res);
+
+ if (err) {
+ const char *errstr = getxinfo_strerr(err);
+
+ fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
+ node ? node : "", service ? service : "", errstr);
+ exit(1);
+ }
+}
+
+static int sock_listen_mptcp(const char * const listenaddr,
+ const char * const port)
+{
+ int sock;
+ struct addrinfo hints = {
+ .ai_protocol = IPPROTO_TCP,
+ .ai_socktype = SOCK_STREAM,
+ .ai_flags = AI_PASSIVE | AI_NUMERICHOST
+ };
+
+ hints.ai_family = pf;
+
+ struct addrinfo *a, *addr;
+ int one = 1;
+
+ xgetaddrinfo(listenaddr, port, &hints, &addr);
+ hints.ai_family = pf;
+
+ for (a = addr; a; a = a->ai_next) {
+ sock = socket(a->ai_family, a->ai_socktype, proto_rx);
+ if (sock < 0)
+ continue;
+
+ if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one,
+ sizeof(one)))
+ perror("setsockopt");
+
+ if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
+ break; /* success */
+
+ perror("bind");
+ close(sock);
+ sock = -1;
+ }
+
+ freeaddrinfo(addr);
+
+ if (sock < 0)
+ xerror("could not create listen socket");
+
+ if (listen(sock, 20))
+ die_perror("listen");
+
+ return sock;
+}
+
+static int sock_connect_mptcp(const char * const remoteaddr,
+ const char * const port, int proto)
+{
+ struct addrinfo hints = {
+ .ai_protocol = IPPROTO_TCP,
+ .ai_socktype = SOCK_STREAM,
+ };
+ struct addrinfo *a, *addr;
+ int sock = -1;
+
+ hints.ai_family = pf;
+
+ xgetaddrinfo(remoteaddr, port, &hints, &addr);
+ for (a = addr; a; a = a->ai_next) {
+ sock = socket(a->ai_family, a->ai_socktype, proto);
+ if (sock < 0)
+ continue;
+
+ if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
+ break; /* success */
+
+ die_perror("connect");
+ }
+
+ if (sock < 0)
+ xerror("could not create connect socket");
+
+ freeaddrinfo(addr);
+ return sock;
+}
+
+static int protostr_to_num(const char *s)
+{
+ if (strcasecmp(s, "tcp") == 0)
+ return IPPROTO_TCP;
+ if (strcasecmp(s, "mptcp") == 0)
+ return IPPROTO_MPTCP;
+
+ die_usage(1);
+ return 0;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "h6t:r:")) != -1) {
+ switch (c) {
+ case 'h':
+ die_usage(0);
+ break;
+ case '6':
+ pf = AF_INET6;
+ break;
+ case 't':
+ proto_tx = protostr_to_num(optarg);
+ break;
+ case 'r':
+ proto_rx = protostr_to_num(optarg);
+ break;
+ default:
+ die_usage(1);
+ break;
+ }
+ }
+}
+
+/* wait up to timeout milliseconds */
+static void wait_for_ack(int fd, int timeout, size_t total)
+{
+ int i;
+
+ for (i = 0; i < timeout; i++) {
+ int nsd, ret, queued = -1;
+ struct timespec req;
+
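+ /* TIOCOUTQ reports bytes queued but not yet acked by the peer;
+ * SIOCOUTQNSD reports bytes queued but not yet sent at all.
+ */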
+ ret = ioctl(fd, TIOCOUTQ, &queued);
+ if (ret < 0)
+ die_perror("TIOCOUTQ");
+
+ ret = ioctl(fd, SIOCOUTQNSD, &nsd);
+ if (ret < 0)
+ die_perror("SIOCOUTQNSD");
+
+ if ((size_t)queued > total)
+ xerror("TIOCOUTQ %u, but only %zu expected\n", queued, total);
+ assert(nsd <= queued);
+
+ if (queued == 0)
+ return;
+
+ /* wait for peer to ack rx of all data */
+ req.tv_sec = 0;
+ req.tv_nsec = 1 * 1000 * 1000ul; /* 1ms */
+ nanosleep(&req, NULL);
+ }
+
+ xerror("still tx data queued after %u ms\n", timeout);
+}
+
+static void connect_one_server(int fd, int unixfd)
+{
+ size_t len, i, total, sent;
+ char buf[4096], buf2[4096];
+ ssize_t ret;
+
+ len = rand() % (sizeof(buf) - 1);
+
+ if (len < 128)
+ len = 128;
+
+ for (i = 0; i < len ; i++) {
+ buf[i] = rand() % 26;
+ buf[i] += 'A';
+ }
+
+ buf[i] = '\n';
+
+ /* un-block server */
+ ret = read(unixfd, buf2, 4);
+ assert(ret == 4);
+
+ assert(strncmp(buf2, "xmit", 4) == 0);
+
+ ret = write(unixfd, &len, sizeof(len));
+ assert(ret == (ssize_t)sizeof(len));
+
+ ret = write(fd, buf, len);
+ if (ret < 0)
+ die_perror("write");
+
+ if (ret != (ssize_t)len)
+ xerror("short write");
+
+ ret = read(unixfd, buf2, 4);
+ assert(strncmp(buf2, "huge", 4) == 0);
+
+ total = rand() % (16 * 1024 * 1024);
+ total += (1 * 1024 * 1024);
+ sent = total;
+
+ ret = write(unixfd, &total, sizeof(total));
+ assert(ret == (ssize_t)sizeof(total));
+
+ wait_for_ack(fd, 5000, len);
+
+ while (total > 0) {
+ if (total > sizeof(buf))
+ len = sizeof(buf);
+ else
+ len = total;
+
+ ret = write(fd, buf, len);
+ if (ret < 0)
+ die_perror("write");
+ total -= ret;
+
+ /* we don't have to care about buf content, only
+ * number of total bytes sent
+ */
+ }
+
+ ret = read(unixfd, buf2, 4);
+ assert(ret == 4);
+ assert(strncmp(buf2, "shut", 4) == 0);
+
+ wait_for_ack(fd, 5000, sent);
+
+ ret = write(fd, buf, 1);
+ assert(ret == 1);
+ close(fd);
+ ret = write(unixfd, "closed", 6);
+ assert(ret == 6);
+
+ close(unixfd);
+}
+
+static void get_tcp_inq(struct msghdr *msgh, unsigned int *inqv)
+{
+ struct cmsghdr *cmsg;
+
+ for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
+ if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) {
+ memcpy(inqv, CMSG_DATA(cmsg), sizeof(*inqv));
+ return;
+ }
+ }
+
+ xerror("could not find TCP_CM_INQ cmsg type");
+}
+
+static void process_one_client(int fd, int unixfd)
+{
+ unsigned int tcp_inq;
+ size_t expect_len;
+ char msg_buf[4096];
+ char buf[4096];
+ char tmp[16];
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = 1,
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = msg_buf,
+ .msg_controllen = sizeof(msg_buf),
+ };
+ ssize_t ret, tot;
+
+ ret = write(unixfd, "xmit", 4);
+ assert(ret == 4);
+
+ ret = read(unixfd, &expect_len, sizeof(expect_len));
+ assert(ret == (ssize_t)sizeof(expect_len));
+
+ if (expect_len > sizeof(buf))
+ xerror("expect len %zu exceeds buffer size", expect_len);
+
+ for (;;) {
+ struct timespec req;
+ unsigned int queued;
+
+ ret = ioctl(fd, FIONREAD, &queued);
+ if (ret < 0)
+ die_perror("FIONREAD");
+ if (queued > expect_len)
+ xerror("FIONREAD returned %u, but only %zu expected\n",
+ queued, expect_len);
+ if (queued == expect_len)
+ break;
+
+ req.tv_sec = 0;
+ req.tv_nsec = 1000 * 1000ul;
+ nanosleep(&req, NULL);
+ }
+
+ /* read one byte, expect cmsg to return expected - 1 */
+ ret = recvmsg(fd, &msg, 0);
+ if (ret < 0)
+ die_perror("recvmsg");
+
+ if (msg.msg_controllen == 0)
+ xerror("msg_controllen is 0");
+
+ get_tcp_inq(&msg, &tcp_inq);
+
+ assert((size_t)tcp_inq == (expect_len - 1));
+
+ iov.iov_len = sizeof(buf);
+ ret = recvmsg(fd, &msg, 0);
+ if (ret < 0)
+ die_perror("recvmsg");
+
+ /* should have gotten exact remainder of all pending data */
+ assert(ret == (ssize_t)tcp_inq);
+
+ /* should be 0, all drained */
+ get_tcp_inq(&msg, &tcp_inq);
+ assert(tcp_inq == 0);
+
+ /* request a large swath of data. */
+ ret = write(unixfd, "huge", 4);
+ assert(ret == 4);
+
+ ret = read(unixfd, &expect_len, sizeof(expect_len));
+ assert(ret == (ssize_t)sizeof(expect_len));
+
+ /* peer should send us a few mb of data */
+ if (expect_len <= sizeof(buf))
+ xerror("expect len %zu too small\n", expect_len);
+
+ tot = 0;
+ do {
+ iov.iov_len = sizeof(buf);
+ ret = recvmsg(fd, &msg, 0);
+ if (ret < 0)
+ die_perror("recvmsg");
+
+ tot += ret;
+
+ get_tcp_inq(&msg, &tcp_inq);
+
+ if (tcp_inq > expect_len - tot)
+ xerror("inq %d, remaining %d total_len %d\n",
+ tcp_inq, expect_len - tot, (int)expect_len);
+
+ assert(tcp_inq <= expect_len - tot);
+ } while ((size_t)tot < expect_len);
+
+ ret = write(unixfd, "shut", 4);
+ assert(ret == 4);
+
+ /* wait for hangup. Should have received one more byte of data. */
+ ret = read(unixfd, tmp, sizeof(tmp));
+ assert(ret == 6);
+ assert(strncmp(tmp, "closed", 6) == 0);
+
+ sleep(1);
+
+ iov.iov_len = 1;
+ ret = recvmsg(fd, &msg, 0);
+ if (ret < 0)
+ die_perror("recvmsg");
+ assert(ret == 1);
+
+ get_tcp_inq(&msg, &tcp_inq);
+
+ /* tcp_inq should be 1 due to received fin. */
+ assert(tcp_inq == 1);
+
+ iov.iov_len = 1;
+ ret = recvmsg(fd, &msg, 0);
+ if (ret < 0)
+ die_perror("recvmsg");
+
+ /* expect EOF */
+ assert(ret == 0);
+ get_tcp_inq(&msg, &tcp_inq);
+ assert(tcp_inq == 1);
+
+ close(fd);
+}
+
+static int xaccept(int s)
+{
+ int fd = accept(s, NULL, 0);
+
+ if (fd < 0)
+ die_perror("accept");
+
+ return fd;
+}
+
+static int server(int unixfd)
+{
+ int fd = -1, r, on = 1;
+
+ switch (pf) {
+ case AF_INET:
+ fd = sock_listen_mptcp("127.0.0.1", "15432");
+ break;
+ case AF_INET6:
+ fd = sock_listen_mptcp("::1", "15432");
+ break;
+ default:
+ xerror("Unknown pf %d\n", pf);
+ break;
+ }
+
+ r = write(unixfd, "conn", 4);
+ assert(r == 4);
+
+ alarm(15);
+ r = xaccept(fd);
+
+ if (-1 == setsockopt(r, IPPROTO_TCP, TCP_INQ, &on, sizeof(on)))
+ die_perror("setsockopt");
+
+ process_one_client(r, unixfd);
+
+ return 0;
+}
+
+static int client(int unixfd)
+{
+ int fd = -1;
+
+ alarm(15);
+
+ switch (pf) {
+ case AF_INET:
+ fd = sock_connect_mptcp("127.0.0.1", "15432", proto_tx);
+ break;
+ case AF_INET6:
+ fd = sock_connect_mptcp("::1", "15432", proto_tx);
+ break;
+ default:
+ xerror("Unknown pf %d\n", pf);
+ }
+
+ connect_one_server(fd, unixfd);
+
+ return 0;
+}
+
+static void init_rng(void)
+{
+ int fd = open("/dev/urandom", O_RDONLY);
+ unsigned int foo = 0;
+
+ if (fd >= 0) {
+ int ret = read(fd, &foo, sizeof(foo));
+
+ if (ret != (int)sizeof(foo))
+ foo = 0; /* short read: fall back to a fixed seed */
+ close(fd);
+ }
+
+ srand(foo);
+}
+
+static pid_t xfork(void)
+{
+ pid_t p = fork();
+
+ if (p < 0)
+ die_perror("fork");
+ else if (p == 0)
+ init_rng();
+
+ return p;
+}
+
+static int rcheck(int wstatus, const char *what)
+{
+ if (WIFEXITED(wstatus)) {
+ if (WEXITSTATUS(wstatus) == 0)
+ return 0;
+ fprintf(stderr, "%s exited, status=%d\n", what, WEXITSTATUS(wstatus));
+ return WEXITSTATUS(wstatus);
+ } else if (WIFSIGNALED(wstatus)) {
+ xerror("%s killed by signal %d\n", what, WTERMSIG(wstatus));
+ } else if (WIFSTOPPED(wstatus)) {
+ xerror("%s stopped by signal %d\n", what, WSTOPSIG(wstatus));
+ }
+
+ return 111;
+}
+
+int main(int argc, char *argv[])
+{
+ int e1, e2, wstatus;
+ pid_t s, c, ret;
+ int unixfds[2];
+
+ parse_opts(argc, argv);
+
+ e1 = socketpair(AF_UNIX, SOCK_DGRAM, 0, unixfds);
+ if (e1 < 0)
+ die_perror("socketpair");
+
+ s = xfork();
+ if (s == 0)
+ return server(unixfds[1]);
+
+ close(unixfds[1]);
+
+ /* wait until the server has bound its listening socket */
+ e1 = read(unixfds[0], &e1, 4);
+ assert(e1 == 4);
+
+ c = xfork();
+ if (c == 0)
+ return client(unixfds[0]);
+
+ close(unixfds[0]);
+
+ ret = waitpid(s, &wstatus, 0);
+ if (ret == -1)
+ die_perror("waitpid");
+ e1 = rcheck(wstatus, "server");
+ ret = waitpid(c, &wstatus, 0);
+ if (ret == -1)
+ die_perror("waitpid");
+ e2 = rcheck(wstatus, "client");
+
+ return e1 ? e1 : e2;
+}
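
The test above leans on a get_tcp_inq() helper (defined earlier in mptcp_inq.c) to pull the in-queue hint out of recvmsg()'s control buffer. For reference, a minimal sketch of that extraction; get_inq_hint() is a hypothetical name, and the TCP_CM_INQ fallback define assumes the value from linux/tcp.h for older userspace headers:

#include <string.h>
#include <sys/socket.h>
#include <netinet/tcp.h>

#ifndef TCP_CM_INQ
#define TCP_CM_INQ 36	/* cmsg type; same value as TCP_INQ in linux/tcp.h */
#endif

/* Scan the control buffer filled by recvmsg() for the TCP_INQ hint. */
static int get_inq_hint(struct msghdr *msg, int *inq)
{
	struct cmsghdr *cmsg;

	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
		if (cmsg->cmsg_level == IPPROTO_TCP &&
		    cmsg->cmsg_type == TCP_CM_INQ) {
			memcpy(inq, CMSG_DATA(cmsg), sizeof(*inq));
			return 0;
		}
	}
	return -1;	/* no TCP_INQ cmsg in this message */
}

The caller must size msg_control with at least CMSG_SPACE(sizeof(int)), as the test's msg_buf does, or the kernel has nowhere to put the hint.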
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 7ef639a9d4a6..b8bdbec0cf69 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -75,6 +75,7 @@ init()
# let $ns2 reach any $ns1 address from any interface
ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
+ ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i
done
}
@@ -238,6 +239,45 @@ is_v6()
[ -z "${1##*:*}" ]
}
+# $1: ns, $2: port
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex i
+
+ port_hex="$(printf "%04X" "${port}")"
+ for i in $(seq 10); do
+ ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+ awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
+ break
+ sleep 0.1
+ done
+}
+
+rm_addr_count()
+{
+ local ns="${1}"
+
+ ip netns exec ${ns} nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'
+}
+
+# $1: ns, $2: old rm_addr counter in $ns
+wait_rm_addr()
+{
+ local ns="${1}"
+ local old_cnt="${2}"
+ local cnt
+ local i
+
+ for i in $(seq 10); do
+ cnt=$(rm_addr_count ${ns})
+ [ "$cnt" = "${old_cnt}" ] || break
+ sleep 0.1
+ done
+}
+
do_transfer()
{
listener_ns="$1"
@@ -307,7 +347,7 @@ do_transfer()
fi
spid=$!
- sleep 1
+ wait_local_port_listen "${listener_ns}" "${port}"
if [ "$test_link_fail" -eq 0 ];then
timeout ${timeout_test} \
@@ -324,10 +364,13 @@ do_transfer()
fi
cpid=$!
+ # let the mptcp subflow be established in background before
+ # doing endpoint manipulation
+ [ $addr_nr_ns1 = "0" -a $addr_nr_ns2 = "0" ] || sleep 1
+
if [ $addr_nr_ns1 -gt 0 ]; then
let add_nr_ns1=addr_nr_ns1
counter=2
- sleep 1
while [ $add_nr_ns1 -gt 0 ]; do
local addr
if is_v6 "${connect_addr}"; then
@@ -339,7 +382,6 @@ do_transfer()
let counter+=1
let add_nr_ns1-=1
done
- sleep 1
elif [ $addr_nr_ns1 -lt 0 ]; then
let rm_nr_ns1=-addr_nr_ns1
if [ $rm_nr_ns1 -lt 8 ]; then
@@ -347,22 +389,19 @@ do_transfer()
pos=1
dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
- sleep 1
-
while [ $counter -le $rm_nr_ns1 ]
do
id=${dump[$pos]}
+ rm_addr=$(rm_addr_count ${connector_ns})
ip netns exec ${listener_ns} ./pm_nl_ctl del $id
- sleep 1
+ wait_rm_addr ${connector_ns} ${rm_addr}
let counter+=1
let pos+=5
done
fi
elif [ $rm_nr_ns1 -eq 8 ]; then
- sleep 1
ip netns exec ${listener_ns} ./pm_nl_ctl flush
elif [ $rm_nr_ns1 -eq 9 ]; then
- sleep 1
ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr}
fi
fi
@@ -373,10 +412,13 @@ do_transfer()
addr_nr_ns2=${addr_nr_ns2:9}
fi
+ # if newly added endpoints must be deleted, give the background msk
+ # some time to create them
+ [ $addr_nr_ns1 -gt 0 -a $addr_nr_ns2 -lt 0 ] && sleep 1
+
if [ $addr_nr_ns2 -gt 0 ]; then
let add_nr_ns2=addr_nr_ns2
counter=3
- sleep 1
while [ $add_nr_ns2 -gt 0 ]; do
local addr
if is_v6 "${connect_addr}"; then
@@ -388,7 +430,6 @@ do_transfer()
let counter+=1
let add_nr_ns2-=1
done
- sleep 1
elif [ $addr_nr_ns2 -lt 0 ]; then
let rm_nr_ns2=-addr_nr_ns2
if [ $rm_nr_ns2 -lt 8 ]; then
@@ -396,19 +437,18 @@ do_transfer()
pos=1
dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
- sleep 1
-
while [ $counter -le $rm_nr_ns2 ]
do
+ # rm_addr operations are serialized; allow the previous one to complete
id=${dump[$pos]}
+ rm_addr=$(rm_addr_count ${listener_ns})
ip netns exec ${connector_ns} ./pm_nl_ctl del $id
- sleep 1
+ wait_rm_addr ${listener_ns} ${rm_addr}
let counter+=1
let pos+=5
done
fi
elif [ $rm_nr_ns2 -eq 8 ]; then
- sleep 1
ip netns exec ${connector_ns} ./pm_nl_ctl flush
elif [ $rm_nr_ns2 -eq 9 ]; then
local addr
@@ -417,7 +457,6 @@ do_transfer()
else
addr="10.0.1.2"
fi
- sleep 1
ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr
fi
fi
@@ -539,6 +578,14 @@ run_tests()
lret=$?
}
+dump_stats()
+{
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep Tcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep Tcp
+}
+
chk_csum_nr()
{
local msg=${1:-""}
@@ -570,12 +617,7 @@ chk_csum_nr()
else
echo "[ ok ]"
fi
- if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
+ [ "${dump_stats}" = 1 ] && dump_stats
}
chk_fail_nr()
@@ -607,12 +649,7 @@ chk_fail_nr()
echo "[ ok ]"
fi
- if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
+ [ "${dump_stats}" = 1 ] && dump_stats
}
chk_join_nr()
@@ -656,12 +693,7 @@ chk_join_nr()
else
echo "[ ok ]"
fi
- if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
+ [ "${dump_stats}" = 1 ] && dump_stats
if [ $checksum -eq 1 ]; then
chk_csum_nr
chk_fail_nr 0 0
@@ -823,12 +855,7 @@ chk_add_nr()
echo ""
fi
- if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
+ [ "${dump_stats}" = 1 ] && dump_stats
}
chk_rm_nr()
@@ -871,12 +898,7 @@ chk_rm_nr()
echo "[ ok ]"
fi
- if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
+ [ "${dump_stats}" = 1 ] && dump_stats
}
chk_prio_nr()
@@ -908,12 +930,7 @@ chk_prio_nr()
echo "[ ok ]"
fi
- if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
+ [ "${dump_stats}" = 1 ] && dump_stats
}
chk_link_usage()
@@ -937,6 +954,22 @@ chk_link_usage()
fi
}
+wait_for_tw()
+{
+ local timeout_ms=$((timeout_poll * 1000))
+ local time=0
+ local ns=$1
+
+ while [ $time -lt $timeout_ms ]; do
+ # ss prints a header line, so more than one line means at
+ # least one subflow reached time-wait
+ local cnt=$(ip netns exec $ns ss -t state time-wait | wc -l)
+
+ [ "$cnt" -gt 1 ] && return 0
+ time=$((time + 100))
+ sleep 0.1
+ done
+ return 1
+}
+
subflows_tests()
{
reset
@@ -994,6 +1027,61 @@ subflows_tests()
chk_join_nr "single subflow, dev" 1 1 1
}
+subflows_error_tests()
+{
+ # If a single subflow is configured, and matches the MPC src
+ # address, no additional subflow should be created
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ chk_join_nr "no MPC reuse with single endpoint" 0 0 0
+
+ # multiple subflows, with subflow creation error
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ chk_join_nr "multi subflows, with failing subflow" 1 1 1
+
+ # multiple subflows, with subflow timeout on MPJ
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j DROP
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ chk_join_nr "multi subflows, with subflow timeout" 1 1 1
+
+ # multiple subflows, check that the endpoint corresponding to the
+ # closed subflow (due to reset) is not reused if additional
+ # subflows are added later
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
+
+ # updates in the child shell do not have any effect here; we
+ # need to bump the test counter for the above case
+ TEST_COUNT=$((TEST_COUNT+1))
+
+ # mpj subflow will be in TW after the reset
+ wait_for_tw $ns2
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ wait
+
+ # an additional subflow can be created only if the PM selects
+ # the later endpoint, skipping the already used one
+ chk_join_nr "multi subflows, fair usage on close" 1 1 1
+}
+
signal_address_tests()
{
# add_address, unused
@@ -1071,7 +1159,10 @@ signal_address_tests()
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
run_tests $ns1 $ns2 10.0.1.1
- chk_add_nr 4 4
+
+ # the server will not signal the address terminating
+ # the MPC subflow
+ chk_add_nr 3 3
}
link_failure_tests()
@@ -1386,7 +1477,7 @@ ipv6_tests()
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
chk_join_nr "single subflow IPv6" 1 1 1
@@ -1421,7 +1512,7 @@ ipv6_tests()
ip netns exec $ns1 ./pm_nl_ctl limits 0 2
ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 2
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow
chk_join_nr "remove subflow and signal IPv6" 2 2 2
chk_add_nr 1 1
@@ -1577,7 +1668,7 @@ add_addr_ports_tests()
ip netns exec $ns2 ./pm_nl_ctl limits 1 3
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+ run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow
chk_join_nr "flush subflows and signal with port" 3 3 3
chk_add_nr 1 1
chk_rm_nr 2 2
@@ -1802,6 +1893,7 @@ fullmesh_tests()
all_tests()
{
subflows_tests
+ subflows_error_tests
signal_address_tests
link_failure_tests
add_addr_timeout_tests
@@ -1821,6 +1913,7 @@ usage()
{
echo "mptcp_join usage:"
echo " -f subflows_tests"
+ echo " -e subflows_error_tests"
echo " -s signal_address_tests"
echo " -l link_failure_tests"
echo " -t add_addr_timeout_tests"
@@ -1869,11 +1962,14 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
-while getopts 'fsltra64bpkdmchCS' opt; do
+while getopts 'fesltra64bpkdmchCS' opt; do
case $opt in
f)
subflows_tests
;;
+ e)
+ subflows_error_tests
+ ;;
s)
signal_address_tests
;;
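
wait_local_port_listen() above replaces a fixed `sleep 1` by polling /proc/net/tcp* until the port appears in state 0A (TCP_LISTEN). The same idea translates directly to C; the sketch below is hypothetical (not part of the patch) and assumes the textual field layout of /proc/net/tcp:

#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Poll /proc/net/tcp until @port is in LISTEN (state 0A); 1 on success. */
static int wait_port_listen(unsigned short port, int tries)
{
	char needle[8], line[512];

	snprintf(needle, sizeof(needle), ":%04X", (unsigned int)port);
	while (tries-- > 0) {
		FILE *f = fopen("/proc/net/tcp", "r");

		if (!f)
			return 0;
		while (fgets(line, sizeof(line), f)) {
			char local[64], remote[64], state[8];

			/* "  sl: local_address rem_address st ..." */
			if (sscanf(line, "%*d: %63s %63s %7s",
				   local, remote, state) == 3 &&
			    strstr(local, needle) && !strcmp(state, "0A")) {
				fclose(f);
				return 1;
			}
		}
		fclose(f);
		usleep(100 * 1000);	/* 100ms, matching the shell loop */
	}
	return 0;
}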
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index 417b11cafafe..ac9a4d9c1764 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -4,6 +4,7 @@
#include <assert.h>
#include <errno.h>
+#include <fcntl.h>
#include <limits.h>
#include <string.h>
#include <stdarg.h>
@@ -13,6 +14,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
+#include <time.h>
#include <unistd.h>
#include <sys/socket.h>
@@ -594,6 +596,44 @@ static int server(int pipefd)
return 0;
}
+static void test_ip_tos_sockopt(int fd)
+{
+ uint8_t tos_in, tos_out;
+ socklen_t s;
+ int r;
+
+ tos_in = rand() & 0xfc;
+ r = setsockopt(fd, SOL_IP, IP_TOS, &tos_in, sizeof(tos_out));
+ if (r != 0)
+ die_perror("setsockopt IP_TOS");
+
+ tos_out = 0;
+ s = sizeof(tos_out);
+ r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s);
+ if (r != 0)
+ die_perror("getsockopt IP_TOS");
+
+ if (tos_in != tos_out)
+ xerror("tos %x != %x socklen_t %d\n", tos_in, tos_out, s);
+
+ if (s != 1)
+ xerror("tos should be 1 byte");
+
+ s = 0;
+ r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s);
+ if (r != 0)
+ die_perror("getsockopt IP_TOS 0");
+ if (s != 0)
+ xerror("expect socklen_t == 0");
+
+ s = -1;
+ r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s);
+ if (r != -1 || errno != EINVAL)
+ die_perror("getsockopt IP_TOS did not indicate -EINVAL");
+ if (s != -1)
+ xerror("expect socklen_t == -1");
+}
+
static int client(int pipefd)
{
int fd = -1;
@@ -611,6 +651,8 @@ static int client(int pipefd)
xerror("Unknown pf %d\n", pf);
}
+ test_ip_tos_sockopt(fd);
+
connect_one_server(fd, pipefd);
return 0;
@@ -642,6 +684,25 @@ static int rcheck(int wstatus, const char *what)
return 111;
}
+static void init_rng(void)
+{
+ int fd = open("/dev/urandom", O_RDONLY);
+
+ if (fd >= 0) {
+ unsigned int foo;
+ ssize_t ret;
+
+ /* can't fail */
+ ret = read(fd, &foo, sizeof(foo));
+ assert(ret == sizeof(foo));
+
+ close(fd);
+ srand(foo);
+ } else {
+ srand(time(NULL));
+ }
+}
+
int main(int argc, char *argv[])
{
int e1, e2, wstatus;
@@ -650,6 +711,8 @@ int main(int argc, char *argv[])
parse_opts(argc, argv);
+ init_rng();
+
e1 = pipe(pipefds);
if (e1 < 0)
die_perror("pipe");
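
test_ip_tos_sockopt() masks the random value with 0xfc because the low two bits of the TOS byte carry the ECN field, which the kernel manages itself. A standalone round-trip looks like the hedged sketch below; tos_roundtrip() is a hypothetical helper, and SOL_IP is Linux-specific (portable code would use IPPROTO_IP):

#include <netinet/in.h>
#include <sys/socket.h>

/* Set the DSCP part of the TOS byte, leave the ECN bits clear, and
 * check that the kernel echoes the value back as a single byte. */
static int tos_roundtrip(int fd, unsigned char tos)
{
	unsigned char out = 0;
	socklen_t len = sizeof(out);

	tos &= 0xfc;	/* ECN bits are owned by the kernel */
	if (setsockopt(fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) != 0)
		return -1;
	if (getsockopt(fd, SOL_IP, IP_TOS, &out, &len) != 0)
		return -1;
	return (len == 1 && out == tos) ? 0 : -1;
}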
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 41de643788b8..0879da915014 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -178,7 +178,7 @@ do_transfer()
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
- $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS \
+ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS,TCPINQ \
${local_addr} < "$sin" > "$sout" &
spid=$!
@@ -186,7 +186,7 @@ do_transfer()
timeout ${timeout_test} \
ip netns exec ${connector_ns} \
- $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS \
+ $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS,TCPINQ \
$connect_addr < "$cin" > "$cout" &
cpid=$!
@@ -279,6 +279,45 @@ run_tests()
fi
}
+do_tcpinq_test()
+{
+ ip netns exec "$ns1" ./mptcp_inq "$@"
+ lret=$?
+ if [ $lret -ne 0 ];then
+ ret=$lret
+ echo "FAIL: mptcp_inq $@" 1>&2
+ return $lret
+ fi
+
+ echo "PASS: TCP_INQ cmsg/ioctl $@"
+ return $lret
+}
+
+do_tcpinq_tests()
+{
+ local lret=0
+
+ ip netns exec "$ns1" iptables -F
+ ip netns exec "$ns1" ip6tables -F
+
+ for args in "-t tcp" "-r tcp"; do
+ do_tcpinq_test $args
+ lret=$?
+ if [ $lret -ne 0 ] ; then
+ return $lret
+ fi
+ do_tcpinq_test -6 $args
+ lret=$?
+ if [ $lret -ne 0 ] ; then
+ return $lret
+ fi
+ done
+
+ do_tcpinq_test -r tcp -t tcp
+
+ return $?
+}
+
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
@@ -300,4 +339,5 @@ if [ $ret -eq 0 ];then
echo "PASS: SOL_MPTCP getsockopt has expected information"
fi
+do_tcpinq_tests
exit $ret
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index b599003eb5ba..d9a6fd2cd9d3 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -85,6 +85,7 @@ struct sock_args {
int version; /* AF_INET/AF_INET6 */
int use_setsockopt;
+ int use_freebind;
int use_cmsg;
const char *dev;
const char *server_dev;
@@ -514,6 +515,29 @@ static int set_membership(int sd, uint32_t grp, uint32_t addr, int ifindex)
return 0;
}
+static int set_freebind(int sd, int version)
+{
+ unsigned int one = 1;
+ int rc = 0;
+
+ switch (version) {
+ case AF_INET:
+ if (setsockopt(sd, SOL_IP, IP_FREEBIND, &one, sizeof(one))) {
+ log_err_errno("setsockopt(IP_FREEBIND)");
+ rc = -1;
+ }
+ break;
+ case AF_INET6:
+ if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) {
+ log_err_errno("setsockopt(IPV6_FREEBIND)");
+ rc = -1;
+ }
+ break;
+ }
+
+ return rc;
+}
+
static int set_broadcast(int sd)
{
unsigned int one = 1;
@@ -1419,6 +1443,9 @@ static int lsock_init(struct sock_args *args)
set_unicast_if(sd, args->ifindex, args->version))
goto err;
+ if (args->use_freebind && set_freebind(sd, args->version))
+ goto err;
+
if (bind_socket(sd, args))
goto err;
@@ -1827,7 +1854,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
return client_status;
}
-#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq"
+#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbqf"
#define OPT_FORCE_BIND_KEY_IFINDEX 1001
#define OPT_NO_BIND_KEY_IFINDEX 1002
@@ -1864,6 +1891,7 @@ static void print_usage(char *prog)
" -I dev bind socket to given device name - server mode\n"
" -S use setsockopt (IP_UNICAST_IF or IP_MULTICAST_IF)\n"
" to set device binding\n"
+ " -f bind socket with the IP[V6]_FREEBIND option\n"
" -C use cmsg and IP_PKTINFO to specify device binding\n"
"\n"
" -L len send random message of given length\n"
@@ -1999,6 +2027,9 @@ int main(int argc, char *argv[])
case 'S':
args.use_setsockopt = 1;
break;
+ case 'f':
+ args.use_freebind = 1;
+ break;
case 'C':
args.use_cmsg = 1;
break;
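
The new -f flag drives set_freebind() above. IP_FREEBIND exists so a process can bind to an address that is not configured on any interface yet, e.g. a daemon started before its virtual IP comes up. A minimal hypothetical sketch of that pattern for IPv4 (the address literal is only an example):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

/* Bind to @addr even if it is not (yet) assigned to an interface. */
static int bind_freebind(const char *addr, unsigned short port)
{
	struct sockaddr_in sin = { .sin_family = AF_INET };
	int one = 1;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	if (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) < 0 ||
	    inet_pton(AF_INET, addr, &sin.sin_addr) != 1) {
		close(fd);
		return -1;
	}
	sin.sin_port = htons(port);
	if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}

Usage would be e.g. bind_freebind("192.0.2.1", 8080) before 192.0.2.1 has been added to any device.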
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index b5277106df1f..072d709c96b4 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -24,9 +24,7 @@
#include <sys/resource.h>
#include <unistd.h>
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-#endif
+#include "../kselftest.h"
struct test_params {
int recv_family;
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index e4613ce4ed69..9eb42570294d 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -18,7 +18,7 @@
#include <linux/net_tstamp.h>
#include <linux/errqueue.h>
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#include "../kselftest.h"
struct options {
int so_timestamp;
diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings
index 694d70710ff0..dfc27cdc6c05 100644
--- a/tools/testing/selftests/net/settings
+++ b/tools/testing/selftests/net/settings
@@ -1 +1 @@
-timeout=300
+timeout=1500
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
index afca1ead677f..db1aeb8c5d1e 100644
--- a/tools/testing/selftests/net/socket.c
+++ b/tools/testing/selftests/net/socket.c
@@ -7,6 +7,8 @@
#include <sys/socket.h>
#include <netinet/in.h>
+#include "../kselftest.h"
+
struct socket_testcase {
int domain;
int type;
@@ -31,7 +33,6 @@ static struct socket_testcase tests[] = {
{ AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1 },
};
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define ERR_STRING_SZ 64
static int run_tests(void)
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
index 9c55ec44fc43..c1cb0c75156a 100644
--- a/tools/testing/selftests/net/tcp_fastopen_backup_key.c
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -26,6 +26,8 @@
#include <fcntl.h>
#include <time.h>
+#include "../kselftest.h"
+
#ifndef TCP_FASTOPEN_KEY
#define TCP_FASTOPEN_KEY 33
#endif
@@ -34,10 +36,6 @@
#define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key"
#define KEY_LENGTH 16
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-#endif
-
static bool do_ipv6;
static bool do_sockopt;
static bool do_rotate;
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index e61fc4c32ba2..6e468e0f42f7 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -31,6 +31,8 @@ struct tls_crypto_info_keys {
struct tls12_crypto_info_chacha20_poly1305 chacha20;
struct tls12_crypto_info_sm4_gcm sm4gcm;
struct tls12_crypto_info_sm4_ccm sm4ccm;
+ struct tls12_crypto_info_aes_ccm_128 aesccm128;
+ struct tls12_crypto_info_aes_gcm_256 aesgcm256;
};
size_t len;
};
@@ -61,6 +63,16 @@ static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
tls12->sm4ccm.info.version = tls_version;
tls12->sm4ccm.info.cipher_type = cipher_type;
break;
+ case TLS_CIPHER_AES_CCM_128:
+ tls12->len = sizeof(struct tls12_crypto_info_aes_ccm_128);
+ tls12->aesccm128.info.version = tls_version;
+ tls12->aesccm128.info.cipher_type = cipher_type;
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_256);
+ tls12->aesgcm256.info.version = tls_version;
+ tls12->aesgcm256.info.cipher_type = cipher_type;
+ break;
default:
break;
}
@@ -78,26 +90,21 @@ static void memrnd(void *s, size_t n)
*byte++ = rand();
}
-FIXTURE(tls_basic)
-{
- int fd, cfd;
- bool notls;
-};
-
-FIXTURE_SETUP(tls_basic)
+static void ulp_sock_pair(struct __test_metadata *_metadata,
+ int *fd, int *cfd, bool *notls)
{
struct sockaddr_in addr;
socklen_t len;
int sfd, ret;
- self->notls = false;
+ *notls = false;
len = sizeof(addr);
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
addr.sin_port = 0;
- self->fd = socket(AF_INET, SOCK_STREAM, 0);
+ *fd = socket(AF_INET, SOCK_STREAM, 0);
sfd = socket(AF_INET, SOCK_STREAM, 0);
ret = bind(sfd, &addr, sizeof(addr));
@@ -108,26 +115,96 @@ FIXTURE_SETUP(tls_basic)
ret = getsockname(sfd, &addr, &len);
ASSERT_EQ(ret, 0);
- ret = connect(self->fd, &addr, sizeof(addr));
+ ret = connect(*fd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
- self->cfd = accept(sfd, &addr, &len);
- ASSERT_GE(self->cfd, 0);
+ *cfd = accept(sfd, &addr, &len);
+ ASSERT_GE(*cfd, 0);
close(sfd);
- ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ ret = setsockopt(*fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
if (ret != 0) {
ASSERT_EQ(errno, ENOENT);
- self->notls = true;
+ *notls = true;
printf("Failure setting TCP_ULP, testing without tls\n");
return;
}
- ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ ret = setsockopt(*cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
ASSERT_EQ(ret, 0);
}
+/* Produce a basic cmsg */
+static int tls_send_cmsg(int fd, unsigned char record_type,
+ void *data, size_t len, int flags)
+{
+ char cbuf[CMSG_SPACE(sizeof(char))];
+ int cmsg_len = sizeof(char);
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ struct iovec vec;
+
+ vec.iov_base = data;
+ vec.iov_len = len;
+ memset(&msg, 0, sizeof(struct msghdr));
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_TLS;
+ /* test sending non-record types. */
+ cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+ cmsg->cmsg_len = CMSG_LEN(cmsg_len);
+ *CMSG_DATA(cmsg) = record_type;
+ msg.msg_controllen = cmsg->cmsg_len;
+
+ return sendmsg(fd, &msg, flags);
+}
+
+static int tls_recv_cmsg(struct __test_metadata *_metadata,
+ int fd, unsigned char record_type,
+ void *data, size_t len, int flags)
+{
+ char cbuf[CMSG_SPACE(sizeof(char))];
+ struct cmsghdr *cmsg;
+ unsigned char ctype;
+ struct msghdr msg;
+ struct iovec vec;
+ int n;
+
+ vec.iov_base = data;
+ vec.iov_len = len;
+ memset(&msg, 0, sizeof(struct msghdr));
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+
+ n = recvmsg(fd, &msg, flags);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ EXPECT_NE(cmsg, NULL);
+ EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
+ EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
+ ctype = *((unsigned char *)CMSG_DATA(cmsg));
+ EXPECT_EQ(ctype, record_type);
+
+ return n;
+}
+
+FIXTURE(tls_basic)
+{
+ int fd, cfd;
+ bool notls;
+};
+
+FIXTURE_SETUP(tls_basic)
+{
+ ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+}
+
FIXTURE_TEARDOWN(tls_basic)
{
close(self->fd);
@@ -196,63 +273,48 @@ FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm)
.cipher_type = TLS_CIPHER_SM4_CCM,
};
+FIXTURE_VARIANT_ADD(tls, 12_aes_ccm)
+{
+ .tls_version = TLS_1_2_VERSION,
+ .cipher_type = TLS_CIPHER_AES_CCM_128,
+};
+
+FIXTURE_VARIANT_ADD(tls, 13_aes_ccm)
+{
+ .tls_version = TLS_1_3_VERSION,
+ .cipher_type = TLS_CIPHER_AES_CCM_128,
+};
+
+FIXTURE_VARIANT_ADD(tls, 12_aes_gcm_256)
+{
+ .tls_version = TLS_1_2_VERSION,
+ .cipher_type = TLS_CIPHER_AES_GCM_256,
+};
+
+FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256)
+{
+ .tls_version = TLS_1_3_VERSION,
+ .cipher_type = TLS_CIPHER_AES_GCM_256,
+};
+
FIXTURE_SETUP(tls)
{
struct tls_crypto_info_keys tls12;
- struct sockaddr_in addr;
- socklen_t len;
- int sfd, ret;
-
- self->notls = false;
- len = sizeof(addr);
+ int ret;
tls_crypto_info_init(variant->tls_version, variant->cipher_type,
&tls12);
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(INADDR_ANY);
- addr.sin_port = 0;
-
- self->fd = socket(AF_INET, SOCK_STREAM, 0);
- sfd = socket(AF_INET, SOCK_STREAM, 0);
+ ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
- ret = bind(sfd, &addr, sizeof(addr));
- ASSERT_EQ(ret, 0);
- ret = listen(sfd, 10);
- ASSERT_EQ(ret, 0);
+ if (self->notls)
+ return;
- ret = getsockname(sfd, &addr, &len);
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
ASSERT_EQ(ret, 0);
- ret = connect(self->fd, &addr, sizeof(addr));
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len);
ASSERT_EQ(ret, 0);
-
- ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
- if (ret != 0) {
- self->notls = true;
- printf("Failure setting TCP_ULP, testing without tls\n");
- }
-
- if (!self->notls) {
- ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
- tls12.len);
- ASSERT_EQ(ret, 0);
- }
-
- self->cfd = accept(sfd, &addr, &len);
- ASSERT_GE(self->cfd, 0);
-
- if (!self->notls) {
- ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls",
- sizeof("tls"));
- ASSERT_EQ(ret, 0);
-
- ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
- tls12.len);
- ASSERT_EQ(ret, 0);
- }
-
- close(sfd);
}
FIXTURE_TEARDOWN(tls)
@@ -613,6 +675,95 @@ TEST_F(tls, splice_to_pipe)
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
+TEST_F(tls, splice_cmsg_to_pipe)
+{
+ char *test_str = "test_read";
+ char record_type = 100;
+ int send_len = 10;
+ char buf[10];
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, 0), send_len);
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(errno, EIO);
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+ buf, sizeof(buf), MSG_WAITALL),
+ send_len);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, splice_dec_cmsg_to_pipe)
+{
+ char *test_str = "test_read";
+ char record_type = 100;
+ int send_len = 10;
+ char buf[10];
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(errno, EIO);
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+ buf, sizeof(buf), MSG_WAITALL),
+ send_len);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_and_splice)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ int half = send_len / 2;
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+ /* Recv half of the record, splice the other half */
+ EXPECT_EQ(recv(self->cfd, mem_recv, half, MSG_WAITALL), half);
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, half, SPLICE_F_NONBLOCK),
+ half);
+ EXPECT_EQ(read(p[0], &mem_recv[half], half), half);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, peek_and_splice)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ int chunk = TLS_PAYLOAD_MAX_LEN / 4;
+ int n, i, p[2];
+
+ memrnd(mem_send, sizeof(mem_send));
+
+ ASSERT_GE(pipe(p), 0);
+ for (i = 0; i < 4; i++)
+ EXPECT_EQ(send(self->fd, &mem_send[chunk * i], chunk, 0),
+ chunk);
+
+ EXPECT_EQ(recv(self->cfd, mem_recv, chunk * 5 / 2,
+ MSG_WAITALL | MSG_PEEK),
+ chunk * 5 / 2);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, chunk * 5 / 2), 0);
+
+ n = 0;
+ while (n < send_len) {
+ i = splice(self->cfd, NULL, p[1], NULL, send_len - n, 0);
+ EXPECT_GT(i, 0);
+ n += i;
+ }
+ EXPECT_EQ(n, send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
TEST_F(tls, recvmsg_single)
{
char const *test_str = "test_recvmsg_single";
@@ -1193,60 +1344,30 @@ TEST_F(tls, mutliproc_sendpage_writers)
TEST_F(tls, control_msg)
{
- if (self->notls)
- return;
-
- char cbuf[CMSG_SPACE(sizeof(char))];
- char const *test_str = "test_read";
- int cmsg_len = sizeof(char);
+ char *test_str = "test_read";
char record_type = 100;
- struct cmsghdr *cmsg;
- struct msghdr msg;
int send_len = 10;
- struct iovec vec;
char buf[10];
- vec.iov_base = (char *)test_str;
- vec.iov_len = 10;
- memset(&msg, 0, sizeof(struct msghdr));
- msg.msg_iov = &vec;
- msg.msg_iovlen = 1;
- msg.msg_control = cbuf;
- msg.msg_controllen = sizeof(cbuf);
- cmsg = CMSG_FIRSTHDR(&msg);
- cmsg->cmsg_level = SOL_TLS;
- /* test sending non-record types. */
- cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
- cmsg->cmsg_len = CMSG_LEN(cmsg_len);
- *CMSG_DATA(cmsg) = record_type;
- msg.msg_controllen = cmsg->cmsg_len;
+ if (self->notls)
+ SKIP(return, "no TLS support");
- EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+ EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, 0),
+ send_len);
/* Should fail because we didn't provide a control message */
EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
- vec.iov_base = buf;
- EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL | MSG_PEEK), send_len);
-
- cmsg = CMSG_FIRSTHDR(&msg);
- EXPECT_NE(cmsg, NULL);
- EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
- EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
- record_type = *((unsigned char *)CMSG_DATA(cmsg));
- EXPECT_EQ(record_type, 100);
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+ buf, sizeof(buf), MSG_WAITALL | MSG_PEEK),
+ send_len);
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
/* Recv the message again without MSG_PEEK */
- record_type = 0;
memset(buf, 0, sizeof(buf));
- EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len);
- cmsg = CMSG_FIRSTHDR(&msg);
- EXPECT_NE(cmsg, NULL);
- EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
- EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
- record_type = *((unsigned char *)CMSG_DATA(cmsg));
- EXPECT_EQ(record_type, 100);
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+ buf, sizeof(buf), MSG_WAITALL),
+ send_len);
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
@@ -1301,6 +1422,160 @@ TEST_F(tls, shutdown_reuse)
EXPECT_EQ(errno, EISCONN);
}
+FIXTURE(tls_err)
+{
+ int fd, cfd;
+ int fd2, cfd2;
+ bool notls;
+};
+
+FIXTURE_VARIANT(tls_err)
+{
+ uint16_t tls_version;
+};
+
+FIXTURE_VARIANT_ADD(tls_err, 12_aes_gcm)
+{
+ .tls_version = TLS_1_2_VERSION,
+};
+
+FIXTURE_VARIANT_ADD(tls_err, 13_aes_gcm)
+{
+ .tls_version = TLS_1_3_VERSION,
+};
+
+FIXTURE_SETUP(tls_err)
+{
+ struct tls_crypto_info_keys tls12;
+ int ret;
+
+ tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128,
+ &tls12);
+
+ ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+ ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls);
+ if (self->notls)
+ return;
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd2, SOL_TLS, TLS_RX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(tls_err)
+{
+ close(self->fd);
+ close(self->cfd);
+ close(self->fd2);
+ close(self->cfd2);
+}
+
+TEST_F(tls_err, bad_rec)
+{
+ char buf[64];
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ memset(buf, 0x55, sizeof(buf));
+ EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf));
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EMSGSIZE);
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), MSG_DONTWAIT), -1);
+ EXPECT_EQ(errno, EAGAIN);
+}
+
+TEST_F(tls_err, bad_auth)
+{
+ char buf[128];
+ int n;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ memrnd(buf, sizeof(buf) / 2);
+ EXPECT_EQ(send(self->fd, buf, sizeof(buf) / 2, 0), sizeof(buf) / 2);
+ n = recv(self->cfd, buf, sizeof(buf), 0);
+ EXPECT_GT(n, sizeof(buf) / 2);
+
+ buf[n - 1]++;
+
+ EXPECT_EQ(send(self->fd2, buf, n, 0), n);
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+}
+
+TEST_F(tls_err, bad_in_large_read)
+{
+ char txt[3][64];
+ char cip[3][128];
+ char buf[3 * 128];
+ int i, n;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ /* Put 3 records in the sockets */
+ for (i = 0; i < 3; i++) {
+ memrnd(txt[i], sizeof(txt[i]));
+ EXPECT_EQ(send(self->fd, txt[i], sizeof(txt[i]), 0),
+ sizeof(txt[i]));
+ n = recv(self->cfd, cip[i], sizeof(cip[i]), 0);
+ EXPECT_GT(n, sizeof(txt[i]));
+ /* Break the third message */
+ if (i == 2)
+ cip[2][n - 1]++;
+ EXPECT_EQ(send(self->fd2, cip[i], n, 0), n);
+ }
+
+ /* We should be able to receive the first two messages */
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt[0]) * 2);
+ EXPECT_EQ(memcmp(buf, txt[0], sizeof(txt[0])), 0);
+ EXPECT_EQ(memcmp(buf + sizeof(txt[0]), txt[1], sizeof(txt[1])), 0);
+ /* Third message is bad */
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+}
+
+TEST_F(tls_err, bad_cmsg)
+{
+ char *test_str = "test_read";
+ int send_len = 10;
+ char cip[128];
+ char buf[128];
+ char txt[64];
+ int n;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ /* Queue up one data record */
+ memrnd(txt, sizeof(txt));
+ EXPECT_EQ(send(self->fd, txt, sizeof(txt), 0), sizeof(txt));
+ n = recv(self->cfd, cip, sizeof(cip), 0);
+ EXPECT_GT(n, sizeof(txt));
+ EXPECT_EQ(send(self->fd2, cip, n, 0), n);
+
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), send_len);
+ n = recv(self->cfd, cip, sizeof(cip), 0);
+ EXPECT_GT(n, send_len);
+ cip[n - 1]++; /* Break it */
+ EXPECT_EQ(send(self->fd2, cip, n, 0), n);
+
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt));
+ EXPECT_EQ(memcmp(buf, txt, sizeof(txt)), 0);
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+}
+
TEST(non_established) {
struct tls12_crypto_info_aes_gcm_256 tls12;
struct sockaddr_in addr;
@@ -1355,64 +1630,82 @@ TEST(non_established) {
TEST(keysizes) {
struct tls12_crypto_info_aes_gcm_256 tls12;
- struct sockaddr_in addr;
- int sfd, ret, fd, cfd;
- socklen_t len;
+ int ret, fd, cfd;
bool notls;
- notls = false;
- len = sizeof(addr);
-
memset(&tls12, 0, sizeof(tls12));
tls12.info.version = TLS_1_2_VERSION;
tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256;
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(INADDR_ANY);
- addr.sin_port = 0;
+ ulp_sock_pair(_metadata, &fd, &cfd, &notls);
- fd = socket(AF_INET, SOCK_STREAM, 0);
- sfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!notls) {
+ ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
+ sizeof(tls12));
+ EXPECT_EQ(ret, 0);
+
+ ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
+ sizeof(tls12));
+ EXPECT_EQ(ret, 0);
+ }
+
+ close(fd);
+ close(cfd);
+}
+
+TEST(tls_v6ops) {
+ struct tls_crypto_info_keys tls12;
+ struct sockaddr_in6 addr, addr2;
+ int sfd, ret, fd;
+ socklen_t len, len2;
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
+
+ addr.sin6_family = AF_INET6;
+ addr.sin6_addr = in6addr_any;
+ addr.sin6_port = 0;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ sfd = socket(AF_INET6, SOCK_STREAM, 0);
ret = bind(sfd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
ret = listen(sfd, 10);
ASSERT_EQ(ret, 0);
+ len = sizeof(addr);
ret = getsockname(sfd, &addr, &len);
ASSERT_EQ(ret, 0);
ret = connect(fd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
+ len = sizeof(addr);
+ ret = getsockname(fd, &addr, &len);
+ ASSERT_EQ(ret, 0);
+
ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
- if (ret != 0) {
- notls = true;
- printf("Failure setting TCP_ULP, testing without tls\n");
+ if (ret) {
+ ASSERT_EQ(errno, ENOENT);
+ SKIP(return, "no TLS support");
}
+ ASSERT_EQ(ret, 0);
- if (!notls) {
- ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
- sizeof(tls12));
- EXPECT_EQ(ret, 0);
- }
+ ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
- cfd = accept(sfd, &addr, &len);
- ASSERT_GE(cfd, 0);
+ ret = setsockopt(fd, SOL_TLS, TLS_RX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
- if (!notls) {
- ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls",
- sizeof("tls"));
- EXPECT_EQ(ret, 0);
+ len2 = sizeof(addr2);
+ ret = getsockname(fd, &addr2, &len2);
+ ASSERT_EQ(ret, 0);
- ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
- sizeof(tls12));
- EXPECT_EQ(ret, 0);
- }
+ EXPECT_EQ(len2, len);
+ EXPECT_EQ(memcmp(&addr, &addr2, len), 0);
- close(sfd);
close(fd);
- close(cfd);
+ close(sfd);
}
TEST_HARNESS_MAIN
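
All the tls.c fixtures now funnel through ulp_sock_pair(), whose core is the two-step kTLS attach: install the "tls" ULP with TCP_ULP, then push key material with TLS_TX/TLS_RX. Reduced to its bones it looks like the sketch below (not the harness code; the zeroed key material is a placeholder, real users fill iv/key/salt/rec_seq from a completed handshake):

#include <linux/tls.h>
#include <netinet/tcp.h>
#include <string.h>
#include <sys/socket.h>

#ifndef TCP_ULP
#define TCP_ULP 31	/* from linux/tcp.h, for older userspace headers */
#endif
#ifndef SOL_TLS
#define SOL_TLS 282	/* from linux/socket.h, for older userspace headers */
#endif

/* Attach kTLS on the TX path of a connected TCP socket. */
static int attach_ktls_tx(int fd)
{
	struct tls12_crypto_info_aes_gcm_128 ci;

	if (setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")))
		return -1;	/* ENOENT: kernel lacks TLS ULP support */

	memset(&ci, 0, sizeof(ci));	/* placeholder keys, for illustration */
	ci.info.version = TLS_1_2_VERSION;
	ci.info.cipher_type = TLS_CIPHER_AES_GCM_128;
	return setsockopt(fd, SOL_TLS, TLS_TX, &ci, sizeof(ci));
}

The notls fallback in the tests corresponds to the first setsockopt() failing with ENOENT.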
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
index 710ac956bdb3..c5489341cfb8 100644
--- a/tools/testing/selftests/net/toeplitz.c
+++ b/tools/testing/selftests/net/toeplitz.c
@@ -498,7 +498,7 @@ static void parse_opts(int argc, char **argv)
bool have_toeplitz = false;
int index, c;
- while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) {
+ while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
switch (c) {
case '4':
cfg_family = AF_INET;
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 7f26591f236b..6f05e06f6761 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -132,7 +132,7 @@ run_test() {
local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \
sed -e 's/\[//' -e 's/:.*//'`
if [ $rcv != $pkts ]; then
- echo " fail - received $rvs packets, expected $pkts"
+ echo " fail - received $rcv packets, expected $pkts"
ret=1
return
fi
@@ -185,6 +185,7 @@ for family in 4 6; do
IPT=iptables
SUFFIX=24
VXDEV=vxlan
+ PING=ping
if [ $family = 6 ]; then
BM_NET=$BM_NET_V6
@@ -192,6 +193,8 @@ for family in 4 6; do
SUFFIX="64 nodad"
VXDEV=vxlan6
IPT=ip6tables
+ # Use ping6 on systems where ping doesn't handle IPv6
+ ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING="ping6"
fi
echo "IPv$family"
@@ -237,7 +240,7 @@ for family in 4 6; do
# load arp cache before running the test to reduce the amount of
# stray traffic on top of the UDP tunnel
- ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null
+ ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
cleanup
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index c66da6ffd6d8..7badaf215de2 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -156,13 +156,13 @@ struct testcase testcases_v4[] = {
},
{
/* send max number of min sized segments */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+ .tlen = UDP_MAX_SEGMENTS,
.gso_len = 1,
- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+ .r_num_mss = UDP_MAX_SEGMENTS,
},
{
/* send max number + 1 of min sized segments: fail */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
+ .tlen = UDP_MAX_SEGMENTS + 1,
.gso_len = 1,
.tfail = true,
},
@@ -259,13 +259,13 @@ struct testcase testcases_v6[] = {
},
{
/* send max number of min sized segments */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+ .tlen = UDP_MAX_SEGMENTS,
.gso_len = 1,
- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+ .r_num_mss = UDP_MAX_SEGMENTS,
},
{
/* send max number + 1 of min sized segments: fail */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
+ .tlen = UDP_MAX_SEGMENTS + 1,
.gso_len = 1,
.tfail = true,
},
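
The corrected testcases now send exactly UDP_MAX_SEGMENTS one-byte segments, since the limit counts segments regardless of header size. For context, a GSO send hands the kernel one large buffer plus a segment size; a hedged sketch follows (send_udp_gso() is a hypothetical helper, and the UDP_SEGMENT fallback define assumes the value from linux/udp.h):

#include <netinet/in.h>
#include <netinet/udp.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>

#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103	/* from linux/udp.h, for older userspace headers */
#endif

/* Send @len bytes as a train of @gso_size-byte UDP datagrams; the
 * socket must be connected, and len / gso_size must stay within the
 * kernel's UDP_MAX_SEGMENTS limit. */
static ssize_t send_udp_gso(int fd, void *buf, size_t len, uint16_t gso_size)
{
	char ctrl[CMSG_SPACE(sizeof(uint16_t))] = {0};
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = ctrl,
		.msg_controllen = sizeof(ctrl),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

	cm->cmsg_level = SOL_UDP;
	cm->cmsg_type = UDP_SEGMENT;
	cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
	memcpy(CMSG_DATA(cm), &gso_size, sizeof(gso_size));

	return sendmsg(fd, &msg, 0);
}

Setting the size once with setsockopt(fd, SOL_UDP, UDP_SEGMENT, ...) instead of a per-call cmsg works as well.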
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index 17512a43885e..f1fdaa270291 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -419,6 +419,7 @@ static void usage(const char *filepath)
static void parse_opts(int argc, char **argv)
{
+ const char *bind_addr = NULL;
int max_len, hdrlen;
int c;
@@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv)
cfg_cpu = strtol(optarg, NULL, 0);
break;
case 'D':
- setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+ bind_addr = optarg;
break;
case 'l':
cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
@@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv)
}
}
+ if (!bind_addr)
+ bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
+
+ setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr);
+
if (optind != argc)
usage(argv[0]);
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 8748199ac109..ffca314897c4 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -5,7 +5,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
- ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh
+ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
+ conntrack_vrf.sh
LDLIBS = -lmnl
TEST_GEN_FILES = nf-queue
diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh
new file mode 100755
index 000000000000..8b5ea9234588
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh
@@ -0,0 +1,241 @@
+#!/bin/sh
+
+# This script demonstrates interaction of conntrack and vrf.
+# The vrf driver calls the netfilter hooks again, with oif/iif
+# pointing at the VRF device.
+#
+# For ingress, this means the first iteration has the iifname of the
+# lower/real device; in this script, that's veth0.
+# The second iteration has iifname set to the vrf device, tvrf in this script.
+#
+# For egress, this is reversed: first iteration has the vrf device,
+# second iteration is done with the lower/real/veth0 device.
+#
+# test_ct_zone_in demonstrates an unexpected change of nftables
+# behavior caused by commit 09e856d54bda5f28 ("vrf: Reset skb conntrack
+# connection on VRF rcv").
+#
+# It was possible to assign a conntrack zone to a packet (or mark it for
+# `notracking`) in the prerouting chain before conntrack, based on the real iif.
+#
+# After the change, that zone assignment is lost and the zone is instead
+# assigned based on the VRF master interface (in case such a rule exists),
+# so it is impossible to distinguish packets based on the original
+# interface.
+#
+# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem
+# that was supposed to be fixed by the commit mentioned above to make sure
+# that any fix to test case 1 won't break masquerade again.
+
+ksft_skip=4
+
+IP0=172.30.30.1
+IP1=172.30.30.2
+PFXL=30
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+
+cleanup()
+{
+ ip netns pids $ns0 | xargs kill 2>/dev/null
+ ip netns pids $ns1 | xargs kill 2>/dev/null
+
+ ip netns del $ns0 $ns1
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add "$ns0"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $ns0"
+ exit $ksft_skip
+fi
+ip netns add "$ns1"
+
+trap cleanup EXIT
+
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not add veth device"
+ exit $ksft_skip
+fi
+
+ip -net $ns0 li add tvrf type vrf table 9876
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not add vrf device"
+ exit $ksft_skip
+fi
+
+ip -net $ns0 li set lo up
+
+ip -net $ns0 li set veth0 master tvrf
+ip -net $ns0 li set tvrf up
+ip -net $ns0 li set veth0 up
+ip -net $ns1 li set veth0 up
+
+ip -net $ns0 addr add $IP0/$PFXL dev veth0
+ip -net $ns1 addr add $IP1/$PFXL dev veth0
+
+# $? of a backgrounded command is always 0, so probe for the tool first
+ip netns exec $ns1 iperf3 -v > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without iperf3 tool"
+ exit $ksft_skip
+fi
+
+ip netns exec $ns1 iperf3 -s > /dev/null 2>&1 &
+
+# test vrf ingress handling.
+# The incoming connection should be placed in conntrack zone 1,
+# as decided by the first iteration of the ruleset.
+test_ct_zone_in()
+{
+ip netns exec $ns0 nft -f - <<EOF
+table testct {
+ chain rawpre {
+ type filter hook prerouting priority raw;
+
+ iif { veth0, tvrf } counter meta nftrace set 1
+ iif veth0 counter ct zone set 1 counter return
+ iif tvrf counter ct zone set 2 counter return
+ ip protocol icmp counter
+ notrack counter
+ }
+
+ chain rawout {
+ type filter hook output priority raw;
+
+ oif veth0 counter ct zone set 1 counter return
+ oif tvrf counter ct zone set 2 counter return
+ notrack counter
+ }
+}
+EOF
+ ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
+
+ # should be in zone 1, not zone 2
+ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+ if [ $count -eq 1 ]; then
+ echo "PASS: entry found in conntrack zone 1"
+ else
+ echo "FAIL: entry not found in conntrack zone 1"
+ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+ if [ $count -eq 1 ]; then
+ echo "FAIL: entry found in zone 2 instead"
+ else
+ echo "FAIL: entry not in zone 1 or 2, dumping table"
+ ip netns exec $ns0 conntrack -L
+ ip netns exec $ns0 nft list ruleset
+ fi
+ fi
+}
+
+# add masq rule that gets evaluated with outif set to the vrf device.
+# This tests the first iteration of the packet through conntrack,
+# oifname is the vrf device.
+test_masquerade_vrf()
+{
+ local qdisc=$1
+
+ if [ "$qdisc" != "default" ]; then
+ tc -net $ns0 qdisc add dev tvrf root $qdisc
+ fi
+
+ ip netns exec $ns0 conntrack -F 2>/dev/null
+
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+ chain rawout {
+ type filter hook output priority raw;
+
+ oif tvrf ct state untracked counter
+ }
+ chain postrouting2 {
+ type filter hook postrouting priority mangle;
+
+ oif tvrf ct state untracked counter
+ }
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ # NB: masquerade should always be combined with 'oif(name) bla',
+ # lack of this is intentional here, we want to exercise double-snat.
+ ip saddr 172.30.30.0/30 counter masquerade random
+ }
+}
+EOF
+ ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
+ if [ $? -ne 0 ]; then
+ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
+ ret=1
+ return
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' &&
+ ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]'
+ if [ $? -eq 0 ]; then
+ echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
+ else
+ echo "FAIL: vrf rules have unexpected counter value"
+ ret=1
+ fi
+
+ if [ "$qdisc" != "default" ]; then
+ tc -net $ns0 qdisc del dev tvrf root
+ fi
+}
+
+# add masq rule that gets evaluated with outif set to the veth device.
+# This tests the 2nd iteration of the packet through conntrack,
+# oifname is the lower device (veth0 in this case).
+test_masquerade_veth()
+{
+ ip netns exec $ns0 conntrack -F 2>/dev/null
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
+ }
+}
+EOF
+ ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
+ if [ $? -ne 0 ]; then
+ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
+ ret=1
+ return
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
+ if [ $? -eq 0 ]; then
+ echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
+ else
+ echo "FAIL: vrf masq rule has unexpected counter value"
+ ret=1
+ fi
+}
+
+test_ct_zone_in
+test_masquerade_vrf "default"
+test_masquerade_vrf "pfifo"
+test_masquerade_veth
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
index 4a6f5c3b3215..eb9553e4986b 100755
--- a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
+++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
@@ -41,7 +41,7 @@ checktool (){
checktool "iptables --version" "run test without iptables"
checktool "ip -Version" "run test without ip tool"
-checktool "which nc" "run test without nc (netcat)"
+checktool "which socat" "run test without socat"
checktool "ip netns add ${r_a}" "create net namespace"
for n in ${r_b} ${r_w} ${c_a} ${c_b};do
@@ -60,11 +60,12 @@ trap cleanup EXIT
test_path() {
msg="$1"
- ip netns exec ${c_b} nc -n -w 3 -q 3 -u -l -p 5000 > ${rx} < /dev/null &
+ ip netns exec ${c_b} socat -t 3 - udp4-listen:5000,reuseaddr > ${rx} < /dev/null &
sleep 1
for i in 1 2 3; do
- head -c1400 /dev/zero | tr "\000" "a" | ip netns exec ${c_a} nc -n -w 1 -u 192.168.20.2 5000
+ head -c1400 /dev/zero | tr "\000" "a" | \
+ ip netns exec ${c_a} socat -t 1 -u STDIN UDP:192.168.20.2:5000
done
wait
@@ -189,7 +190,7 @@ ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
#---------------------
#Now we send a 1400 bytes UDP packet from Client A to Client B:
-# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000
+# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000
test_path "without"
# The IPv4 stack on Client A already knows the PMTU to Client B, so the
diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
index cfee3b65be0f..1092bbcb1fba 100755
--- a/tools/testing/selftests/netfilter/nf_nat_edemux.sh
+++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
@@ -76,23 +76,23 @@ ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1
sleep 1
# add a persistent connection from the other namespace
-ip netns exec $ns2 nc -q 10 -w 10 192.168.1.1 5201 > /dev/null &
+ip netns exec $ns2 socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
sleep 1
# ip daddr:dport will be rewritten to 192.168.1.1 5201
# NAT must reallocate source port 10000 because
# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
-echo test | ip netns exec $ns2 nc -w 3 -q 3 10.96.0.1 443 >/dev/null
+echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443 >/dev/null
ret=$?
kill $iperfs
-# Check nc can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
+# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
if [ $ret -eq 0 ]; then
- echo "PASS: nc can connect via NAT'd address"
+ echo "PASS: socat can connect via NAT'd address"
else
- echo "FAIL: nc cannot connect via NAT'd address"
+ echo "FAIL: socat cannot connect via NAT'd address"
exit 1
fi
diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh
index 5a4938d6dcf2..ed61f6cab60f 100755
--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/netfilter/nft_concat_range.sh
@@ -23,8 +23,8 @@ TESTS="reported_issues correctness concurrency timeout"
# Set types, defined by TYPE_ variables below
TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
- net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port
- net_port_mac_proto_net"
+ net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp
+ net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
BUGS="flush_remove_add"
@@ -277,6 +277,23 @@ perf_entries 1000
perf_proto ipv4
"
+TYPE_mac_net="
+display mac,net
+type_spec ether_addr . ipv4_addr
+chain_spec ether saddr . ip saddr
+dst
+src mac addr4
+start 1
+count 5
+src_delta 2000
+tools sendip nc bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
TYPE_net_mac_icmp="
display net,mac - ICMP
type_spec ipv4_addr . ether_addr
@@ -984,7 +1001,8 @@ format() {
fi
done
for f in ${src}; do
- __expr="${__expr} . "
+ [ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
__start="$(eval format_"${f}" "${srcstart}")"
__end="$(eval format_"${f}" "${srcend}")"
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index da1c1e4b6c86..349a319a9e51 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -759,19 +759,21 @@ test_port_shadow()
local result=""
local logmsg=""
- echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
- nc_r=$!
+ # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
+ echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
- echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
- nc_c=$!
+ echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 &
+ sc_r=$!
- # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
- echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null
+ echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport &
+ sc_c=$!
+
+ sleep 0.3
# ns1 tries to connect to ns0:1405. With default settings this should connect
# to client, it matches the conntrack entry created above.
- result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405)
+ result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
if [ "$result" = "$expect" ] ;then
echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
@@ -780,7 +782,7 @@ test_port_shadow()
ret=1
fi
- kill $nc_r $nc_c 2>/dev/null
+ kill $sc_r $sc_c 2>/dev/null
# flush udp entries for next test round, if any
ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
@@ -816,11 +818,10 @@ table $family raw {
chain prerouting {
type filter hook prerouting priority -300; policy accept;
meta iif veth0 udp dport 1405 notrack
- udp dport 1405 notrack
}
chain output {
type filter hook output priority -300; policy accept;
- udp sport 1405 notrack
+ meta oif veth0 udp sport 1405 notrack
}
}
EOF
@@ -851,6 +852,18 @@ test_port_shadowing()
{
local family="ip"
+ conntrack -h >/dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run nat port shadowing test without conntrack tool"
+ return
+ fi
+
+ socat -h > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run nat port shadowing test without socat tool"
+ return
+ fi
+
ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
@@ -867,8 +880,9 @@ EOF
return $ksft_skip
fi
- # test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
- test_port_shadow "default" "CLIENT"
+ # test default behaviour. Packet from ns1 to ns0 is not redirected
+ # due to automatic port translation.
+ test_port_shadow "default" "ROUTER"
# test packet filter based mitigation: prevent forwarding of
# packets claiming to come from the service port.
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
index 3d202b90b33d..7d27f1f3bc01 100755
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -16,6 +16,10 @@ timeout=4
cleanup()
{
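+	# Kill any helper processes (such as nf-queue) still running inside the namespaces.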
+ ip netns pids ${ns1} | xargs kill 2>/dev/null
+ ip netns pids ${ns2} | xargs kill 2>/dev/null
+ ip netns pids ${nsrouter} | xargs kill 2>/dev/null
+
ip netns del ${ns1}
ip netns del ${ns2}
ip netns del ${nsrouter}
@@ -332,6 +336,55 @@ EOF
echo "PASS: tcp via loopback and re-queueing"
}
+test_icmp_vrf() {
+ ip -net $ns1 link add tvrf type vrf table 9876
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not add vrf device"
+ return
+ fi
+
+ ip -net $ns1 li set eth0 master tvrf
+ ip -net $ns1 li set tvrf up
+
+ ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec ${ns1} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+}
+EOF
+ ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout &
+ local nfqpid=$!
+
+ sleep 1
+ ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
+ for n in output post; do
+ for d in tvrf eth0; do
+ ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"
+ if [ $? -ne 0 ] ; then
+ echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+ ip netns exec ${ns1} nft list ruleset
+ ret=1
+ return
+ fi
+ done
+ done
+
+ wait $nfqpid
+ [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf"
+ wait 2>/dev/null
+}
+
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
@@ -372,5 +425,6 @@ test_queue 20
test_tcp_forward
test_tcp_localhost
test_tcp_localhost_requeue
+test_icmp_vrf
exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh
index ac646376eb01..04633119b29a 100755
--- a/tools/testing/selftests/netfilter/nft_zones_many.sh
+++ b/tools/testing/selftests/netfilter/nft_zones_many.sh
@@ -18,11 +18,17 @@ cleanup()
ip netns del $ns
}
-ip netns add $ns
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $gw"
- exit $ksft_skip
-fi
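+# checktool CMD MSG: run CMD; if it fails, print "SKIP: Could not MSG" and exit with the skip code.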
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
+
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "socat -V" "run test without socat tool"
+checktool "ip netns add $ns" "create net namespace"
trap cleanup EXIT
@@ -71,7 +77,8 @@ EOF
local start=$(date +%s%3N)
i=$((i + 10000))
j=$((j + 1))
- dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null
+ # nft rule in output places each packet in a different zone.
+ dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
if [ $? -ne 0 ] ;then
ret=1
break
diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
index b0b20e0b4e30..f43aa4b77fba 100755
--- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh
+++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
@@ -44,7 +44,10 @@ mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush
for m in $mitigations
do
- do_one "$m" &
+ if [[ -f /sys/kernel/debug/powerpc/$m ]]
+ then
+ do_one "$m" &
+ fi
done
echo "Spawned threads enabling/disabling mitigations ..."
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index adc2b7294e5f..83647b8277e7 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -193,7 +193,7 @@ int spectre_v2_test(void)
* We are not vulnerable and reporting otherwise, so
* missing such a mismatch is safe.
*/
- if (state == VULNERABLE)
+ if (miss_percent > 95)
return 4;
return 1;
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
index ce3375cd8e73..9d0915777fed 100644
--- a/tools/testing/selftests/powerpc/signal/.gitignore
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -4,3 +4,5 @@ signal_tm
sigfuz
sigreturn_vdso
sig_sc_double_restart
+sigreturn_kernel
+sigreturn_unaligned
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index d6ae54663aed..f679d260afc8 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart
+TEST_GEN_PROGS += sigreturn_kernel
+TEST_GEN_PROGS += sigreturn_unaligned
CFLAGS += -maltivec
$(OUTPUT)/signal_tm: CFLAGS += -mhtm
diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c
new file mode 100644
index 000000000000..0a1b6e591eee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that we can't sigreturn to kernel addresses, or to kernel mode.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
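+/* MSR bit 14 is PR (problem state): set in userspace, clear in kernel mode. */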
+#define MSR_PR (1ul << 14)
+
+static volatile unsigned long long sigreturn_addr;
+static volatile unsigned long long sigreturn_msr_mask;
+
+static void sigusr1_handler(int signo, siginfo_t *si, void *uc_ptr)
+{
+ ucontext_t *uc = (ucontext_t *)uc_ptr;
+
+ if (sigreturn_addr)
+ UCONTEXT_NIA(uc) = sigreturn_addr;
+
+ if (sigreturn_msr_mask)
+ UCONTEXT_MSR(uc) &= sigreturn_msr_mask;
+}
+
+static pid_t fork_child(void)
+{
+ pid_t pid;
+
+ pid = fork();
+ if (pid == 0) {
+ raise(SIGUSR1);
+ exit(0);
+ }
+
+ return pid;
+}
+
+static int expect_segv(pid_t pid)
+{
+ int child_ret;
+
+ waitpid(pid, &child_ret, 0);
+ FAIL_IF(WIFEXITED(child_ret));
+ FAIL_IF(!WIFSIGNALED(child_ret));
+ FAIL_IF(WTERMSIG(child_ret) != 11);
+
+ return 0;
+}
+
+int test_sigreturn_kernel(void)
+{
+ struct sigaction act;
+ int child_ret, i;
+ pid_t pid;
+
+ act.sa_sigaction = sigusr1_handler;
+ act.sa_flags = SA_SIGINFO;
+ sigemptyset(&act.sa_mask);
+
+ FAIL_IF(sigaction(SIGUSR1, &act, NULL));
+
+ for (i = 0; i < 2; i++) {
+ // Return to kernel
+ sigreturn_addr = 0xcull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to kernel virtual
+ sigreturn_addr = 0xc008ull << 48;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return out of range
+ sigreturn_addr = 0xc010ull << 48;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to no-man's land, just below PAGE_OFFSET
+ sigreturn_addr = (0xcull << 60) - (64 * 1024);
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to no-man's land, above TASK_SIZE_4PB
+ sigreturn_addr = 0x1ull << 52;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to 0xd space
+ sigreturn_addr = 0xdull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to 0xe space
+ sigreturn_addr = 0xeull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to 0xf space
+ sigreturn_addr = 0xfull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Attempt to set PR=0 for 2nd loop (should be blocked by kernel)
+ sigreturn_msr_mask = ~MSR_PR;
+ }
+
+ printf("All children killed as expected\n");
+
+ // Don't change address, just MSR, should return to user as normal
+ sigreturn_addr = 0;
+ sigreturn_msr_mask = ~MSR_PR;
+ pid = fork_child();
+ waitpid(pid, &child_ret, 0);
+ FAIL_IF(!WIFEXITED(child_ret));
+ FAIL_IF(WIFSIGNALED(child_ret));
+ FAIL_IF(WEXITSTATUS(child_ret) != 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_sigreturn_kernel, "sigreturn_kernel");
+}
diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c
new file mode 100644
index 000000000000..6e58ee4f0fdf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test sigreturn to an unaligned address, ie. low 2 bits set.
+ * Nothing bad should happen.
+ * This was able to trigger warnings with CONFIG_PPC_RFI_SRR_DEBUG=y.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+static void sigusr1_handler(int signo, siginfo_t *info, void *ptr)
+{
+ ucontext_t *uc = ptr;
+
+ UCONTEXT_NIA(uc) |= 3;
+}
+
+static int test_sigreturn_unaligned(void)
+{
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_sigaction = sigusr1_handler;
+ action.sa_flags = SA_SIGINFO;
+
+ FAIL_IF(sigaction(SIGUSR1, &action, NULL) == -1);
+
+ raise(SIGUSR1);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_sigreturn_unaligned, "sigreturn_unaligned");
+}
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index f7911aaeb007..c0f6a062364d 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -354,6 +354,18 @@ int main(int argc, char *argv[])
}
}
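+	/* Set the pin function first, so later extts/perout requests use the configured pin. */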
+ if (pin_index >= 0) {
+ memset(&desc, 0, sizeof(desc));
+ desc.index = pin_index;
+ desc.func = pin_func;
+ desc.chan = index;
+ if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) {
+ perror("PTP_PIN_SETFUNC");
+ } else {
+ puts("set pin function okay");
+ }
+ }
+
if (extts) {
memset(&extts_request, 0, sizeof(extts_request));
extts_request.index = index;
@@ -444,18 +456,6 @@ int main(int argc, char *argv[])
}
}
- if (pin_index >= 0) {
- memset(&desc, 0, sizeof(desc));
- desc.index = pin_index;
- desc.func = pin_func;
- desc.chan = index;
- if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) {
- perror("PTP_PIN_SETFUNC");
- } else {
- puts("set pin function okay");
- }
- }
-
if (pps != -1) {
int enable = pps ? 1 : 0;
if (ioctl(fd, PTP_ENABLE_PPS, enable)) {
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index daf64b507038..2e9e9e2eedb6 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -30,9 +30,9 @@ editor=${EDITOR-vi}
files=
for i in ${rundir}/*/Make.out
do
- if egrep -q "error:|warning:" < $i
+ if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i
then
- egrep "error:|warning:" < $i > $i.diags
+ egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
files="$files $i.diags $i"
fi
done
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index fbdf162b6acd..1c4c2c727dad 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -25,7 +25,7 @@ stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
tr -d '\012\015'`"
-fwdprog="`grep 'rcu_torture_fwd_prog_cr Duration' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k15nr | head -1 | awk '{ print $14 " " $15 }'`"
+fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }'`"
if test -z "$ngps"
then
echo "$configfile ------- " $stopstate
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index c7d42ef80c53..e09b1bc78708 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -144,7 +144,7 @@ do
if test "$ret" -ne 0
then
echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
- exit 4 | tee -a "$oldrun/remote-log"
+ exit 4
fi
done
@@ -157,8 +157,15 @@ do
ret=$?
if test "$ret" -ne 0
then
- echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
- exit 10 | tee -a "$oldrun/remote-log"
+ echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. | tee -a "$oldrun/remote-log"
+ sleep 60
+ cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
+ exit 10
+ fi
fi
done
@@ -177,16 +184,16 @@ checkremotefile () {
ret=$?
if test "$ret" -eq 255
then
- echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date`
+ echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log"
elif test "$ret" -eq 0
then
return 0
elif test "$ret" -eq 1
then
- echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\"
+ echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" | tee -a "$oldrun/remote-log"
return 1
else
- echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date`
+ echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log"
return $ret
fi
sleep $sleeptime
@@ -245,7 +252,7 @@ do
sleep 30
fi
done
-echo All batches started. `date`
+echo All batches started. `date` | tee -a "$oldrun/remote-log"
# Wait for all remaining scenarios to complete and collect results.
for i in $systems
@@ -254,7 +261,7 @@ do
do
sleep 30
done
- echo " ---" Collecting results from $i `date`
+ echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
done
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 6cf9ec6a3d1c..6de0c183db5b 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -74,7 +74,9 @@ usage () {
echo " --help"
echo " --interactive"
echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
+ echo " --kasan"
echo " --kconfig Kconfig-options"
+ echo " --kcsan"
echo " --kmake-arg kernel-make-arguments"
echo " --mac nn:nn:nn:nn:nn:nn"
echo " --memory megabytes|nnnG"
@@ -83,6 +85,7 @@ usage () {
echo " --qemu-cmd qemu-system-..."
echo " --remote"
echo " --results absolute-pathname"
+ echo " --shutdown-grace seconds"
echo " --torture lock|rcu|rcuscale|refscale|scf"
echo " --trust-make"
exit 1
@@ -175,14 +178,14 @@ do
jitter="$2"
shift
;;
+ --kasan)
+ TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+ ;;
--kconfig|--kconfigs)
checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
shift
;;
- --kasan)
- TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
- ;;
--kcsan)
TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
;;
diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh
index 9313e5065ae9..2dbfca3589b1 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh
@@ -39,7 +39,8 @@ fi
grep warning: < $F > $T/warnings
grep "include/linux/*rcu*\.h:" $T/warnings > $T/hwarnings
grep "kernel/rcu/[^/]*:" $T/warnings > $T/cwarnings
-cat $T/hwarnings $T/cwarnings > $T/rcuwarnings
+grep "^ld: .*undefined reference to" $T/warnings | head -1 > $T/ldwarnings
+cat $T/hwarnings $T/cwarnings $T/ldwarnings > $T/rcuwarnings
if test -s $T/rcuwarnings
then
print_warning $title build errors:
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T
index d6557c38dfe4..c70cf0405f24 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T
@@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_SRCU=y
CONFIG_RCU_TRACE=n
CONFIG_DEBUG_LOCK_ALLOC=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U
index 6bc24e99862f..bc9eeabaa1b1 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U
@@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_SRCU=y
CONFIG_RCU_TRACE=n
CONFIG_DEBUG_LOCK_ALLOC=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
index 22cdeced98ea..30ca5b493c4b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
@@ -1,2 +1,3 @@
rcutorture.torture_type=tasks
rcutree.use_softirq=0
+rcupdate.rcu_task_enqueue_lim=4
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
index 6db705e55487..0953c52fcfd7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
@@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
index d8674264318d..30439f6fc20e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_RCU=y
CONFIG_HZ_PERIODIC=y
CONFIG_NO_HZ_IDLE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
index 9675ad632dcc..ba6d636a4856 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
@@ -1 +1,2 @@
rcutorture.torture_type=tasks-tracing
+rcupdate.rcu_task_enqueue_lim=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
index 9675ad632dcc..c70b5db6c2ae 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
@@ -1 +1,2 @@
rcutorture.torture_type=tasks-tracing
+rcutorture.fwd_progress=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index b5b53973c01e..8ae41d5f81a3 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -6,7 +6,6 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=y
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
CONFIG_MAXSMP=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
index 65daee4fbf5a..2871ee599891 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
CONFIG_RCU_FANOUT=3
CONFIG_RCU_FANOUT_LEAF=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot
new file mode 100644
index 000000000000..dd914fa8f690
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot
@@ -0,0 +1 @@
+rcutorture.fwd_progress=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index f6d6a40c0576..22ad0261728d 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
-CONFIG_RCU_FAST_NO_HZ=y
CONFIG_RCU_TRACE=y
CONFIG_RCU_FANOUT=4
CONFIG_RCU_FANOUT_LEAF=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
index 4f95f8544f3f..9f48c73709ec 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
CONFIG_HOTPLUG_CPU=y
CONFIG_RCU_FANOUT=6
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
index bf4980d606b5..db27651de04b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
CONFIG_RCU_FANOUT=6
CONFIG_RCU_FANOUT_LEAF=6
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index d7afb271a586..2789b47e4ecd 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
CONFIG_RCU_FANOUT=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
index c810c5276a89..8b561355b9ef 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
CONFIG_RCU_FANOUT=3
CONFIG_RCU_FANOUT_LEAF=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
index 7311f84a5876..4a00539bfdd7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot
new file mode 100644
index 000000000000..dd914fa8f690
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot
@@ -0,0 +1 @@
+rcutorture.fwd_progress=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY
index fb05ef5279b4..0fa2dc086e10 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY
@@ -2,11 +2,11 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_PROVE_LOCKING=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
index e6baa2fbaeb3..227aba7783af 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
@@ -5,7 +5,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_PROVE_LOCKING=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
index 4cc1cc581321..f110d9ffbe4c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
@@ -6,7 +6,6 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
index f5952061fde7..9f83e5372796 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
index ad505a887bec..7f06838a91e6 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
@@ -6,7 +6,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT
index 4f08e641bb6b..52e3ef674056 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT
@@ -6,7 +6,6 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index 1b96d68473b8..42acb1a64ce1 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -15,7 +15,6 @@ CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
-CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs.
CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with
rcu_nocbs=0, and one with all rcu_nocbs CPUs.
CONFIG_RCU_TRACE -- Do half.
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
index eb3f6db36d36..b953a52ff706 100644
--- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -9,10 +9,9 @@
#include <string.h>
#include <stddef.h>
+#include "../kselftest.h"
#include "rseq.h"
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
struct percpu_lock_entry {
intptr_t v;
} __attribute__((aligned(128)));
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 7159eb777fd3..fb440dfca158 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -27,10 +27,9 @@
#include <signal.h>
#include <limits.h>
+#include "../kselftest.h"
#include "rseq.h"
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
__thread volatile struct rseq __rseq_abi = {
.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
};
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index 6e5102a7d7c9..5b5c9d558dee 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -18,7 +18,7 @@
#include <sys/syscall.h>
#include <sys/types.h>
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+#include "../kselftest.h"
unsigned long long timing(clockid_t clk_id, unsigned long long samples)
{
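kselftest.h supplies the ARRAY_SIZE helper that these tests previously open-coded, matching the macro the diff removes:

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))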
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index d425688cf59c..9d126d7fabdb 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1487,7 +1487,7 @@ TEST_F(precedence, log_is_fifth_in_any_order)
#define PTRACE_EVENT_SECCOMP 7
#endif
-#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP)
+#define PTRACE_EVENT_MASK(status) ((status) >> 16)
bool tracer_running;
void tracer_stop(int sig)
{
@@ -1539,12 +1539,22 @@ void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
if (wait(&status) != tracee)
continue;
- if (WIFSIGNALED(status) || WIFEXITED(status))
- /* Child is dead. Time to go. */
+
+ if (WIFSIGNALED(status)) {
+ /* Child caught a fatal signal. */
+ return;
+ }
+ if (WIFEXITED(status)) {
+ /* Child exited with code. */
return;
+ }
- /* Check if this is a seccomp event. */
- ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
+ /* Check if we got an expected event. */
+ ASSERT_EQ(WIFCONTINUED(status), false);
+ ASSERT_EQ(WIFSTOPPED(status), true);
+ ASSERT_EQ(WSTOPSIG(status) & SIGTRAP, SIGTRAP) {
+ TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status));
+ }
tracer_func(_metadata, tracee, status, args);
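A sketch of how these wait-status macros fit together (standard <sys/wait.h> semantics; a helper for illustration only, not part of the patch):

#include <stdio.h>
#include <sys/wait.h>

/* Same idea as PTRACE_EVENT_MASK() above: the ptrace event is stored in
 * the bits above the signal/stop information. */
static void describe_status(int status)
{
	if (WIFEXITED(status))
		printf("exited, code %d\n", WEXITSTATUS(status));
	else if (WIFSIGNALED(status))
		printf("killed by signal %d\n", WTERMSIG(status));
	else if (WIFSTOPPED(status))
		/* With PTRACE_O_TRACESYSGOOD, syscall stops set 0x80 in
		 * the stop signal; ptrace events live in bits 16 and up. */
		printf("stopped: sig %d, event %d\n",
		       WSTOPSIG(status) & 0x7f, status >> 16);
}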
@@ -1961,6 +1971,11 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
int ret;
unsigned long msg;
+ EXPECT_EQ(PTRACE_EVENT_MASK(status), PTRACE_EVENT_SECCOMP) {
+ TH_LOG("Unexpected ptrace event: %d", PTRACE_EVENT_MASK(status));
+ return;
+ }
+
/* Make sure we got the right message. */
ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
EXPECT_EQ(0, ret);
@@ -2011,6 +2026,11 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
long *syscall_nr = NULL, *syscall_ret = NULL;
FIXTURE_DATA(TRACE_syscall) *self = args;
+ EXPECT_EQ(WSTOPSIG(status) & 0x80, 0x80) {
+ TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status));
+ return;
+ }
+
/*
* The traditional way to tell PTRACE_SYSCALL entry/exit
* is by counting.
@@ -2128,6 +2148,7 @@ FIXTURE_SETUP(TRACE_syscall)
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
+ /* Do not install seccomp rewrite filters, as we'll use ptrace instead. */
if (variant->use_ptrace)
return;
@@ -2186,6 +2207,29 @@ TEST_F(TRACE_syscall, syscall_faked)
EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}
+TEST_F_SIGNAL(TRACE_syscall, kill_immediate, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mknodat, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ /* Install "kill on mknodat" filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* This should immediately die with SIGSYS, regardless of tracer. */
+ EXPECT_EQ(-1, syscall(__NR_mknodat, -1, NULL, 0, 0));
+}
+
TEST_F(TRACE_syscall, skip_after)
{
struct sock_filter filter[] = {
@@ -4087,7 +4131,7 @@ TEST(user_notification_addfd)
* lowest available fd to be assigned here.
*/
EXPECT_EQ(fd, nextfd++);
- EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
+ ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/*
* This sets the ID of the ADD FD to the last request plus 1. The
diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
index 7f12d55b97f8..2956584e1e37 100644
--- a/tools/testing/selftests/sgx/Makefile
+++ b/tools/testing/selftests/sgx/Makefile
@@ -45,7 +45,7 @@ $(OUTPUT)/sign_key.o: sign_key.S
$(CC) $(HOST_CFLAGS) -c $< -o $@
$(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S
- $(CC) $(ENCL_CFLAGS) -T $^ -o $@
+ $(CC) $(ENCL_CFLAGS) -T $^ -o $@ -Wl,--build-id=none
EXTRA_CLEAN := \
$(OUTPUT)/test_encl.elf \
diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h
index f88562afcaa0..02d775789ea7 100644
--- a/tools/testing/selftests/sgx/defines.h
+++ b/tools/testing/selftests/sgx/defines.h
@@ -19,13 +19,38 @@
#include "../../../../arch/x86/include/uapi/asm/sgx.h"
enum encl_op_type {
- ENCL_OP_PUT,
- ENCL_OP_GET,
+ ENCL_OP_PUT_TO_BUFFER,
+ ENCL_OP_GET_FROM_BUFFER,
+ ENCL_OP_PUT_TO_ADDRESS,
+ ENCL_OP_GET_FROM_ADDRESS,
+ ENCL_OP_NOP,
+ ENCL_OP_MAX,
};
-struct encl_op {
+struct encl_op_header {
uint64_t type;
- uint64_t buffer;
+};
+
+struct encl_op_put_to_buf {
+ struct encl_op_header header;
+ uint64_t value;
+};
+
+struct encl_op_get_from_buf {
+ struct encl_op_header header;
+ uint64_t value;
+};
+
+struct encl_op_put_to_addr {
+ struct encl_op_header header;
+ uint64_t value;
+ uint64_t addr;
+};
+
+struct encl_op_get_from_addr {
+ struct encl_op_header header;
+ uint64_t value;
+ uint64_t addr;
};
#endif /* DEFINES_H */
diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
index 3ebe5d1fe337..9d4322c946e2 100644
--- a/tools/testing/selftests/sgx/load.c
+++ b/tools/testing/selftests/sgx/load.c
@@ -21,6 +21,8 @@
void encl_delete(struct encl *encl)
{
+ struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1];
+
if (encl->encl_base)
munmap((void *)encl->encl_base, encl->encl_size);
@@ -30,6 +32,8 @@ void encl_delete(struct encl *encl)
if (encl->fd)
close(encl->fd);
+ munmap(heap_seg->src, heap_seg->size);
+
if (encl->segment_tbl)
free(encl->segment_tbl);
@@ -107,11 +111,14 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg)
memset(&secinfo, 0, sizeof(secinfo));
secinfo.flags = seg->flags;
- ioc.src = (uint64_t)encl->src + seg->offset;
+ ioc.src = (uint64_t)seg->src;
ioc.offset = seg->offset;
ioc.length = seg->size;
ioc.secinfo = (unsigned long)&secinfo;
- ioc.flags = SGX_PAGE_MEASURE;
+ if (seg->measure)
+ ioc.flags = SGX_PAGE_MEASURE;
+ else
+ ioc.flags = 0;
rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc);
if (rc < 0) {
@@ -122,11 +129,10 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg)
return true;
}
-
-
-bool encl_load(const char *path, struct encl *encl)
+bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
{
const char device_path[] = "/dev/sgx_enclave";
+ struct encl_segment *seg;
Elf64_Phdr *phdr_tbl;
off_t src_offset;
Elf64_Ehdr *ehdr;
@@ -178,6 +184,8 @@ bool encl_load(const char *path, struct encl *encl)
ehdr = encl->bin;
phdr_tbl = encl->bin + ehdr->e_phoff;
+ encl->nr_segments = 1; /* one for the heap */
+
for (i = 0; i < ehdr->e_phnum; i++) {
Elf64_Phdr *phdr = &phdr_tbl[i];
@@ -193,7 +201,6 @@ bool encl_load(const char *path, struct encl *encl)
for (i = 0, j = 0; i < ehdr->e_phnum; i++) {
Elf64_Phdr *phdr = &phdr_tbl[i];
unsigned int flags = phdr->p_flags;
- struct encl_segment *seg;
if (phdr->p_type != PT_LOAD)
continue;
@@ -216,6 +223,7 @@ bool encl_load(const char *path, struct encl *encl)
if (j == 0) {
src_offset = phdr->p_offset & PAGE_MASK;
+ encl->src = encl->bin + src_offset;
seg->prot = PROT_READ | PROT_WRITE;
seg->flags = SGX_PAGE_TYPE_TCS << 8;
@@ -228,15 +236,27 @@ bool encl_load(const char *path, struct encl *encl)
seg->offset = (phdr->p_offset & PAGE_MASK) - src_offset;
seg->size = (phdr->p_filesz + PAGE_SIZE - 1) & PAGE_MASK;
+ seg->src = encl->src + seg->offset;
+ seg->measure = true;
j++;
}
- assert(j == encl->nr_segments);
+ assert(j == encl->nr_segments - 1);
+
+ seg = &encl->segment_tbl[j];
+ seg->offset = encl->segment_tbl[j - 1].offset + encl->segment_tbl[j - 1].size;
+ seg->size = heap_size;
+ seg->src = mmap(NULL, heap_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ seg->prot = PROT_READ | PROT_WRITE;
+ seg->flags = (SGX_PAGE_TYPE_REG << 8) | seg->prot;
+ seg->measure = false;
+
+ if (seg->src == MAP_FAILED)
+ goto err;
- encl->src = encl->bin + src_offset;
- encl->src_size = encl->segment_tbl[j - 1].offset +
- encl->segment_tbl[j - 1].size;
+ encl->src_size = encl->segment_tbl[j].offset + encl->segment_tbl[j].size;
for (encl->encl_size = 4096; encl->encl_size < encl->src_size; )
encl->encl_size <<= 1;
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index e252015e0c15..370c4995f7c4 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-20 Intel Corporation. */
+#include <cpuid.h>
#include <elf.h>
#include <errno.h>
#include <fcntl.h>
@@ -21,6 +22,7 @@
#include "main.h"
static const uint64_t MAGIC = 0x1122334455667788ULL;
+static const uint64_t MAGIC2 = 0x8877665544332211ULL;
vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave;
struct vdso_symtab {
@@ -107,12 +109,32 @@ static Elf64_Sym *vdso_symtab_get(struct vdso_symtab *symtab, const char *name)
return NULL;
}
+/*
+ * Return the offset in the enclave where the data segment can be found.
+ * The first RW segment loaded is the TCS; skip it to get the offset of
+ * the data segment.
+ */
+static off_t encl_get_data_offset(struct encl *encl)
+{
+ int i;
+
+ for (i = 1; i < encl->nr_segments; i++) {
+ struct encl_segment *seg = &encl->segment_tbl[i];
+
+ if (seg->prot == (PROT_READ | PROT_WRITE))
+ return seg->offset;
+ }
+
+ return -1;
+}
+
FIXTURE(enclave) {
struct encl encl;
struct sgx_enclave_run run;
};
-FIXTURE_SETUP(enclave)
+static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
+ struct __test_metadata *_metadata)
{
Elf64_Sym *sgx_enter_enclave_sym = NULL;
struct vdso_symtab symtab;
@@ -122,31 +144,25 @@ FIXTURE_SETUP(enclave)
unsigned int i;
void *addr;
- if (!encl_load("test_encl.elf", &self->encl)) {
- encl_delete(&self->encl);
- ksft_exit_skip("cannot load enclaves\n");
+ if (!encl_load("test_encl.elf", encl, heap_size)) {
+ encl_delete(encl);
+ TH_LOG("Failed to load the test enclave.\n");
}
- for (i = 0; i < self->encl.nr_segments; i++) {
- seg = &self->encl.segment_tbl[i];
-
- TH_LOG("0x%016lx 0x%016lx 0x%02x", seg->offset, seg->size, seg->prot);
- }
-
- if (!encl_measure(&self->encl))
+ if (!encl_measure(encl))
goto err;
- if (!encl_build(&self->encl))
+ if (!encl_build(encl))
goto err;
/*
	 * Only an enclave consumer needs to do this.
*/
- for (i = 0; i < self->encl.nr_segments; i++) {
- struct encl_segment *seg = &self->encl.segment_tbl[i];
+ for (i = 0; i < encl->nr_segments; i++) {
+ struct encl_segment *seg = &encl->segment_tbl[i];
- addr = mmap((void *)self->encl.encl_base + seg->offset, seg->size,
- seg->prot, MAP_SHARED | MAP_FIXED, self->encl.fd, 0);
+ addr = mmap((void *)encl->encl_base + seg->offset, seg->size,
+ seg->prot, MAP_SHARED | MAP_FIXED, encl->fd, 0);
EXPECT_NE(addr, MAP_FAILED);
if (addr == MAP_FAILED)
goto err;
@@ -166,8 +182,16 @@ FIXTURE_SETUP(enclave)
vdso_sgx_enter_enclave = addr + sgx_enter_enclave_sym->st_value;
- memset(&self->run, 0, sizeof(self->run));
- self->run.tcs = self->encl.encl_base;
+ return true;
+
+err:
+ encl_delete(encl);
+
+ for (i = 0; i < encl->nr_segments; i++) {
+ seg = &encl->segment_tbl[i];
+
+ TH_LOG("0x%016lx 0x%016lx 0x%02x", seg->offset, seg->size, seg->prot);
+ }
maps_file = fopen("/proc/self/maps", "r");
if (maps_file != NULL) {
@@ -181,11 +205,13 @@ FIXTURE_SETUP(enclave)
fclose(maps_file);
}
-err:
- if (!sgx_enter_enclave_sym)
- encl_delete(&self->encl);
+ TH_LOG("Failed to initialize the test enclave.\n");
+
+ return false;
+}
- ASSERT_NE(sgx_enter_enclave_sym, NULL);
+FIXTURE_SETUP(enclave)
+{
}
FIXTURE_TEARDOWN(enclave)
@@ -215,44 +241,130 @@ FIXTURE_TEARDOWN(enclave)
TEST_F(enclave, unclobbered_vdso)
{
- struct encl_op op;
+ struct encl_op_get_from_buf get_op;
+ struct encl_op_put_to_buf put_op;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ put_op.header.type = ENCL_OP_PUT_TO_BUFFER;
+ put_op.value = MAGIC;
+
+ EXPECT_EQ(ENCL_CALL(&put_op, &self->run, false), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+
+ get_op.header.type = ENCL_OP_GET_FROM_BUFFER;
+ get_op.value = 0;
+
+ EXPECT_EQ(ENCL_CALL(&get_op, &self->run, false), 0);
+
+ EXPECT_EQ(get_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+}
+
+/*
+ * A section metric is concatenated in a way that @low bits 12-31 define the
+ * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the
+ * metric.
+ */
+static unsigned long sgx_calc_section_metric(unsigned int low,
+ unsigned int high)
+{
+ return (low & GENMASK_ULL(31, 12)) +
+ ((high & GENMASK_ULL(19, 0)) << 32);
+}
+
+/*
+ * Sum total available physical SGX memory across all EPC sections
+ *
+ * Return: total physical SGX memory available on the system
+ */
+static unsigned long get_total_epc_mem(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ unsigned long total_size = 0;
+ unsigned int type;
+ int section = 0;
+
+ while (true) {
+ __cpuid_count(SGX_CPUID, section + SGX_CPUID_EPC, eax, ebx, ecx, edx);
+
+ type = eax & SGX_CPUID_EPC_MASK;
+ if (type == SGX_CPUID_EPC_INVALID)
+ break;
- op.type = ENCL_OP_PUT;
- op.buffer = MAGIC;
+ if (type != SGX_CPUID_EPC_SECTION)
+ break;
- EXPECT_EQ(ENCL_CALL(&op, &self->run, false), 0);
+ total_size += sgx_calc_section_metric(ecx, edx);
+
+ section++;
+ }
+
+ return total_size;
+}
+
+TEST_F(enclave, unclobbered_vdso_oversubscribed)
+{
+ struct encl_op_get_from_buf get_op;
+ struct encl_op_put_to_buf put_op;
+ unsigned long total_mem;
+
+ total_mem = get_total_epc_mem();
+ ASSERT_NE(total_mem, 0);
+ ASSERT_TRUE(setup_test_encl(total_mem, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ put_op.header.type = ENCL_OP_PUT_TO_BUFFER;
+ put_op.value = MAGIC;
+
+ EXPECT_EQ(ENCL_CALL(&put_op, &self->run, false), 0);
EXPECT_EEXIT(&self->run);
EXPECT_EQ(self->run.user_data, 0);
- op.type = ENCL_OP_GET;
- op.buffer = 0;
+ get_op.header.type = ENCL_OP_GET_FROM_BUFFER;
+ get_op.value = 0;
- EXPECT_EQ(ENCL_CALL(&op, &self->run, false), 0);
+ EXPECT_EQ(ENCL_CALL(&get_op, &self->run, false), 0);
- EXPECT_EQ(op.buffer, MAGIC);
+ EXPECT_EQ(get_op.value, MAGIC);
EXPECT_EEXIT(&self->run);
EXPECT_EQ(self->run.user_data, 0);
}
TEST_F(enclave, clobbered_vdso)
{
- struct encl_op op;
+ struct encl_op_get_from_buf get_op;
+ struct encl_op_put_to_buf put_op;
- op.type = ENCL_OP_PUT;
- op.buffer = MAGIC;
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
- EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0);
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ put_op.header.type = ENCL_OP_PUT_TO_BUFFER;
+ put_op.value = MAGIC;
+
+ EXPECT_EQ(ENCL_CALL(&put_op, &self->run, true), 0);
EXPECT_EEXIT(&self->run);
EXPECT_EQ(self->run.user_data, 0);
- op.type = ENCL_OP_GET;
- op.buffer = 0;
+ get_op.header.type = ENCL_OP_GET_FROM_BUFFER;
+ get_op.value = 0;
- EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0);
+ EXPECT_EQ(ENCL_CALL(&get_op, &self->run, true), 0);
- EXPECT_EQ(op.buffer, MAGIC);
+ EXPECT_EQ(get_op.value, MAGIC);
EXPECT_EEXIT(&self->run);
EXPECT_EQ(self->run.user_data, 0);
}
@@ -267,27 +379,179 @@ static int test_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r
TEST_F(enclave, clobbered_vdso_and_user_function)
{
- struct encl_op op;
+ struct encl_op_get_from_buf get_op;
+ struct encl_op_put_to_buf put_op;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
self->run.user_handler = (__u64)test_handler;
self->run.user_data = 0xdeadbeef;
- op.type = ENCL_OP_PUT;
- op.buffer = MAGIC;
+ put_op.header.type = ENCL_OP_PUT_TO_BUFFER;
+ put_op.value = MAGIC;
- EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0);
+ EXPECT_EQ(ENCL_CALL(&put_op, &self->run, true), 0);
EXPECT_EEXIT(&self->run);
EXPECT_EQ(self->run.user_data, 0);
- op.type = ENCL_OP_GET;
- op.buffer = 0;
+ get_op.header.type = ENCL_OP_GET_FROM_BUFFER;
+ get_op.value = 0;
- EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0);
+ EXPECT_EQ(ENCL_CALL(&get_op, &self->run, true), 0);
- EXPECT_EQ(op.buffer, MAGIC);
+ EXPECT_EQ(get_op.value, MAGIC);
EXPECT_EEXIT(&self->run);
EXPECT_EQ(self->run.user_data, 0);
}
+/*
+ * Sanity check that it is possible to enter either of the two hardcoded TCSs.
+ */
+TEST_F(enclave, tcs_entry)
+{
+ struct encl_op_header op;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ op.type = ENCL_OP_NOP;
+
+ EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /* Move to the next TCS. */
+ self->run.tcs = self->encl.encl_base + PAGE_SIZE;
+
+ EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+}
+
+/*
+ * Second page of .data segment is used to test changing PTE permissions.
+ * This spans the local encl_buffer within the test enclave.
+ *
+ * 1) Start with a sanity check: a value is written to the target page within
+ * the enclave and read back to ensure target page can be written to.
+ * 2) Change PTE permissions (RW -> RO) of target page within enclave.
+ * 3) Repeat (1) - this time expecting a regular #PF communicated via the
+ * vDSO.
+ * 4) Change PTE permissions of target page within enclave back to be RW.
+ * 5) Repeat (1) by resuming enclave, now expected to be possible to write to
+ * and read from target page within enclave.
+ */
+TEST_F(enclave, pte_permissions)
+{
+ struct encl_op_get_from_addr get_addr_op;
+ struct encl_op_put_to_addr put_addr_op;
+ unsigned long data_start;
+ int ret;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ data_start = self->encl.encl_base +
+ encl_get_data_offset(&self->encl) +
+ PAGE_SIZE;
+
+ /*
+ * Sanity check to ensure it is possible to write to page that will
+ * have its permissions manipulated.
+ */
+
+ /* Write MAGIC to page */
+ put_addr_op.value = MAGIC;
+ put_addr_op.addr = data_start;
+ put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /*
+ * Read memory that was just written to, confirming that it is the
+ * value previously written (MAGIC).
+ */
+ get_addr_op.value = 0;
+ get_addr_op.addr = data_start;
+ get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_addr_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /* Change PTE permissions of target page within the enclave */
+ ret = mprotect((void *)data_start, PAGE_SIZE, PROT_READ);
+ if (ret)
+ perror("mprotect");
+
+ /*
+ * PTE permissions of target page changed to read-only, EPCM
+ * permissions unchanged (EPCM permissions are RW), attempt to
+ * write to the page, expecting a regular #PF.
+ */
+
+ put_addr_op.value = MAGIC2;
+
+ EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
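+	/* Vector 14 is #PF; error code 0x7 = page present | write | user mode. */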
+ EXPECT_EQ(self->run.exception_vector, 14);
+ EXPECT_EQ(self->run.exception_error_code, 0x7);
+ EXPECT_EQ(self->run.exception_addr, data_start);
+
+ self->run.exception_vector = 0;
+ self->run.exception_error_code = 0;
+ self->run.exception_addr = 0;
+
+ /*
+ * Change PTE permissions back to enable enclave to write to the
+ * target page and resume enclave - do not expect any exceptions this
+ * time.
+ */
+ ret = mprotect((void *)data_start, PAGE_SIZE, PROT_READ | PROT_WRITE);
+ if (ret)
+ perror("mprotect");
+
+ EXPECT_EQ(vdso_sgx_enter_enclave((unsigned long)&put_addr_op, 0,
+ 0, ERESUME, 0, 0, &self->run),
+ 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ get_addr_op.value = 0;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_addr_op.value, MAGIC2);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
index 68672fd86cf9..b45c52ec7ab3 100644
--- a/tools/testing/selftests/sgx/main.h
+++ b/tools/testing/selftests/sgx/main.h
@@ -6,11 +6,15 @@
#ifndef MAIN_H
#define MAIN_H
+#define ENCL_HEAP_SIZE_DEFAULT 4096
+
struct encl_segment {
+ void *src;
off_t offset;
size_t size;
unsigned int prot;
unsigned int flags;
+ bool measure;
};
struct encl {
@@ -31,7 +35,7 @@ extern unsigned char sign_key[];
extern unsigned char sign_key_end[];
void encl_delete(struct encl *ctx);
-bool encl_load(const char *path, struct encl *encl);
+bool encl_load(const char *path, struct encl *encl, unsigned long heap_size);
bool encl_measure(struct encl *encl);
bool encl_build(struct encl *encl);
diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
index 92bbc5a15c39..50c5ab1aa6fa 100644
--- a/tools/testing/selftests/sgx/sigstruct.c
+++ b/tools/testing/selftests/sgx/sigstruct.c
@@ -289,15 +289,17 @@ static bool mrenclave_eextend(EVP_MD_CTX *ctx, uint64_t offset,
static bool mrenclave_segment(EVP_MD_CTX *ctx, struct encl *encl,
struct encl_segment *seg)
{
- uint64_t end = seg->offset + seg->size;
+ uint64_t end = seg->size;
uint64_t offset;
- for (offset = seg->offset; offset < end; offset += PAGE_SIZE) {
- if (!mrenclave_eadd(ctx, offset, seg->flags))
+ for (offset = 0; offset < end; offset += PAGE_SIZE) {
+ if (!mrenclave_eadd(ctx, seg->offset + offset, seg->flags))
return false;
- if (!mrenclave_eextend(ctx, offset, encl->src + offset))
- return false;
+ if (seg->measure) {
+ if (!mrenclave_eextend(ctx, seg->offset + offset, seg->src + offset))
+ return false;
+ }
}
return true;
diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c
index 734ea52f9924..4fca01cfd898 100644
--- a/tools/testing/selftests/sgx/test_encl.c
+++ b/tools/testing/selftests/sgx/test_encl.c
@@ -4,6 +4,11 @@
#include <stddef.h>
#include "defines.h"
+/*
+ * Data buffer spanning two pages that will be placed first in .data
+ * segment. Even if not used internally, the second page is needed by the
+ * external test that manipulates page permissions.
+ */
static uint8_t encl_buffer[8192] = { 1 };
static void *memcpy(void *dest, const void *src, size_t n)
@@ -16,20 +21,51 @@ static void *memcpy(void *dest, const void *src, size_t n)
return dest;
}
-void encl_body(void *rdi, void *rsi)
+static void do_encl_op_put_to_buf(void *op)
+{
+ struct encl_op_put_to_buf *op2 = op;
+
+ memcpy(&encl_buffer[0], &op2->value, 8);
+}
+
+static void do_encl_op_get_from_buf(void *op)
{
- struct encl_op *op = (struct encl_op *)rdi;
+ struct encl_op_get_from_buf *op2 = op;
+
+ memcpy(&op2->value, &encl_buffer[0], 8);
+}
+
+static void do_encl_op_put_to_addr(void *_op)
+{
+ struct encl_op_put_to_addr *op = _op;
+
+ memcpy((void *)op->addr, &op->value, 8);
+}
- switch (op->type) {
- case ENCL_OP_PUT:
- memcpy(&encl_buffer[0], &op->buffer, 8);
- break;
+static void do_encl_op_get_from_addr(void *_op)
+{
+ struct encl_op_get_from_addr *op = _op;
+
+ memcpy(&op->value, (void *)op->addr, 8);
+}
+
+static void do_encl_op_nop(void *_op)
+{
+
+}
+
+void encl_body(void *rdi, void *rsi)
+{
+ const void (*encl_op_array[ENCL_OP_MAX])(void *) = {
+ do_encl_op_put_to_buf,
+ do_encl_op_get_from_buf,
+ do_encl_op_put_to_addr,
+ do_encl_op_get_from_addr,
+ do_encl_op_nop,
+ };
- case ENCL_OP_GET:
- memcpy(&op->buffer, &encl_buffer[0], 8);
- break;
+ struct encl_op_header *op = (struct encl_op_header *)rdi;
- default:
- break;
- }
+ if (op->type < ENCL_OP_MAX)
+ (*encl_op_array[op->type])(op);
}
diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S
index 5d5680d4ea39..82fb0dfcbd23 100644
--- a/tools/testing/selftests/sgx/test_encl_bootstrap.S
+++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S
@@ -12,7 +12,7 @@
.fill 1, 8, 0 # STATE (set by CPU)
.fill 1, 8, 0 # FLAGS
- .quad encl_ssa # OSSA
+ .quad encl_ssa_tcs1 # OSSA
.fill 1, 4, 0 # CSSA (set by CPU)
.fill 1, 4, 1 # NSSA
.quad encl_entry # OENTRY
@@ -23,10 +23,10 @@
.fill 1, 4, 0xFFFFFFFF # GSLIMIT
.fill 4024, 1, 0 # Reserved
- # Identical to the previous TCS.
+ # TCS2
.fill 1, 8, 0 # STATE (set by CPU)
.fill 1, 8, 0 # FLAGS
- .quad encl_ssa # OSSA
+ .quad encl_ssa_tcs2 # OSSA
.fill 1, 4, 0 # CSSA (set by CPU)
.fill 1, 4, 1 # NSSA
.quad encl_entry # OENTRY
@@ -40,8 +40,9 @@
.text
encl_entry:
- # RBX contains the base address for TCS, which is also the first address
- # inside the enclave. By adding the value of le_stack_end to it, we get
+ # RBX contains the base address for TCS, which is the first address
+ # inside the enclave for TCS #1 and one page into the enclave for
+ # TCS #2. By adding the value of encl_stack to it, we get
# the absolute address for the stack.
lea (encl_stack)(%rbx), %rax
xchg %rsp, %rax
@@ -81,9 +82,15 @@ encl_entry:
.section ".data", "aw"
-encl_ssa:
+encl_ssa_tcs1:
+ .space 4096
+encl_ssa_tcs2:
.space 4096
.balign 4096
- .space 8192
+ # Stack of TCS #1
+ .space 4096
encl_stack:
+ .balign 4096
+ # Stack of TCS #2
+ .space 4096
diff --git a/tools/testing/selftests/sparc64/drivers/adi-test.c b/tools/testing/selftests/sparc64/drivers/adi-test.c
index 95d93c6a88a5..84e5d9fd20b0 100644
--- a/tools/testing/selftests/sparc64/drivers/adi-test.c
+++ b/tools/testing/selftests/sparc64/drivers/adi-test.c
@@ -24,10 +24,6 @@
#define DEBUG_LEVEL_4_BIT (0x0008)
#define DEBUG_TIMING_BIT (0x1000)
-#ifndef ARRAY_SIZE
-# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
/* bit mask of enabled bits to print */
#define DEBUG 0x0001
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index b71828df5a6d..a3239d5e40c7 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -60,6 +60,8 @@ CONFIG_NET_IFE_SKBTCINDEX=m
CONFIG_NET_SCH_FIFO=y
CONFIG_NET_SCH_ETS=m
CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_FQ_PIE=m
+CONFIG_NETDEVSIM=m
#
## Network testing
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 503982b8f295..91832400ddbd 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -68,7 +68,7 @@
"cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
"expExitCode": "0",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
"matchCount": "1",
"teardown": [
"$TC action flush action bpf"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index 8e45792703ed..b7205a069534 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -812,5 +812,29 @@
"teardown": [
"$TC actions flush action police"
]
+ },
+ {
+ "id": "7d64",
+ "name": "Add police action with skip_hw option",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k index 100 skip_hw",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police | grep skip_hw",
+ "matchPattern": "skip_hw",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json
index 51799874a972..2df68017dfb8 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json
@@ -387,5 +387,77 @@
"$TC qdisc del dev $DUMMY ingress",
"$IP link del dev $DUMMY type dummy"
]
+ },
+ {
+ "id": "3329",
+ "name": "Validate flags of the matchall filter with skip_sw and police action with skip_hw",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions flush action police",
+ "$TC actions add action police rate 1mbit burst 100k index 199 skip_hw"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 matchall skip_sw action police index 199",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv4 matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions del action police index 199"
+ ]
+ },
+ {
+ "id": "0eeb",
+ "name": "Validate flags of the matchall filter with skip_hw and police action",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions flush action police",
+ "$TC actions add action police rate 1mbit burst 100k index 199"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 matchall skip_hw action police index 199",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv4 matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions del action police index 199"
+ ]
+ },
+ {
+ "id": "eee4",
+ "name": "Validate flags of the matchall filter with skip_sw and police action",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions flush action police",
+ "$TC actions add action police rate 1mbit burst 100k index 199"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 matchall skip_sw action police index 199",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv4 matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions del action police index 199"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
index 88a20c781e49..c6046096d9db 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
@@ -15,7 +15,7 @@
"cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
"expExitCode": "0",
"verifyCmd": "$TC qdisc show dev $ETH",
- "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
"matchCount": "4",
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -37,7 +37,7 @@
"cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
"expExitCode": "0",
"verifyCmd": "$TC qdisc show dev $ETH",
- "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-9,a-f][0-9,a-f]{0,2}",
"matchCount": "256",
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -60,7 +60,7 @@
"cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
"expExitCode": "2",
"verifyCmd": "$TC qdisc show dev $ETH",
- "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
"matchCount": "4",
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -82,7 +82,7 @@
"cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
"expExitCode": "2",
"verifyCmd": "$TC qdisc show dev $ETH",
- "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
"matchCount": "0",
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -106,7 +106,7 @@
"cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
"expExitCode": "2",
"verifyCmd": "$TC qdisc show dev $ETH",
- "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
"matchCount": "0",
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -128,7 +128,7 @@
"cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
"expExitCode": "2",
"verifyCmd": "$TC qdisc show dev $ETH",
- "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
"matchCount": "0",
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index a3e43189d940..ee22e3447ec7 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -716,6 +716,7 @@ def set_operation_mode(pm, parser, args, remaining):
list_test_cases(alltests)
exit(0)
+ exit_code = 0 # KSFT_PASS
if len(alltests):
req_plugins = pm.get_required_plugins(alltests)
try:
@@ -724,6 +725,8 @@ def set_operation_mode(pm, parser, args, remaining):
print('The following plugins were not found:')
print('{}'.format(pde.missing_pg))
catresults = test_runner(pm, args, alltests)
+ if catresults.count_failures() != 0:
+ exit_code = 1 # KSFT_FAIL
if args.format == 'none':
print('Test results output suppression requested\n')
else:
@@ -748,6 +751,8 @@ def set_operation_mode(pm, parser, args, remaining):
gid=int(os.getenv('SUDO_GID')))
else:
print('No tests found\n')
+ exit_code = 4 # KSFT_SKIP
+ exit(exit_code)
def main():
"""
@@ -767,8 +772,5 @@ def main():
set_operation_mode(pm, parser, args, remaining)
- exit(0)
-
-
if __name__ == "__main__":
main()
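The exit codes follow the kselftest convention already named in the comments (KSFT_PASS = 0, KSFT_FAIL = 1, KSFT_SKIP = 4, as defined in tools/testing/selftests/kselftest.h). A minimal C sketch of the same convention:

	#include <stdlib.h>

	/* kselftest exit codes, mirroring tools/testing/selftests/kselftest.h */
	#define KSFT_PASS 0
	#define KSFT_FAIL 1
	#define KSFT_SKIP 4

	int main(void)
	{
		int tests_found = 1, failures = 0;

		if (!tests_found)
			exit(KSFT_SKIP);	/* no tests: skip, don't pass */
		exit(failures ? KSFT_FAIL : KSFT_PASS);
	}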
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index 7fe38c76db44..afb0cd86fa3d 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -1,5 +1,6 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+modprobe netdevsim
./tdc.py -c actions --nobuildebpf
./tdc.py -c qdisc
diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c
index f2519154208a..1833ca97eb24 100644
--- a/tools/testing/selftests/timens/procfs.c
+++ b/tools/testing/selftests/timens/procfs.c
@@ -24,8 +24,6 @@
#define DAY_IN_SEC (60*60*24)
#define TEN_DAYS_IN_SEC (10*DAY_IN_SEC)
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
static int child_ns, parent_ns;
static int switch_ns(int fd)
diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c
index 52b6a1185f52..387220791a05 100644
--- a/tools/testing/selftests/timens/timens.c
+++ b/tools/testing/selftests/timens/timens.c
@@ -22,8 +22,6 @@
#define DAY_IN_SEC (60*60*24)
#define TEN_DAYS_IN_SEC (10*DAY_IN_SEC)
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
struct test_clock {
clockid_t id;
char *name;
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
index 4da09dbf83ba..54da4b088f4c 100644
--- a/tools/testing/selftests/timers/alarmtimer-suspend.c
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -79,7 +79,7 @@ char *clockstring(int clockid)
return "CLOCK_BOOTTIME_ALARM";
case CLOCK_TAI:
return "CLOCK_TAI";
- };
+ }
return "UNKNOWN_CLOCKID";
}
diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c
index 022d3ffe3fbf..e6756d9c60a7 100644
--- a/tools/testing/selftests/timers/inconsistency-check.c
+++ b/tools/testing/selftests/timers/inconsistency-check.c
@@ -72,7 +72,7 @@ char *clockstring(int clockid)
return "CLOCK_BOOTTIME_ALARM";
case CLOCK_TAI:
return "CLOCK_TAI";
- };
+ }
return "UNKNOWN_CLOCKID";
}
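Both clockstring() hunks drop the semicolon after the switch's closing brace; the `};` form compiles, but the semicolon is a superfluous null statement that style checkers such as checkpatch warn about. In isolation:

	const char *clockname(int clockid)
	{
		switch (clockid) {
		case 0:
			return "CLOCK_REALTIME";
		}	/* no trailing ';' here: it would be an empty statement */
		return "UNKNOWN_CLOCKID";
	}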
diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
index fe8fcfb334e0..a5cb4b09a46c 100644
--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
@@ -24,19 +24,23 @@ if [[ "$1" == "-cgroup-v2" ]]; then
reservation_usage_file=rsvd.current
fi
-cgroup_path=/dev/cgroup/memory
-if [[ ! -e $cgroup_path ]]; then
- mkdir -p $cgroup_path
- if [[ $cgroup2 ]]; then
+if [[ $cgroup2 ]]; then
+  cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
mount -t cgroup2 none $cgroup_path
- else
+ do_umount=1
+ fi
+ echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
+else
+  cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $cgroup_path
+ do_umount=1
fi
fi
-
-if [[ $cgroup2 ]]; then
- echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control
-fi
+export cgroup_path
function cleanup() {
if [[ $cgroup2 ]]; then
@@ -108,7 +112,7 @@ function setup_cgroup() {
function wait_for_hugetlb_memory_to_get_depleted() {
local cgroup="$1"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
# Wait for hugetlbfs memory to get depleted.
while [ $(cat $path) != 0 ]; do
echo Waiting for hugetlb memory to get depleted.
@@ -121,7 +125,7 @@ function wait_for_hugetlb_memory_to_get_reserved() {
local cgroup="$1"
local size="$2"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
# Wait for hugetlbfs memory to get written.
while [ $(cat $path) != $size ]; do
echo Waiting for hugetlb memory reservation to reach size $size.
@@ -134,7 +138,7 @@ function wait_for_hugetlb_memory_to_get_written() {
local cgroup="$1"
local size="$2"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
# Wait for hugetlbfs memory to get written.
while [ $(cat $path) != $size ]; do
echo Waiting for hugetlb memory to reach size $size.
@@ -574,5 +578,7 @@ for populate in "" "-o"; do
done # populate
done # method
-umount $cgroup_path
-rmdir $cgroup_path
+if [[ $do_umount ]]; then
+ umount $cgroup_path
+ rmdir $cgroup_path
+fi
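The script now reuses an existing cgroup mount and falls back to mounting /dev/cgroup/memory itself, later unmounting only what it mounted (the do_umount flag). The same mount discovery could be done from C via getmntent(3); a sketch under that assumption, not part of the patch:

	#include <mntent.h>
	#include <stdio.h>
	#include <string.h>

	/* Return a malloc'd mount point of the first cgroup2 mount, or NULL. */
	static char *find_cgroup2_root(void)
	{
		struct mntent *m;
		char *path = NULL;
		FILE *f = setmntent("/proc/mounts", "r");

		if (!f)
			return NULL;
		while ((m = getmntent(f))) {
			if (!strcmp(m->mnt_type, "cgroup2")) {
				path = strdup(m->mnt_dir);
				break;
			}
		}
		endmntent(f);
		return path;
	}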
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 864f126ffd78..203323967b50 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1251,6 +1251,48 @@ TEST_F(hmm, anon_teardown)
/*
* Test memory snapshot without faulting in pages accessed by the device.
*/
+TEST_F(hmm, mixedmap)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned char *m;
+ int ret;
+
+ npages = 1;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ self->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
TEST_F(hmm2, snapshot)
{
struct hmm_buffer *buffer;
diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c
index 257df94697a5..2a7c33631a29 100644
--- a/tools/testing/selftests/vm/hugepage-mremap.c
+++ b/tools/testing/selftests/vm/hugepage-mremap.c
@@ -4,7 +4,11 @@
*
* Example of remapping huge page memory in a user application using the
* mremap system call. Code assumes a hugetlbfs filesystem is mounted
- * at './huge'. The code will use 10MB worth of huge pages.
+ * at './huge'. The amount of memory used by this test is given, in MB, by an
+ * optional command line argument; if it is missing, the default amount is
+ * 10MB.
+ *
+ * To make sure the test triggers pmd sharing and goes through the 'unshare'
+ * path in the mremap code, use 1GB (1024) or more.
*/
#define _GNU_SOURCE
@@ -18,8 +22,10 @@
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
-#define LENGTH (1UL * 1024 * 1024 * 1024)
+#define DEFAULT_LENGTH_MB 10UL
+#define MB_TO_BYTES(x) ((x) * 1024 * 1024)
+#define FILE_NAME "huge/hugepagefile"
#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
#define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
@@ -28,20 +34,20 @@ static void check_bytes(char *addr)
printf("First hex is %x\n", *((unsigned int *)addr));
}
-static void write_bytes(char *addr)
+static void write_bytes(char *addr, size_t len)
{
unsigned long i;
- for (i = 0; i < LENGTH; i++)
+ for (i = 0; i < len; i++)
*(addr + i) = (char)i;
}
-static int read_bytes(char *addr)
+static int read_bytes(char *addr, size_t len)
{
unsigned long i;
check_bytes(addr);
- for (i = 0; i < LENGTH; i++)
+ for (i = 0; i < len; i++)
if (*(addr + i) != (char)i) {
printf("Mismatch at %lu\n", i);
return 1;
@@ -99,11 +105,19 @@ static void register_region_with_uffd(char *addr, size_t len)
}
}
-int main(void)
+int main(int argc, char *argv[])
{
+	/* Read memory length as the first arg if valid, otherwise fall back to
+ * the default length. Any additional args are ignored.
+ */
+ size_t length = argc > 1 ? (size_t)atoi(argv[1]) : 0UL;
+
+ length = length > 0 ? length : DEFAULT_LENGTH_MB;
+ length = MB_TO_BYTES(length);
+
int ret = 0;
- int fd = open("/huge/test", O_CREAT | O_RDWR, 0755);
+ int fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
if (fd < 0) {
perror("Open failed");
@@ -112,7 +126,7 @@ int main(void)
/* mmap to a PUD aligned address to hopefully trigger pmd sharing. */
unsigned long suggested_addr = 0x7eaa40000000;
- void *haddr = mmap((void *)suggested_addr, LENGTH, PROTECTION,
+ void *haddr = mmap((void *)suggested_addr, length, PROTECTION,
MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
printf("Map haddr: Returned address is %p\n", haddr);
if (haddr == MAP_FAILED) {
@@ -122,7 +136,7 @@ int main(void)
/* mmap again to a dummy address to hopefully trigger pmd sharing. */
suggested_addr = 0x7daa40000000;
- void *daddr = mmap((void *)suggested_addr, LENGTH, PROTECTION,
+ void *daddr = mmap((void *)suggested_addr, length, PROTECTION,
MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
printf("Map daddr: Returned address is %p\n", daddr);
if (daddr == MAP_FAILED) {
@@ -132,16 +146,16 @@ int main(void)
suggested_addr = 0x7faa40000000;
void *vaddr =
- mmap((void *)suggested_addr, LENGTH, PROTECTION, FLAGS, -1, 0);
+ mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0);
printf("Map vaddr: Returned address is %p\n", vaddr);
if (vaddr == MAP_FAILED) {
perror("mmap2");
exit(1);
}
- register_region_with_uffd(haddr, LENGTH);
+ register_region_with_uffd(haddr, length);
- void *addr = mremap(haddr, LENGTH, LENGTH,
+ void *addr = mremap(haddr, length, length,
MREMAP_MAYMOVE | MREMAP_FIXED, vaddr);
if (addr == MAP_FAILED) {
perror("mremap");
@@ -150,10 +164,10 @@ int main(void)
printf("Mremap: Returned address is %p\n", addr);
check_bytes(addr);
- write_bytes(addr);
- ret = read_bytes(addr);
+ write_bytes(addr, length);
+ ret = read_bytes(addr, length);
- munmap(addr, LENGTH);
+ munmap(addr, length);
return ret;
}
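The suggested mmap addresses are PUD-aligned (1 GiB on x86-64) so that hugetlb PMD sharing can kick in. A compile-time check of that alignment, purely illustrative:

	/* 0x7eaa40000000 has its low 30 bits clear, i.e. it is 1 GiB aligned */
	_Static_assert((0x7eaa40000000UL & ((1UL << 30) - 1)) == 0,
		       "suggested mmap address must be PUD aligned");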
diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
index 4a9a3afe9fd4..bf2d2a684edf 100644
--- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
@@ -18,19 +18,24 @@ if [[ "$1" == "-cgroup-v2" ]]; then
usage_file=current
fi
-CGROUP_ROOT='/dev/cgroup/memory'
-MNT='/mnt/huge/'
-if [[ ! -e $CGROUP_ROOT ]]; then
- mkdir -p $CGROUP_ROOT
- if [[ $cgroup2 ]]; then
+if [[ $cgroup2 ]]; then
+  CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup2 none $CGROUP_ROOT
- sleep 1
- echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
- else
+ do_umount=1
+ fi
+ echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
+else
+  CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $CGROUP_ROOT
+ do_umount=1
fi
fi
+MNT='/mnt/huge/'
function get_machine_hugepage_size() {
hpz=$(grep -i hugepagesize /proc/meminfo)
diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index 0624d1bd71b5..7c0b0617b9f8 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -20,7 +20,6 @@
#define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */
#define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
struct config {
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 622a85848f61..92f3be3dd8e5 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -13,6 +13,8 @@
#include <ucontext.h>
#include <sys/mman.h>
+#include "../kselftest.h"
+
/* Define some kernel-like types */
#define u8 __u8
#define u16 __u16
@@ -175,7 +177,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write)
dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
}
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
#define ALIGN_PTR_UP(p, ptr_align_to) \
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index a24d30af3094..75d401741394 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -111,7 +111,7 @@ fi
echo "-----------------------"
echo "running hugepage-mremap"
echo "-----------------------"
-./hugepage-mremap
+./hugepage-mremap 256
if [ $? -ne 0 ]; then
echo "[FAIL]"
exitcode=1
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 8a09057d2f22..d3fd24f9fae8 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -236,7 +236,8 @@ static void hugetlb_allocate_area(void **alloc_area)
*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
(map_shared ? MAP_SHARED : MAP_PRIVATE) |
- MAP_HUGETLB,
+ MAP_HUGETLB |
+ (*alloc_area == area_src ? 0 : MAP_NORESERVE),
huge_fd, *alloc_area == area_src ? 0 :
nr_pages * page_size);
if (*alloc_area == MAP_FAILED)
@@ -644,7 +645,7 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
if (ret != sizeof(*msg)) {
if (ret < 0) {
- if (errno == EAGAIN)
+ if (errno == EAGAIN || errno == EINTR)
return 1;
err("blocking read error");
} else {
@@ -720,8 +721,11 @@ static void *uffd_poll_thread(void *arg)
for (;;) {
ret = poll(pollfd, 2, -1);
- if (ret <= 0)
+ if (ret <= 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ continue;
err("poll error: %d", ret);
+ }
if (pollfd[1].revents & POLLIN) {
if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
err("read pipefd error");
@@ -1413,7 +1417,6 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize)
static int userfaultfd_stress(void)
{
void *area;
- char *tmp_area;
unsigned long nr;
struct uffdio_register uffdio_register;
struct uffd_stats uffd_stats[nr_cpus];
@@ -1524,13 +1527,9 @@ static int userfaultfd_stress(void)
count_verify[nr], nr);
/* prepare next bounce */
- tmp_area = area_src;
- area_src = area_dst;
- area_dst = tmp_area;
+ swap(area_src, area_dst);
- tmp_area = area_src_alias;
- area_src_alias = area_dst_alias;
- area_dst_alias = tmp_area;
+ swap(area_src_alias, area_dst_alias);
uffd_stats_report(uffd_stats, nr_cpus);
}
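The three-assignment swaps collapse into swap(); the selftest presumably gets it from a shared header as the usual typeof-based macro, along these lines (the exact definition is not shown in this diff):

	#define swap(a, b) \
		do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)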
diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c
index 83acdff26a13..da6ec3b53ea8 100644
--- a/tools/testing/selftests/vm/va_128TBswitch.c
+++ b/tools/testing/selftests/vm/va_128TBswitch.c
@@ -9,7 +9,7 @@
#include <sys/mman.h>
#include <string.h>
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#include "../kselftest.h"
#ifdef __powerpc64__
#define PAGE_SIZE (64 << 10)
diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh
index d3d0d108924d..70a02301f4c2 100644
--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh
+++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh
@@ -14,7 +14,7 @@ want_sleep=$8
reserve=$9
echo "Putting task in cgroup '$cgroup'"
-echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs
+echo $$ > ${cgroup_path:-/dev/cgroup/memory}/"$cgroup"/cgroup.procs
echo "Method is $method"
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index ebc4ee0fe179..8a9461aa0878 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -276,7 +276,11 @@ n0 ping -W 1 -c 1 192.168.241.2
n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7
ip2 link del wg0
ip2 link del wg1
-! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel
+read _ _ tx_bytes_before < <(n0 wg show wg1 transfer)
+! n0 ping -W 1 -c 10 -f 192.168.241.2 || false
+sleep 1
+read _ _ tx_bytes_after < <(n0 wg show wg1 transfer)
+(( tx_bytes_after - tx_bytes_before < 70000 ))
ip0 link del wg1
ip1 link del wg0
@@ -609,6 +613,28 @@ ip0 link set wg0 up
kill $ncat_pid
ip0 link del wg0
+# Ensure that dst_cache references don't outlive netns lifetime
+ip1 link add dev wg0 type wireguard
+ip2 link add dev wg0 type wireguard
+configure_peers
+ip1 link add veth1 type veth peer name veth2
+ip1 link set veth2 netns $netns2
+ip1 addr add fd00:aa::1/64 dev veth1
+ip2 addr add fd00:aa::2/64 dev veth2
+ip1 link set veth1 up
+ip2 link set veth2 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+ip1 -6 route add default dev veth1 via fd00:aa::2
+ip2 -6 route add default dev veth2 via fd00:aa::1
+n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2
+n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1
+n1 ping6 -c 1 fd00::2
+pp ip netns delete $netns1
+pp ip netns delete $netns2
+pp ip netns add $netns1
+pp ip netns add $netns2
+
# Ensure there aren't circular reference loops
ip1 link add wg1 type wireguard
ip2 link add wg2 type wireguard
@@ -627,7 +653,7 @@ while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do
done < /dev/kmsg
alldeleted=1
for object in "${!objects[@]}"; do
- if [[ ${objects["$object"]} != *createddestroyed ]]; then
+ if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then
echo "Error: $object: merely ${objects["$object"]}" >&3
alldeleted=0
fi
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index fe07d97df9fa..2b321b8a96cf 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -47,7 +47,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_TRACE_IRQFLAGS=y
CONFIG_DEBUG_BUGVERBOSE=y
CONFIG_DEBUG_LIST=y
-CONFIG_DEBUG_PI_LIST=y
+CONFIG_DEBUG_PLIST=y
CONFIG_PROVE_RCU=y
CONFIG_SPARSE_RCU_POINTER=y
CONFIG_RCU_CPU_STALL_TIMEOUT=21
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index 74db83a0aedd..a9b5a520a1d2 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -66,6 +66,7 @@ CONFIG_PROC_SYSCTL=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
+CONFIG_LOG_BUF_SHIFT=18
CONFIG_PRINTK_TIME=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_LEGACY_VSYSCALL_NONE=y
diff --git a/tools/thermal/tmon/pid.c b/tools/thermal/tmon/pid.c
index c54edb4f630c..296f69c00c57 100644
--- a/tools/thermal/tmon/pid.c
+++ b/tools/thermal/tmon/pid.c
@@ -54,7 +54,6 @@ static double xk_1, xk_2; /* input temperature x[k-#] */
*/
int init_thermal_controller(void)
{
- int ret = 0;
/* init pid params */
p_param.ts = ticktime;
@@ -65,7 +64,7 @@ int init_thermal_controller(void)
p_param.t_target = target_temp_user;
- return ret;
+ return 0;
}
void controller_reset(void)
diff --git a/tools/tracing/Makefile b/tools/tracing/Makefile
index 87e0ec48e2e7..95e485f12d97 100644
--- a/tools/tracing/Makefile
+++ b/tools/tracing/Makefile
@@ -1,11 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
include ../scripts/Makefile.include
-all: latency
+all: latency rtla
-clean: latency_clean
+clean: latency_clean rtla_clean
-install: latency_install
+install: latency_install rtla_install
latency:
$(call descend,latency)
@@ -16,4 +16,14 @@ latency_install:
latency_clean:
$(call descend,latency,clean)
-.PHONY: all install clean latency latency_install latency_clean
+rtla:
+ $(call descend,rtla)
+
+rtla_install:
+ $(call descend,rtla,install)
+
+rtla_clean:
+ $(call descend,rtla,clean)
+
+.PHONY: all install clean latency latency_install latency_clean \
+ rtla rtla_install rtla_clean
diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile
new file mode 100644
index 000000000000..7c39728d08de
--- /dev/null
+++ b/tools/tracing/rtla/Makefile
@@ -0,0 +1,102 @@
+NAME := rtla
+VERSION := 0.5
+
+# From libtracefs:
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+$(call allow-override,PKG_CONFIG,pkg-config)
+$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/)
+$(call allow-override,LDCONFIG,ldconfig)
+
+INSTALL = install
+FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \
+ -fasynchronous-unwind-tables -fstack-clash-protection
+WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized
+
+TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs)
+
+CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS)
+LDFLAGS := -ggdb
+LIBS := $$($(PKG_CONFIG) --libs libtracefs) -lprocps
+
+SRC := $(wildcard src/*.c)
+HDR := $(wildcard src/*.h)
+OBJ := $(SRC:.c=.o)
+DIRS := src
+FILES := Makefile README.txt
+CEXT := bz2
+TARBALL := $(NAME)-$(VERSION).tar.$(CEXT)
+TAROPTS := -cvjf $(TARBALL)
+BINDIR := /usr/bin
+DATADIR := /usr/share
+DOCDIR := $(DATADIR)/doc
+MANDIR := $(DATADIR)/man
+LICDIR := $(DATADIR)/licenses
+SRCTREE := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))
+
+# If running from the tarball, man pages are stored in the Documentation
+# dir. If running from the kernel source, man pages are stored in
+# Documentation/tools/rtla/.
+ifneq ($(wildcard Documentation/.*),)
+DOCSRC = Documentation/
+else
+DOCSRC = $(SRCTREE)/../../../Documentation/tools/rtla/
+endif
+
+.PHONY: all
+all: rtla
+
+rtla: $(OBJ)
+ $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS)
+
+static: $(OBJ)
+ $(CC) -o rtla-static $(LDFLAGS) --static $(OBJ) $(LIBS) -lpthread -ldl
+
+.PHONY: install
+install: doc_install
+ $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)
+ $(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR)
+ $(STRIP) $(DESTDIR)$(BINDIR)/rtla
+ @test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise
+	ln -s rtla $(DESTDIR)$(BINDIR)/osnoise
+	@test ! -f $(DESTDIR)$(BINDIR)/timerlat || rm $(DESTDIR)$(BINDIR)/timerlat
+	ln -s rtla $(DESTDIR)$(BINDIR)/timerlat
+
+.PHONY: clean tarball
+clean: doc_clean
+ @test ! -f rtla || rm rtla
+ @test ! -f rtla-static || rm rtla-static
+ @test ! -f src/rtla.o || rm src/rtla.o
+ @test ! -f $(TARBALL) || rm -f $(TARBALL)
+ @rm -rf *~ $(OBJ) *.tar.$(CEXT)
+
+tarball: clean
+ rm -rf $(NAME)-$(VERSION) && mkdir $(NAME)-$(VERSION)
+ cp -r $(DIRS) $(FILES) $(NAME)-$(VERSION)
+ mkdir $(NAME)-$(VERSION)/Documentation/
+ cp -rp $(SRCTREE)/../../../Documentation/tools/rtla/* $(NAME)-$(VERSION)/Documentation/
+ tar $(TAROPTS) --exclude='*~' $(NAME)-$(VERSION)
+ rm -rf $(NAME)-$(VERSION)
+
+.PHONY: doc doc_clean doc_install
+doc:
+ $(MAKE) -C $(DOCSRC)
+
+doc_clean:
+ $(MAKE) -C $(DOCSRC) clean
+
+doc_install:
+ $(MAKE) -C $(DOCSRC) install
diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt
new file mode 100644
index 000000000000..6c88446f7e74
--- /dev/null
+++ b/tools/tracing/rtla/README.txt
@@ -0,0 +1,36 @@
+RTLA: Real-Time Linux Analysis tools
+
+rtla is a meta-tool that comprises a set of commands that aim to
+analyze the real-time properties of Linux. Instead of testing Linux
+as a black box, rtla leverages kernel tracing capabilities to provide
+precise information about the properties and root causes of
+unexpected results.
+
+Installing RTLA
+
+RTLA depends on the following libraries:
+
+ - libtracefs
+ - libtraceevent
+ - procps
+
+It also depends on python3-docutils to compile man pages.
+
+For development, we suggest the following steps for compiling rtla:
+
+ $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git
+ $ cd libtraceevent/
+ $ make
+ $ sudo make install
+ $ cd ..
+ $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git
+ $ cd libtracefs/
+ $ make
+ $ sudo make install
+ $ cd ..
+ $ cd $rtla_src
+ $ make
+ $ sudo make install
+
+For further information, please refer to the rtla man page.
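After installation, the commands run through the rtla wrapper, for example (mode names match the usage text in src/osnoise.c):

	$ rtla osnoise top
	$ rtla osnoise hist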
diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c
new file mode 100644
index 000000000000..7b73d1eccd0e
--- /dev/null
+++ b/tools/tracing/rtla/src/osnoise.c
@@ -0,0 +1,875 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+#include "osnoise.h"
+#include "utils.h"
+
+/*
+ * osnoise_get_cpus - return the original "osnoise/cpus" content
+ *
+ * It also saves the value to be restored.
+ */
+char *osnoise_get_cpus(struct osnoise_context *context)
+{
+ if (context->curr_cpus)
+ return context->curr_cpus;
+
+ if (context->orig_cpus)
+ return context->orig_cpus;
+
+ context->orig_cpus = tracefs_instance_file_read(NULL, "osnoise/cpus", NULL);
+
+ /*
+ * The error value (NULL) is the same for tracefs_instance_file_read()
+	 * and this function, so the result can be returned directly:
+ */
+ return context->orig_cpus;
+}
+
+/*
+ * osnoise_set_cpus - configure osnoise to run on *cpus
+ *
+ * "osnoise/cpus" file is used to set the cpus in which osnoise/timerlat
+ * will run. This function opens this file, saves the current value,
+ * and set the cpus passed as argument.
+ */
+int osnoise_set_cpus(struct osnoise_context *context, char *cpus)
+{
+ char *orig_cpus = osnoise_get_cpus(context);
+ char buffer[1024];
+ int retval;
+
+ if (!orig_cpus)
+ return -1;
+
+ context->curr_cpus = strdup(cpus);
+ if (!context->curr_cpus)
+ return -1;
+
+ snprintf(buffer, 1024, "%s\n", cpus);
+
+ debug_msg("setting cpus to %s from %s", cpus, context->orig_cpus);
+
+ retval = tracefs_instance_file_write(NULL, "osnoise/cpus", buffer);
+ if (retval < 0) {
+ free(context->curr_cpus);
+ context->curr_cpus = NULL;
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_cpus - restore the original "osnoise/cpus"
+ *
+ * osnoise_set_cpus() saves the original data for the "osnoise/cpus"
+ * file. This function restores the original config if it was previously
+ * modified.
+ */
+void osnoise_restore_cpus(struct osnoise_context *context)
+{
+ int retval;
+
+ if (!context->orig_cpus)
+ return;
+
+ if (!context->curr_cpus)
+ return;
+
+ /* nothing to do? */
+ if (!strcmp(context->orig_cpus, context->curr_cpus))
+ goto out_done;
+
+ debug_msg("restoring cpus to %s", context->orig_cpus);
+
+ retval = tracefs_instance_file_write(NULL, "osnoise/cpus", context->orig_cpus);
+ if (retval < 0)
+ err_msg("could not restore original osnoise cpus\n");
+
+out_done:
+ free(context->curr_cpus);
+ context->curr_cpus = NULL;
+}
+
+/*
+ * osnoise_put_cpus - restore cpus config and cleanup data
+ */
+void osnoise_put_cpus(struct osnoise_context *context)
+{
+ osnoise_restore_cpus(context);
+
+ if (!context->orig_cpus)
+ return;
+
+ free(context->orig_cpus);
+ context->orig_cpus = NULL;
+}
+
+/*
+ * osnoise_read_ll_config - read a long long value from a config
+ *
+ * returns -1 on error.
+ */
+static long long osnoise_read_ll_config(char *rel_path)
+{
+ long long retval;
+ char *buffer;
+
+ buffer = tracefs_instance_file_read(NULL, rel_path, NULL);
+ if (!buffer)
+ return -1;
+
+ /* get_llong_from_str returns -1 on error */
+ retval = get_llong_from_str(buffer);
+
+ debug_msg("reading %s returned %lld\n", rel_path, retval);
+
+ free(buffer);
+
+ return retval;
+}
+
+/*
+ * osnoise_write_ll_config - write a long long value to a config in rel_path
+ *
+ * returns -1 on error.
+ */
+static long long osnoise_write_ll_config(char *rel_path, long long value)
+{
+ char buffer[BUFF_U64_STR_SIZE];
+ long long retval;
+
+ snprintf(buffer, sizeof(buffer), "%lld\n", value);
+
+ debug_msg("setting %s to %lld\n", rel_path, value);
+
+ retval = tracefs_instance_file_write(NULL, rel_path, buffer);
+ return retval;
+}
+
+/*
+ * osnoise_get_runtime - return the original "osnoise/runtime_us" value
+ *
+ * It also saves the value to be restored.
+ */
+unsigned long long osnoise_get_runtime(struct osnoise_context *context)
+{
+ long long runtime_us;
+
+ if (context->runtime_us != OSNOISE_TIME_INIT_VAL)
+ return context->runtime_us;
+
+ if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL)
+ return context->orig_runtime_us;
+
+ runtime_us = osnoise_read_ll_config("osnoise/runtime_us");
+ if (runtime_us < 0)
+ goto out_err;
+
+ context->orig_runtime_us = runtime_us;
+ return runtime_us;
+
+out_err:
+ return OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_get_period - return the original "osnoise/period_us" value
+ *
+ * It also saves the value to be restored.
+ */
+unsigned long long osnoise_get_period(struct osnoise_context *context)
+{
+ long long period_us;
+
+ if (context->period_us != OSNOISE_TIME_INIT_VAL)
+ return context->period_us;
+
+ if (context->orig_period_us != OSNOISE_TIME_INIT_VAL)
+ return context->orig_period_us;
+
+ period_us = osnoise_read_ll_config("osnoise/period_us");
+ if (period_us < 0)
+ goto out_err;
+
+ context->orig_period_us = period_us;
+ return period_us;
+
+out_err:
+ return OSNOISE_TIME_INIT_VAL;
+}
+
+static int __osnoise_write_runtime(struct osnoise_context *context,
+ unsigned long long runtime)
+{
+ int retval;
+
+ if (context->orig_runtime_us == OSNOISE_TIME_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/runtime_us", runtime);
+ if (retval < 0)
+ return -1;
+
+ context->runtime_us = runtime;
+ return 0;
+}
+
+static int __osnoise_write_period(struct osnoise_context *context,
+ unsigned long long period)
+{
+ int retval;
+
+ if (context->orig_period_us == OSNOISE_TIME_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/period_us", period);
+ if (retval < 0)
+ return -1;
+
+ context->period_us = period;
+ return 0;
+}
+
+/*
+ * osnoise_set_runtime_period - set osnoise runtime and period
+ *
+ * Osnoise's runtime and period are related as runtime <= period.
+ * Thus, this function saves the original values, and then tries
+ * to set the runtime and period if they are != 0.
+ */
+int osnoise_set_runtime_period(struct osnoise_context *context,
+ unsigned long long runtime,
+ unsigned long long period)
+{
+ unsigned long long curr_runtime_us;
+ unsigned long long curr_period_us;
+ int retval;
+
+ if (!period && !runtime)
+ return 0;
+
+ curr_runtime_us = osnoise_get_runtime(context);
+ curr_period_us = osnoise_get_period(context);
+
+ /* error getting any value? */
+ if (curr_period_us == OSNOISE_TIME_INIT_VAL || curr_runtime_us == OSNOISE_TIME_INIT_VAL)
+ return -1;
+
+ if (!period) {
+ if (runtime > curr_period_us)
+ return -1;
+ return __osnoise_write_runtime(context, runtime);
+ } else if (!runtime) {
+ if (period < curr_runtime_us)
+ return -1;
+ return __osnoise_write_period(context, period);
+ }
+
+ if (runtime > curr_period_us) {
+ retval = __osnoise_write_period(context, period);
+ if (retval)
+ return -1;
+ retval = __osnoise_write_runtime(context, runtime);
+ if (retval)
+ return -1;
+ } else {
+ retval = __osnoise_write_runtime(context, runtime);
+ if (retval)
+ return -1;
+ retval = __osnoise_write_period(context, period);
+ if (retval)
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_runtime_period - restore the original runtime and period
+ */
+void osnoise_restore_runtime_period(struct osnoise_context *context)
+{
+ unsigned long long orig_runtime = context->orig_runtime_us;
+ unsigned long long orig_period = context->orig_period_us;
+ unsigned long long curr_runtime = context->runtime_us;
+ unsigned long long curr_period = context->period_us;
+ int retval;
+
+ if ((orig_runtime == OSNOISE_TIME_INIT_VAL) && (orig_period == OSNOISE_TIME_INIT_VAL))
+ return;
+
+ if ((orig_period == curr_period) && (orig_runtime == curr_runtime))
+ goto out_done;
+
+ retval = osnoise_set_runtime_period(context, orig_runtime, orig_period);
+ if (retval)
+ err_msg("Could not restore original osnoise runtime/period\n");
+
+out_done:
+ context->runtime_us = OSNOISE_TIME_INIT_VAL;
+ context->period_us = OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_put_runtime_period - restore original values and cleanup data
+ */
+void osnoise_put_runtime_period(struct osnoise_context *context)
+{
+ osnoise_restore_runtime_period(context);
+
+ if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL)
+ context->orig_runtime_us = OSNOISE_TIME_INIT_VAL;
+
+ if (context->orig_period_us != OSNOISE_TIME_INIT_VAL)
+ context->orig_period_us = OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_get_timerlat_period_us - read and save the original "timerlat_period_us"
+ */
+static long long
+osnoise_get_timerlat_period_us(struct osnoise_context *context)
+{
+ long long timerlat_period_us;
+
+ if (context->timerlat_period_us != OSNOISE_TIME_INIT_VAL)
+ return context->timerlat_period_us;
+
+ if (context->orig_timerlat_period_us != OSNOISE_TIME_INIT_VAL)
+ return context->orig_timerlat_period_us;
+
+ timerlat_period_us = osnoise_read_ll_config("osnoise/timerlat_period_us");
+ if (timerlat_period_us < 0)
+ goto out_err;
+
+ context->orig_timerlat_period_us = timerlat_period_us;
+ return timerlat_period_us;
+
+out_err:
+ return OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_set_timerlat_period_us - set "timerlat_period_us"
+ */
+int osnoise_set_timerlat_period_us(struct osnoise_context *context, long long timerlat_period_us)
+{
+ long long curr_timerlat_period_us = osnoise_get_timerlat_period_us(context);
+ int retval;
+
+ if (curr_timerlat_period_us == OSNOISE_TIME_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/timerlat_period_us", timerlat_period_us);
+ if (retval < 0)
+ return -1;
+
+ context->timerlat_period_us = timerlat_period_us;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_timerlat_period_us - restore "timerlat_period_us"
+ */
+void osnoise_restore_timerlat_period_us(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL)
+ return;
+
+ if (context->orig_timerlat_period_us == context->timerlat_period_us)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("osnoise/timerlat_period_us", context->orig_timerlat_period_us);
+ if (retval < 0)
+ err_msg("Could not restore original osnoise timerlat_period_us\n");
+
+out_done:
+ context->timerlat_period_us = OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_put_timerlat_period_us - restore original values and cleanup data
+ */
+void osnoise_put_timerlat_period_us(struct osnoise_context *context)
+{
+ osnoise_restore_timerlat_period_us(context);
+
+ if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL)
+ return;
+
+ context->orig_timerlat_period_us = OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_get_stop_us - read and save the original "stop_tracing_us"
+ */
+static long long
+osnoise_get_stop_us(struct osnoise_context *context)
+{
+ long long stop_us;
+
+ if (context->stop_us != OSNOISE_OPTION_INIT_VAL)
+ return context->stop_us;
+
+ if (context->orig_stop_us != OSNOISE_OPTION_INIT_VAL)
+ return context->orig_stop_us;
+
+ stop_us = osnoise_read_ll_config("osnoise/stop_tracing_us");
+ if (stop_us < 0)
+ goto out_err;
+
+ context->orig_stop_us = stop_us;
+ return stop_us;
+
+out_err:
+ return OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_set_stop_us - set "stop_tracing_us"
+ */
+int osnoise_set_stop_us(struct osnoise_context *context, long long stop_us)
+{
+ long long curr_stop_us = osnoise_get_stop_us(context);
+ int retval;
+
+ if (curr_stop_us == OSNOISE_OPTION_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_us", stop_us);
+ if (retval < 0)
+ return -1;
+
+ context->stop_us = stop_us;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_stop_us - restore the original "stop_tracing_us"
+ */
+void osnoise_restore_stop_us(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ if (context->orig_stop_us == context->stop_us)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_us", context->orig_stop_us);
+ if (retval < 0)
+ err_msg("Could not restore original osnoise stop_us\n");
+
+out_done:
+ context->stop_us = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_put_stop_us - restore original values and cleanup data
+ */
+void osnoise_put_stop_us(struct osnoise_context *context)
+{
+ osnoise_restore_stop_us(context);
+
+ if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ context->orig_stop_us = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_get_stop_total_us - read and save the original "stop_tracing_total_us"
+ */
+static long long
+osnoise_get_stop_total_us(struct osnoise_context *context)
+{
+ long long stop_total_us;
+
+ if (context->stop_total_us != OSNOISE_OPTION_INIT_VAL)
+ return context->stop_total_us;
+
+ if (context->orig_stop_total_us != OSNOISE_OPTION_INIT_VAL)
+ return context->orig_stop_total_us;
+
+ stop_total_us = osnoise_read_ll_config("osnoise/stop_tracing_total_us");
+ if (stop_total_us < 0)
+ goto out_err;
+
+ context->orig_stop_total_us = stop_total_us;
+ return stop_total_us;
+
+out_err:
+ return OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_set_stop_total_us - set "stop_tracing_total_us"
+ */
+int osnoise_set_stop_total_us(struct osnoise_context *context, long long stop_total_us)
+{
+ long long curr_stop_total_us = osnoise_get_stop_total_us(context);
+ int retval;
+
+ if (curr_stop_total_us == OSNOISE_OPTION_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", stop_total_us);
+ if (retval < 0)
+ return -1;
+
+ context->stop_total_us = stop_total_us;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_stop_total_us - restore the original "stop_tracing_total_us"
+ */
+void osnoise_restore_stop_total_us(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ if (context->orig_stop_total_us == context->stop_total_us)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us",
+ context->orig_stop_total_us);
+ if (retval < 0)
+ err_msg("Could not restore original osnoise stop_total_us\n");
+
+out_done:
+ context->stop_total_us = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_put_stop_total_us - restore original values and cleanup data
+ */
+void osnoise_put_stop_total_us(struct osnoise_context *context)
+{
+ osnoise_restore_stop_total_us(context);
+
+ if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_get_print_stack - read and save the original "print_stack"
+ */
+static long long
+osnoise_get_print_stack(struct osnoise_context *context)
+{
+ long long print_stack;
+
+ if (context->print_stack != OSNOISE_OPTION_INIT_VAL)
+ return context->print_stack;
+
+ if (context->orig_print_stack != OSNOISE_OPTION_INIT_VAL)
+ return context->orig_print_stack;
+
+ print_stack = osnoise_read_ll_config("osnoise/print_stack");
+ if (print_stack < 0)
+ goto out_err;
+
+ context->orig_print_stack = print_stack;
+ return print_stack;
+
+out_err:
+ return OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_set_print_stack - set "print_stack"
+ */
+int osnoise_set_print_stack(struct osnoise_context *context, long long print_stack)
+{
+ long long curr_print_stack = osnoise_get_print_stack(context);
+ int retval;
+
+ if (curr_print_stack == OSNOISE_OPTION_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/print_stack", print_stack);
+ if (retval < 0)
+ return -1;
+
+ context->print_stack = print_stack;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_print_stack - restore the original "print_stack"
+ */
+void osnoise_restore_print_stack(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ if (context->orig_print_stack == context->print_stack)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("osnoise/print_stack", context->orig_print_stack);
+ if (retval < 0)
+ err_msg("Could not restore original osnoise print_stack\n");
+
+out_done:
+ context->print_stack = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_put_print_stack - restore original values and cleanup data
+ */
+void osnoise_put_print_stack(struct osnoise_context *context)
+{
+ osnoise_restore_print_stack(context);
+
+ if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ context->orig_print_stack = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * enable_osnoise - enable osnoise tracer in the trace_instance
+ */
+int enable_osnoise(struct trace_instance *trace)
+{
+ return enable_tracer_by_name(trace->inst, "osnoise");
+}
+
+/*
+ * enable_timerlat - enable timerlat tracer in the trace_instance
+ */
+int enable_timerlat(struct trace_instance *trace)
+{
+ return enable_tracer_by_name(trace->inst, "timerlat");
+}
+
+enum {
+ FLAG_CONTEXT_NEWLY_CREATED = (1 << 0),
+ FLAG_CONTEXT_DELETED = (1 << 1),
+};
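+
+/*
+ * A context starts life with ref == 1, taken in osnoise_context_alloc().
+ * FLAG_CONTEXT_DELETED marks a context whose last reference was dropped;
+ * osnoise_get_context() refuses to hand out references to such a context.
+ */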
+
+/*
+ * osnoise_get_context - increase the usage of a context and return it
+ */
+int osnoise_get_context(struct osnoise_context *context)
+{
+ int ret;
+
+ if (context->flags & FLAG_CONTEXT_DELETED) {
+ ret = -1;
+ } else {
+ context->ref++;
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/*
+ * osnoise_context_alloc - alloc an osnoise_context
+ *
+ * The osnoise context contains the information of the "osnoise/" configs.
+ * It is used to set and restore the config.
+ */
+struct osnoise_context *osnoise_context_alloc(void)
+{
+ struct osnoise_context *context;
+
+ context = calloc(1, sizeof(*context));
+ if (!context)
+ return NULL;
+
+ context->orig_stop_us = OSNOISE_OPTION_INIT_VAL;
+ context->stop_us = OSNOISE_OPTION_INIT_VAL;
+
+ context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL;
+ context->stop_total_us = OSNOISE_OPTION_INIT_VAL;
+
+ context->orig_print_stack = OSNOISE_OPTION_INIT_VAL;
+ context->print_stack = OSNOISE_OPTION_INIT_VAL;
+
+ osnoise_get_context(context);
+
+ return context;
+}
+
+/*
+ * osnoise_put_context - drop a reference to the osnoise_context
+ *
+ * If there is no other user for the context, the original data
+ * is restored.
+ */
+void osnoise_put_context(struct osnoise_context *context)
+{
+ if (--context->ref < 1)
+ context->flags |= FLAG_CONTEXT_DELETED;
+
+ if (!(context->flags & FLAG_CONTEXT_DELETED))
+ return;
+
+ osnoise_put_cpus(context);
+ osnoise_put_runtime_period(context);
+ osnoise_put_stop_us(context);
+ osnoise_put_stop_total_us(context);
+ osnoise_put_timerlat_period_us(context);
+ osnoise_put_print_stack(context);
+
+ free(context);
+}
+
+/*
+ * osnoise_destroy_tool - disable trace, restore configs and free data
+ */
+void osnoise_destroy_tool(struct osnoise_tool *top)
+{
+ trace_instance_destroy(&top->trace);
+
+ if (top->context)
+ osnoise_put_context(top->context);
+
+ free(top);
+}
+
+/*
+ * osnoise_init_tool - init an osnoise tool
+ *
+ * It allocates data, creates a context to store data, and
+ * creates a new trace instance for the tool.
+ */
+struct osnoise_tool *osnoise_init_tool(char *tool_name)
+{
+ struct osnoise_tool *top;
+ int retval;
+
+ top = calloc(1, sizeof(*top));
+ if (!top)
+ return NULL;
+
+ top->context = osnoise_context_alloc();
+ if (!top->context)
+ goto out_err;
+
+ retval = trace_instance_init(&top->trace, tool_name);
+ if (retval)
+ goto out_err;
+
+ return top;
+out_err:
+ osnoise_destroy_tool(top);
+ return NULL;
+}
+
+/*
+ * osnoise_init_trace_tool - init a tracer instance to trace osnoise events
+ */
+struct osnoise_tool *osnoise_init_trace_tool(char *tracer)
+{
+ struct osnoise_tool *trace;
+ int retval;
+
+ trace = osnoise_init_tool("osnoise_trace");
+ if (!trace)
+ return NULL;
+
+ retval = tracefs_event_enable(trace->trace.inst, "osnoise", NULL);
+ if (retval < 0 && !errno) {
+ err_msg("Could not find osnoise events\n");
+ goto out_err;
+ }
+
+ retval = enable_tracer_by_name(trace->trace.inst, tracer);
+ if (retval) {
+ err_msg("Could not enable osnoiser tracer for tracing\n");
+ goto out_err;
+ }
+
+ return trace;
+out_err:
+ osnoise_destroy_tool(trace);
+ return NULL;
+}
+
+static void osnoise_usage(void)
+{
+ int i;
+
+ static const char *msg[] = {
+ "",
+ "osnoise version " VERSION,
+ "",
+ " usage: [rtla] osnoise [MODE] ...",
+ "",
+ " modes:",
+ " top - prints the summary from osnoise tracer",
+ " hist - prints a histogram of osnoise samples",
+ "",
+ "if no MODE is given, the top mode is called, passing the arguments",
+ NULL,
+ };
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+int osnoise_main(int argc, char *argv[])
+{
+ if (argc == 0)
+ goto usage;
+
+ /*
+ * if osnoise was called without any argument, run the
+ * default cmdline.
+ */
+ if (argc == 1) {
+ osnoise_top_main(argc, argv);
+ exit(0);
+ }
+
+ if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
+ osnoise_usage();
+ exit(0);
+ } else if (strncmp(argv[1], "-", 1) == 0) {
+		/* the user skipped the mode, call the default one */
+ osnoise_top_main(argc, argv);
+ exit(0);
+ } else if (strcmp(argv[1], "top") == 0) {
+ osnoise_top_main(argc-1, &argv[1]);
+ exit(0);
+ } else if (strcmp(argv[1], "hist") == 0) {
+ osnoise_hist_main(argc-1, &argv[1]);
+ exit(0);
+ }
+
+usage:
+ osnoise_usage();
+ exit(1);
+}
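Every tunable in this file follows the same save/set/restore/put sequence around a tracefs file. A condensed sketch of the pattern, using the libtracefs calls the file already relies on (build with `pkg-config --cflags --libs libtracefs`; the struct here is a simplification):

	#include <stdlib.h>
	#include <tracefs.h>

	struct knob { char *orig; };

	/* Save the current file content once, then overwrite it. */
	static int knob_set(struct knob *k, const char *path, const char *val)
	{
		if (!k->orig)
			k->orig = tracefs_instance_file_read(NULL, path, NULL);
		if (!k->orig)
			return -1;
		return tracefs_instance_file_write(NULL, path, val) < 0 ? -1 : 0;
	}

	/* Write the saved content back and release it. */
	static void knob_put(struct knob *k, const char *path)
	{
		if (!k->orig)
			return;
		tracefs_instance_file_write(NULL, path, k->orig);
		free(k->orig);
		k->orig = NULL;
	}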
diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h
new file mode 100644
index 000000000000..9e4b2e2a4559
--- /dev/null
+++ b/tools/tracing/rtla/src/osnoise.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "trace.h"
+
+/*
+ * osnoise_context - read, store, write, restore osnoise configs.
+ */
+struct osnoise_context {
+ int flags;
+ int ref;
+
+ char *curr_cpus;
+ char *orig_cpus;
+
+ /* 0 as init value */
+ unsigned long long orig_runtime_us;
+ unsigned long long runtime_us;
+
+ /* 0 as init value */
+ unsigned long long orig_period_us;
+ unsigned long long period_us;
+
+ /* 0 as init value */
+ long long orig_timerlat_period_us;
+ long long timerlat_period_us;
+
+ /* -1 as init value because 0 is disabled */
+ long long orig_stop_us;
+ long long stop_us;
+
+ /* -1 as init value because 0 is disabled */
+ long long orig_stop_total_us;
+ long long stop_total_us;
+
+ /* -1 as init value because 0 is disabled */
+ long long orig_print_stack;
+ long long print_stack;
+};
+
+/*
+ * *_INIT_VALs are also invalid values; they are used to
+ * communicate errors.
+ */
+#define OSNOISE_OPTION_INIT_VAL (-1)
+#define OSNOISE_TIME_INIT_VAL (0)
+
+struct osnoise_context *osnoise_context_alloc(void);
+int osnoise_get_context(struct osnoise_context *context);
+void osnoise_put_context(struct osnoise_context *context);
+
+int osnoise_set_cpus(struct osnoise_context *context, char *cpus);
+void osnoise_restore_cpus(struct osnoise_context *context);
+
+int osnoise_set_runtime_period(struct osnoise_context *context,
+ unsigned long long runtime,
+ unsigned long long period);
+void osnoise_restore_runtime_period(struct osnoise_context *context);
+
+int osnoise_set_stop_us(struct osnoise_context *context,
+ long long stop_us);
+void osnoise_restore_stop_us(struct osnoise_context *context);
+
+int osnoise_set_stop_total_us(struct osnoise_context *context,
+ long long stop_total_us);
+void osnoise_restore_stop_total_us(struct osnoise_context *context);
+
+int osnoise_set_timerlat_period_us(struct osnoise_context *context,
+ long long timerlat_period_us);
+void osnoise_restore_timerlat_period_us(struct osnoise_context *context);
+
+void osnoise_restore_print_stack(struct osnoise_context *context);
+int osnoise_set_print_stack(struct osnoise_context *context,
+ long long print_stack);
+
+/*
+ * osnoise_tool - osnoise based tool definition.
+ */
+struct osnoise_tool {
+ struct trace_instance trace;
+ struct osnoise_context *context;
+ void *data;
+ void *params;
+ time_t start_time;
+};
+
+void osnoise_destroy_tool(struct osnoise_tool *top);
+struct osnoise_tool *osnoise_init_tool(char *tool_name);
+struct osnoise_tool *osnoise_init_trace_tool(char *tracer);
+
+int osnoise_hist_main(int argc, char *argv[]);
+int osnoise_top_main(int argc, char **argv);
+int osnoise_main(int argc, char **argv);
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
new file mode 100644
index 000000000000..180fcbe423cd
--- /dev/null
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -0,0 +1,801 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "utils.h"
+#include "osnoise.h"
+
+struct osnoise_hist_params {
+ char *cpus;
+ char *monitored_cpus;
+ char *trace_output;
+ unsigned long long runtime;
+ unsigned long long period;
+ long long stop_us;
+ long long stop_total_us;
+ int sleep_time;
+ int duration;
+ int set_sched;
+ int output_divisor;
+ struct sched_attr sched_param;
+
+ char no_header;
+ char no_summary;
+ char no_index;
+ char with_zeros;
+ int bucket_size;
+ int entries;
+};
+
+struct osnoise_hist_cpu {
+ int *samples;
+ int count;
+
+ unsigned long long min_sample;
+ unsigned long long sum_sample;
+ unsigned long long max_sample;
+};
+
+struct osnoise_hist_data {
+ struct tracefs_hist *trace_hist;
+ struct osnoise_hist_cpu *hist;
+ int entries;
+ int bucket_size;
+ int nr_cpus;
+};
+
+/*
+ * osnoise_free_histogram - free runtime data
+ */
+static void
+osnoise_free_histogram(struct osnoise_hist_data *data)
+{
+ int cpu;
+
+	/* one histogram per CPU */
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (data->hist[cpu].samples)
+ free(data->hist[cpu].samples);
+ }
+
+ /* one set of histograms per CPU */
+ if (data->hist)
+ free(data->hist);
+
+ free(data);
+}
+
+/*
+ * osnoise_alloc_histogram - alloc runtime data
+ */
+static struct osnoise_hist_data
+*osnoise_alloc_histogram(int nr_cpus, int entries, int bucket_size)
+{
+ struct osnoise_hist_data *data;
+ int cpu;
+
+ data = calloc(1, sizeof(*data));
+ if (!data)
+ return NULL;
+
+ data->entries = entries;
+ data->bucket_size = bucket_size;
+ data->nr_cpus = nr_cpus;
+
+ data->hist = calloc(1, sizeof(*data->hist) * nr_cpus);
+ if (!data->hist)
+ goto cleanup;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ data->hist[cpu].samples = calloc(1, sizeof(*data->hist->samples) * (entries + 1));
+ if (!data->hist[cpu].samples)
+ goto cleanup;
+ }
+
+ /* set the min to max */
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ data->hist[cpu].min_sample = ~0;
+
+ return data;
+
+cleanup:
+ osnoise_free_histogram(data);
+ return NULL;
+}
+
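+/*
+ * osnoise_hist_update_multiple - add observed durations to the histogram
+ *
+ * Besides bumping the bucket, it updates the per-cpu count/min/sum/max.
+ * With the default output_divisor of 1000 and bucket_size of 1, e.g., a
+ * raw sample of 5300 (ns) becomes 5 us and lands in bucket 5; samples at
+ * or past 'entries' buckets are accumulated in the overflow bucket
+ * hist[entries]. bucket_size is validated to be > 0 at parse time.
+ */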
+static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu,
+ unsigned long long duration, int count)
+{
+ struct osnoise_hist_params *params = tool->params;
+ struct osnoise_hist_data *data = tool->data;
+ int entries = data->entries;
+ int bucket;
+ int *hist;
+
+ if (params->output_divisor)
+ duration = duration / params->output_divisor;
+
+ if (data->bucket_size)
+ bucket = duration / data->bucket_size;
+
+ hist = data->hist[cpu].samples;
+ data->hist[cpu].count += count;
+ update_min(&data->hist[cpu].min_sample, &duration);
+ update_sum(&data->hist[cpu].sum_sample, &duration);
+ update_max(&data->hist[cpu].max_sample, &duration);
+
+ if (bucket < entries)
+ hist[bucket] += count;
+ else
+ hist[entries] += count;
+}
+
+/*
+ * osnoise_destroy_trace_hist - disable events used to collect histogram
+ */
+static void osnoise_destroy_trace_hist(struct osnoise_tool *tool)
+{
+ struct osnoise_hist_data *data = tool->data;
+
+ tracefs_hist_pause(tool->trace.inst, data->trace_hist);
+ tracefs_hist_destroy(tool->trace.inst, data->trace_hist);
+}
+
+/*
+ * osnoise_init_trace_hist - enable events used to collect histogram
+ */
+static int osnoise_init_trace_hist(struct osnoise_tool *tool)
+{
+ struct osnoise_hist_params *params = tool->params;
+ struct osnoise_hist_data *data = tool->data;
+ int bucket_size;
+ char buff[128];
+ int retval = 0;
+
+ /*
+ * Set the size of the bucket.
+ */
+ bucket_size = params->output_divisor * params->bucket_size;
+ snprintf(buff, sizeof(buff), "duration.buckets=%d", bucket_size);
+
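+	/*
+	 * Together with the cpu key added below, this is roughly
+	 * equivalent to setting the hist:keys=duration.buckets=<size>,cpu
+	 * trigger on the osnoise:sample_threshold event.
+	 */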
+ data->trace_hist = tracefs_hist_alloc(tool->trace.tep, "osnoise", "sample_threshold",
+ buff, TRACEFS_HIST_KEY_NORMAL);
+ if (!data->trace_hist)
+ return 1;
+
+ retval = tracefs_hist_add_key(data->trace_hist, "cpu", 0);
+ if (retval)
+ goto out_err;
+
+ retval = tracefs_hist_start(tool->trace.inst, data->trace_hist);
+ if (retval)
+ goto out_err;
+
+ return 0;
+
+out_err:
+ osnoise_destroy_trace_hist(tool);
+ return 1;
+}
+
+/*
+ * osnoise_read_trace_hist - parse the histogram file and fill the osnoise histogram
+ */
+static void osnoise_read_trace_hist(struct osnoise_tool *tool)
+{
+ struct osnoise_hist_data *data = tool->data;
+ long long cpu, counter, duration;
+ char *content, *position;
+
+ tracefs_hist_pause(tool->trace.inst, data->trace_hist);
+
+ content = tracefs_event_file_read(tool->trace.inst, "osnoise",
+ "sample_threshold",
+ "hist", NULL);
+ if (!content)
+ return;
+
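+	/*
+	 * Each histogram line looks something like:
+	 *   { duration: ~ 1000-1999, cpu: 2 } hitcount: 12
+	 * so scan for the duration, cpu and hitcount markers, in order.
+	 */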
+ position = content;
+ while (true) {
+ position = strstr(position, "duration: ~");
+ if (!position)
+ break;
+ position += strlen("duration: ~");
+ duration = get_llong_from_str(position);
+ if (duration == -1)
+ err_msg("error reading duration from histogram\n");
+
+ position = strstr(position, "cpu:");
+ if (!position)
+ break;
+ position += strlen("cpu: ");
+ cpu = get_llong_from_str(position);
+ if (cpu == -1)
+ err_msg("error reading cpu from histogram\n");
+
+ position = strstr(position, "hitcount:");
+ if (!position)
+ break;
+ position += strlen("hitcount: ");
+ counter = get_llong_from_str(position);
+ if (counter == -1)
+ err_msg("error reading counter from histogram\n");
+
+ osnoise_hist_update_multiple(tool, cpu, duration, counter);
+ }
+ free(content);
+}
+
+/*
+ * osnoise_hist_header - print the header of the tracer to the output
+ */
+static void osnoise_hist_header(struct osnoise_tool *tool)
+{
+ struct osnoise_hist_params *params = tool->params;
+ struct osnoise_hist_data *data = tool->data;
+ struct trace_seq *s = tool->trace.seq;
+ char duration[26];
+ int cpu;
+
+ if (params->no_header)
+ return;
+
+ get_duration(tool->start_time, duration, sizeof(duration));
+ trace_seq_printf(s, "# RTLA osnoise histogram\n");
+ trace_seq_printf(s, "# Time unit is %s (%s)\n",
+ params->output_divisor == 1 ? "nanoseconds" : "microseconds",
+ params->output_divisor == 1 ? "ns" : "us");
+
+ trace_seq_printf(s, "# Duration: %s\n", duration);
+
+ if (!params->no_index)
+ trace_seq_printf(s, "Index");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(s, " CPU-%03d", cpu);
+ }
+ trace_seq_printf(s, "\n");
+
+ trace_seq_do_printf(s);
+ trace_seq_reset(s);
+}
+
+/*
+ * osnoise_print_summary - print the summary of the hist data to the output
+ */
+static void
+osnoise_print_summary(struct osnoise_hist_params *params,
+ struct trace_instance *trace,
+ struct osnoise_hist_data *data)
+{
+ int cpu;
+
+ if (params->no_summary)
+ return;
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "count:");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].count);
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "min: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].min_sample);
+
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "avg: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+		trace_seq_printf(trace->seq, "%9llu ",
+				 data->hist[cpu].sum_sample / data->hist[cpu].count);
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "max: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].max_sample);
+
+ }
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
+/*
+ * osnoise_print_stats - print data for all CPUs
+ */
+static void
+osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *tool)
+{
+ struct osnoise_hist_data *data = tool->data;
+ struct trace_instance *trace = &tool->trace;
+ int bucket, cpu;
+ int total;
+
+ osnoise_hist_header(tool);
+
+ for (bucket = 0; bucket < data->entries; bucket++) {
+ total = 0;
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "%-6d",
+ bucket * data->bucket_size);
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ total += data->hist[cpu].samples[bucket];
+ trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].samples[bucket]);
+ }
+
+ if (total == 0 && !params->with_zeros) {
+ trace_seq_reset(trace->seq);
+ continue;
+ }
+
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+ }
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "over: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].samples[data->entries]);
+ }
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+
+ osnoise_print_summary(params, trace, data);
+}
+
+/*
+ * osnoise_hist_usage - prints osnoise hist usage message
+ */
+static void osnoise_hist_usage(char *usage)
+{
+ int i;
+
+ static const char * const msg[] = {
+ "",
+ " usage: rtla osnoise hist [-h] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\",
+ " [-c cpu-list] [-P priority] [-b N] [-e N] [--no-header] [--no-summary] \\",
+ " [--no-index] [--with-zeros]",
+ "",
+ " -h/--help: print this menu",
+ " -p/--period us: osnoise period in us",
+ " -r/--runtime us: osnoise runtime in us",
+ " -s/--stop us: stop trace if a single sample is higher than the argument in us",
+ " -S/--stop-total us: stop trace if the total sample is higher than the argument in us",
+ " -c/--cpus cpu-list: list of cpus to run osnoise threads",
+ " -d/--duration time[s|m|h|d]: duration of the session",
+ " -D/--debug: print debug info",
+ " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]",
+ " -b/--bucket-size N: set the histogram bucket size (default 1)",
+ " -e/--entries N: set the number of entries of the histogram (default 256)",
+ " --no-header: do not print header",
+ " --no-summary: do not print summary",
+ " --no-index: do not print index",
+ " --with-zeros: print zero only entries",
+ " -P/--priority o:prio|r:prio|f:prio|d:runtime:period: set scheduling parameters",
+ " o:prio - use SCHED_OTHER with prio",
+ " r:prio - use SCHED_RR with prio",
+ " f:prio - use SCHED_FIFO with prio",
+ " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
+ " in nanoseconds",
+ NULL,
+ };
+
+ if (usage)
+ fprintf(stderr, "%s\n", usage);
+
+ fprintf(stderr, "rtla osnoise hist: a per-cpu histogram of the OS noise (version %s)\n",
+ VERSION);
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * osnoise_hist_parse_args - allocate, parse and fill the command line parameters
+ */
+static struct osnoise_hist_params
+*osnoise_hist_parse_args(int argc, char *argv[])
+{
+ struct osnoise_hist_params *params;
+ int retval;
+ int c;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ exit(1);
+
+ /* display data in microseconds */
+ params->output_divisor = 1000;
+ params->bucket_size = 1;
+ params->entries = 256;
+
+ while (1) {
+ static struct option long_options[] = {
+ {"bucket-size", required_argument, 0, 'b'},
+ {"entries", required_argument, 0, 'e'},
+ {"cpus", required_argument, 0, 'c'},
+ {"debug", no_argument, 0, 'D'},
+ {"duration", required_argument, 0, 'd'},
+ {"help", no_argument, 0, 'h'},
+ {"period", required_argument, 0, 'p'},
+ {"priority", required_argument, 0, 'P'},
+ {"runtime", required_argument, 0, 'r'},
+ {"stop", required_argument, 0, 's'},
+ {"stop-total", required_argument, 0, 'S'},
+ {"trace", optional_argument, 0, 't'},
+ {"no-header", no_argument, 0, '0'},
+ {"no-summary", no_argument, 0, '1'},
+ {"no-index", no_argument, 0, '2'},
+ {"with-zeros", no_argument, 0, '3'},
+ {0, 0, 0, 0}
+ };
+
+ /* getopt_long stores the option index here. */
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "c:b:d:e:Dhp:P:r:s:S:t::0123",
+ long_options, &option_index);
+
+ /* detect the end of the options. */
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'b':
+ params->bucket_size = get_llong_from_str(optarg);
+ if ((params->bucket_size == 0) || (params->bucket_size >= 1000000))
+ osnoise_hist_usage("Bucket size needs to be > 0 and <= 1000000\n");
+ break;
+ case 'c':
+ retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ if (retval)
+ osnoise_hist_usage("\nInvalid -c cpu list\n");
+ params->cpus = optarg;
+ break;
+ case 'D':
+ config_debug = 1;
+ break;
+ case 'd':
+ params->duration = parse_seconds_duration(optarg);
+ if (!params->duration)
+ osnoise_hist_usage("Invalid -D duration\n");
+ break;
+ case 'e':
+ params->entries = get_llong_from_str(optarg);
+ if ((params->entries < 10) || (params->entries > 9999999))
+ osnoise_hist_usage("Entries must be > 10 and < 9999999\n");
+ break;
+ case 'h':
+ case '?':
+ osnoise_hist_usage(NULL);
+ break;
+ case 'p':
+ params->period = get_llong_from_str(optarg);
+ if (params->period > 10000000)
+ osnoise_hist_usage("Period longer than 10 s\n");
+ break;
+ case 'P':
+ retval = parse_prio(optarg, &params->sched_param);
+ if (retval == -1)
+ osnoise_hist_usage("Invalid -P priority");
+ params->set_sched = 1;
+ break;
+ case 'r':
+ params->runtime = get_llong_from_str(optarg);
+ if (params->runtime < 100)
+ osnoise_hist_usage("Runtime shorter than 100 us\n");
+ break;
+ case 's':
+ params->stop_us = get_llong_from_str(optarg);
+ break;
+ case 'S':
+ params->stop_total_us = get_llong_from_str(optarg);
+ break;
+ case 't':
+			if (optarg)
+				/* skip the leading = of the -t=file form; --trace=file has none */
+				params->trace_output = optarg[0] == '=' ? &optarg[1] : optarg;
+ else
+ params->trace_output = "osnoise_trace.txt";
+ break;
+ case '0': /* no header */
+ params->no_header = 1;
+ break;
+ case '1': /* no summary */
+ params->no_summary = 1;
+ break;
+ case '2': /* no index */
+ params->no_index = 1;
+ break;
+ case '3': /* with zeros */
+ params->with_zeros = 1;
+ break;
+ default:
+ osnoise_hist_usage("Invalid option");
+ }
+ }
+
+ if (geteuid()) {
+ err_msg("rtla needs root permission\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (params->no_index && !params->with_zeros)
+ osnoise_hist_usage("no-index set and with-zeros not set - it does not make sense");
+
+ return params;
+}
+
+/*
+ * osnoise_hist_apply_config - apply the hist configs to the initialized tool
+ */
+static int
+osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ if (params->cpus) {
+ retval = osnoise_set_cpus(tool->context, params->cpus);
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+ }
+
+ if (params->runtime || params->period) {
+ retval = osnoise_set_runtime_period(tool->context,
+ params->runtime,
+ params->period);
+ if (retval) {
+ err_msg("Failed to set runtime and/or period\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_us) {
+ retval = osnoise_set_stop_us(tool->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_total_us) {
+ retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+/*
+ * osnoise_init_hist - initialize an osnoise hist tool with parameters
+ */
+static struct osnoise_tool
+*osnoise_init_hist(struct osnoise_hist_params *params)
+{
+ struct osnoise_tool *tool;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ tool = osnoise_init_tool("osnoise_hist");
+ if (!tool)
+ return NULL;
+
+ tool->data = osnoise_alloc_histogram(nr_cpus, params->entries, params->bucket_size);
+ if (!tool->data)
+ goto out_err;
+
+ tool->params = params;
+
+ return tool;
+
+out_err:
+ osnoise_destroy_tool(tool);
+ return NULL;
+}
+
+static int stop_tracing;
+static void stop_hist(int sig)
+{
+ stop_tracing = 1;
+}
+
+/*
+ * osnoise_hist_set_signals - handles the signal to stop the tool
+ */
+static void
+osnoise_hist_set_signals(struct osnoise_hist_params *params)
+{
+ signal(SIGINT, stop_hist);
+ if (params->duration) {
+ signal(SIGALRM, stop_hist);
+ alarm(params->duration);
+ }
+}
+
+int osnoise_hist_main(int argc, char *argv[])
+{
+ struct osnoise_hist_params *params;
+ struct trace_instance *trace;
+ struct osnoise_tool *record;
+ struct osnoise_tool *tool;
+ int return_value = 1;
+ int retval;
+
+ params = osnoise_hist_parse_args(argc, argv);
+ if (!params)
+ exit(1);
+
+ tool = osnoise_init_hist(params);
+ if (!tool) {
+ err_msg("Could not init osnoise hist\n");
+ goto out_exit;
+ }
+
+ retval = osnoise_hist_apply_config(tool, params);
+ if (retval) {
+ err_msg("Could not apply config\n");
+ goto out_destroy;
+ }
+
+ trace = &tool->trace;
+
+ retval = enable_osnoise(trace);
+ if (retval) {
+ err_msg("Failed to enable osnoise tracer\n");
+ goto out_destroy;
+ }
+
+ retval = osnoise_init_trace_hist(tool);
+ if (retval)
+ goto out_destroy;
+
+ if (params->set_sched) {
+ retval = set_comm_sched_attr("osnoise/", &params->sched_param);
+ if (retval) {
+ err_msg("Failed to set sched parameters\n");
+ goto out_hist;
+ }
+ }
+
+ trace_instance_start(trace);
+
+ if (params->trace_output) {
+ record = osnoise_init_trace_tool("osnoise");
+ if (!record) {
+ err_msg("Failed to enable the trace instance\n");
+ goto out_hist;
+ }
+ trace_instance_start(&record->trace);
+ }
+
+ tool->start_time = time(NULL);
+ osnoise_hist_set_signals(params);
+
+ while (!stop_tracing) {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_hist;
+ }
+
+ if (!tracefs_trace_is_on(trace->inst))
+ break;
+	}
+
+ osnoise_read_trace_hist(tool);
+
+ osnoise_print_stats(params, tool);
+
+ return_value = 0;
+
+ if (!tracefs_trace_is_on(trace->inst)) {
+ printf("rtla timelat hit stop tracing\n");
+ if (params->trace_output) {
+ printf(" Saving trace to %s\n", params->trace_output);
+ save_trace_to_file(record->trace.inst, params->trace_output);
+ }
+ }
+
+out_hist:
+ osnoise_free_histogram(tool->data);
+out_destroy:
+ osnoise_destroy_tool(tool);
+ if (params->trace_output)
+ osnoise_destroy_tool(record);
+ free(params);
+out_exit:
+ exit(return_value);
+}
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
new file mode 100644
index 000000000000..332b2ac205fc
--- /dev/null
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "osnoise.h"
+#include "utils.h"
+
+/*
+ * osnoise top parameters
+ */
+struct osnoise_top_params {
+ char *cpus;
+ char *monitored_cpus;
+ char *trace_output;
+ unsigned long long runtime;
+ unsigned long long period;
+ long long stop_us;
+ long long stop_total_us;
+ int sleep_time;
+ int duration;
+ int quiet;
+ int set_sched;
+ struct sched_attr sched_param;
+};
+
+struct osnoise_top_cpu {
+ unsigned long long sum_runtime;
+ unsigned long long sum_noise;
+ unsigned long long max_noise;
+ unsigned long long max_sample;
+
+ unsigned long long hw_count;
+ unsigned long long nmi_count;
+ unsigned long long irq_count;
+ unsigned long long softirq_count;
+ unsigned long long thread_count;
+
+ int sum_cycles;
+};
+
+struct osnoise_top_data {
+ struct osnoise_top_cpu *cpu_data;
+ int nr_cpus;
+};
+
+/*
+ * osnoise_free_top - free runtime data
+ */
+static void
+osnoise_free_top(struct osnoise_top_data *data)
+{
+ free(data->cpu_data);
+ free(data);
+}
+
+/*
+ * osnoise_alloc_top - alloc runtime data
+ */
+static struct osnoise_top_data *osnoise_alloc_top(int nr_cpus)
+{
+ struct osnoise_top_data *data;
+
+ data = calloc(1, sizeof(*data));
+ if (!data)
+ return NULL;
+
+ data->nr_cpus = nr_cpus;
+
+	/* one set of counters per CPU */
+ data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus);
+ if (!data->cpu_data)
+ goto cleanup;
+
+ return data;
+
+cleanup:
+ osnoise_free_top(data);
+ return NULL;
+}
+
+/*
+ * osnoise_top_handler - this is the handler for osnoise tracer events
+ */
+static int
+osnoise_top_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ struct trace_instance *trace = context;
+ struct osnoise_tool *tool;
+ unsigned long long val;
+ struct osnoise_top_cpu *cpu_data;
+ struct osnoise_top_data *data;
+ int cpu = record->cpu;
+
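+	/* recover the enclosing osnoise_tool from its embedded trace instance */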
+ tool = container_of(trace, struct osnoise_tool, trace);
+
+ data = tool->data;
+ cpu_data = &data->cpu_data[cpu];
+
+ cpu_data->sum_cycles++;
+
+ tep_get_field_val(s, event, "runtime", record, &val, 1);
+ update_sum(&cpu_data->sum_runtime, &val);
+
+ tep_get_field_val(s, event, "noise", record, &val, 1);
+ update_max(&cpu_data->max_noise, &val);
+ update_sum(&cpu_data->sum_noise, &val);
+
+ tep_get_field_val(s, event, "max_sample", record, &val, 1);
+ update_max(&cpu_data->max_sample, &val);
+
+ tep_get_field_val(s, event, "hw_count", record, &val, 1);
+ update_sum(&cpu_data->hw_count, &val);
+
+ tep_get_field_val(s, event, "nmi_count", record, &val, 1);
+ update_sum(&cpu_data->nmi_count, &val);
+
+ tep_get_field_val(s, event, "irq_count", record, &val, 1);
+ update_sum(&cpu_data->irq_count, &val);
+
+ tep_get_field_val(s, event, "softirq_count", record, &val, 1);
+ update_sum(&cpu_data->softirq_count, &val);
+
+ tep_get_field_val(s, event, "thread_count", record, &val, 1);
+ update_sum(&cpu_data->thread_count, &val);
+
+ return 0;
+}
+
+/*
+ * osnoise_top_header - print the header of the tool output
+ */
+static void osnoise_top_header(struct osnoise_tool *top)
+{
+ struct trace_seq *s = top->trace.seq;
+ char duration[26];
+
+ get_duration(top->start_time, duration, sizeof(duration));
+
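+	/* \033[2;37;40m: faint white on black; \033[0;0;0m: reset (ANSI SGR) */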
+ trace_seq_printf(s, "\033[2;37;40m");
+ trace_seq_printf(s, " Operating System Noise");
+ trace_seq_printf(s, " ");
+ trace_seq_printf(s, " ");
+ trace_seq_printf(s, "\033[0;0;0m");
+ trace_seq_printf(s, "\n");
+
+ trace_seq_printf(s, "duration: %9s | time is in us\n", duration);
+
+ trace_seq_printf(s, "\033[2;30;47m");
+ trace_seq_printf(s, "CPU Period Runtime ");
+ trace_seq_printf(s, " Noise ");
+ trace_seq_printf(s, " %% CPU Aval ");
+ trace_seq_printf(s, " Max Noise Max Single ");
+ trace_seq_printf(s, " HW NMI IRQ Softirq Thread");
+ trace_seq_printf(s, "\033[0;0;0m");
+ trace_seq_printf(s, "\n");
+}
+
+/*
+ * clear_terminal - clears the output terminal
+ */
+static void clear_terminal(struct trace_seq *seq)
+{
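+	/* \033c is the ANSI reset-to-initial-state (RIS) sequence */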
+ if (!config_debug)
+ trace_seq_printf(seq, "\033c");
+}
+
+/*
+ * osnoise_top_print - prints the output of a given CPU
+ */
+static void osnoise_top_print(struct osnoise_tool *tool, int cpu)
+{
+ struct trace_seq *s = tool->trace.seq;
+ struct osnoise_top_cpu *cpu_data;
+ struct osnoise_top_data *data;
+ int percentage;
+ int decimal;
+
+ data = tool->data;
+ cpu_data = &data->cpu_data[cpu];
+
+ if (!cpu_data->sum_runtime)
+ return;
+
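+	/*
+	 * Fixed-point % CPU available with five decimal places: e.g.,
+	 * 1000000 us of runtime with 100 us of noise prints as 99.99000.
+	 */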
+ percentage = ((cpu_data->sum_runtime - cpu_data->sum_noise) * 10000000)
+ / cpu_data->sum_runtime;
+ decimal = percentage % 100000;
+ percentage = percentage / 100000;
+
+ trace_seq_printf(s, "%3d #%-6d %12llu ", cpu, cpu_data->sum_cycles, cpu_data->sum_runtime);
+ trace_seq_printf(s, "%12llu ", cpu_data->sum_noise);
+ trace_seq_printf(s, " %3d.%05d", percentage, decimal);
+ trace_seq_printf(s, "%12llu %12llu", cpu_data->max_noise, cpu_data->max_sample);
+
+ trace_seq_printf(s, "%12llu ", cpu_data->hw_count);
+ trace_seq_printf(s, "%12llu ", cpu_data->nmi_count);
+ trace_seq_printf(s, "%12llu ", cpu_data->irq_count);
+ trace_seq_printf(s, "%12llu ", cpu_data->softirq_count);
+ trace_seq_printf(s, "%12llu\n", cpu_data->thread_count);
+}
+
+/*
+ * osnoise_print_stats - print data for all cpus
+ */
+static void
+osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top)
+{
+ struct trace_instance *trace = &top->trace;
+ static int nr_cpus = -1;
+ int i;
+
+ if (nr_cpus == -1)
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ if (!params->quiet)
+ clear_terminal(trace->seq);
+
+ osnoise_top_header(top);
+
+ for (i = 0; i < nr_cpus; i++) {
+ if (params->cpus && !params->monitored_cpus[i])
+ continue;
+ osnoise_top_print(top, i);
+ }
+
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
+/*
+ * osnoise_top_usage - prints osnoise top usage message
+ */
+void osnoise_top_usage(char *usage)
+{
+ int i;
+
+ static const char * const msg[] = {
+ " usage: rtla osnoise [top] [-h] [-q] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\",
+ " [-c cpu-list] [-P priority]",
+ "",
+ " -h/--help: print this menu",
+ " -p/--period us: osnoise period in us",
+ " -r/--runtime us: osnoise runtime in us",
+ " -s/--stop us: stop trace if a single sample is higher than the argument in us",
+ " -S/--stop-total us: stop trace if the total sample is higher than the argument in us",
+ " -c/--cpus cpu-list: list of cpus to run osnoise threads",
+ " -d/--duration time[s|m|h|d]: duration of the session",
+ " -D/--debug: print debug info",
+ " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]",
+ " -q/--quiet print only a summary at the end",
+ " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
+ " o:prio - use SCHED_OTHER with prio",
+ " r:prio - use SCHED_RR with prio",
+ " f:prio - use SCHED_FIFO with prio",
+ " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
+ " in nanoseconds",
+ NULL,
+ };
+
+ if (usage)
+ fprintf(stderr, "%s\n", usage);
+
+ fprintf(stderr, "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n",
+ VERSION);
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * osnoise_top_parse_args - allocate, parse and fill the command line parameters
+ */
+struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
+{
+ struct osnoise_top_params *params;
+ int retval;
+ int c;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ exit(1);
+
+ while (1) {
+ static struct option long_options[] = {
+ {"cpus", required_argument, 0, 'c'},
+ {"debug", no_argument, 0, 'D'},
+ {"duration", required_argument, 0, 'd'},
+ {"help", no_argument, 0, 'h'},
+ {"period", required_argument, 0, 'p'},
+ {"priority", required_argument, 0, 'P'},
+ {"quiet", no_argument, 0, 'q'},
+ {"runtime", required_argument, 0, 'r'},
+ {"stop", required_argument, 0, 's'},
+ {"stop-total", required_argument, 0, 'S'},
+ {"trace", optional_argument, 0, 't'},
+ {0, 0, 0, 0}
+ };
+
+ /* getopt_long stores the option index here. */
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "c:d:Dhp:P:qr:s:S:t::",
+ long_options, &option_index);
+
+ /* Detect the end of the options. */
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'c':
+ retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ if (retval)
+ osnoise_top_usage("\nInvalid -c cpu list\n");
+ params->cpus = optarg;
+ break;
+ case 'D':
+ config_debug = 1;
+ break;
+ case 'd':
+ params->duration = parse_seconds_duration(optarg);
+ if (!params->duration)
+ osnoise_top_usage("Invalid -D duration\n");
+ break;
+ case 'h':
+ case '?':
+ osnoise_top_usage(NULL);
+ break;
+ case 'p':
+ params->period = get_llong_from_str(optarg);
+ if (params->period > 10000000)
+ osnoise_top_usage("Period longer than 10 s\n");
+ break;
+ case 'P':
+ retval = parse_prio(optarg, &params->sched_param);
+ if (retval == -1)
+ osnoise_top_usage("Invalid -P priority");
+ params->set_sched = 1;
+ break;
+ case 'q':
+ params->quiet = 1;
+ break;
+ case 'r':
+ params->runtime = get_llong_from_str(optarg);
+ if (params->runtime < 100)
+ osnoise_top_usage("Runtime shorter than 100 us\n");
+ break;
+ case 's':
+ params->stop_us = get_llong_from_str(optarg);
+ break;
+ case 'S':
+ params->stop_total_us = get_llong_from_str(optarg);
+ break;
+ case 't':
+			if (optarg)
+				/* skip the leading = of the -t=file form; --trace=file has none */
+				params->trace_output = optarg[0] == '=' ? &optarg[1] : optarg;
+ else
+ params->trace_output = "osnoise_trace.txt";
+ break;
+ default:
+ osnoise_top_usage("Invalid option");
+ }
+ }
+
+ if (geteuid()) {
+ err_msg("osnoise needs root permission\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return params;
+}
+
+/*
+ * osnoise_top_apply_config - apply the top configs to the initialized tool
+ */
+static int
+osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ if (params->cpus) {
+ retval = osnoise_set_cpus(tool->context, params->cpus);
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+ }
+
+ if (params->runtime || params->period) {
+ retval = osnoise_set_runtime_period(tool->context,
+ params->runtime,
+ params->period);
+ if (retval) {
+ err_msg("Failed to set runtime and/or period\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_us) {
+ retval = osnoise_set_stop_us(tool->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_total_us) {
+ retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+/*
+ * osnoise_init_top - initialize an osnoise top tool with parameters
+ */
+struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params)
+{
+ struct osnoise_tool *tool;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ tool = osnoise_init_tool("osnoise_top");
+ if (!tool)
+ return NULL;
+
+ tool->data = osnoise_alloc_top(nr_cpus);
+ if (!tool->data)
+ goto out_err;
+
+ tool->params = params;
+
+ tep_register_event_handler(tool->trace.tep, -1, "ftrace", "osnoise",
+ osnoise_top_handler, NULL);
+
+ return tool;
+
+out_err:
+ osnoise_free_top(tool->data);
+ osnoise_destroy_tool(tool);
+ return NULL;
+}
+
+static int stop_tracing;
+static void stop_top(int sig)
+{
+ stop_tracing = 1;
+}
+
+/*
+ * osnoise_top_set_signals - handles the signal to stop the tool
+ */
+static void osnoise_top_set_signals(struct osnoise_top_params *params)
+{
+ signal(SIGINT, stop_top);
+ if (params->duration) {
+ signal(SIGALRM, stop_top);
+ alarm(params->duration);
+ }
+}
+
+int osnoise_top_main(int argc, char **argv)
+{
+ struct osnoise_top_params *params;
+ struct trace_instance *trace;
+ struct osnoise_tool *record;
+ struct osnoise_tool *tool;
+ int return_value = 1;
+ int retval;
+
+ params = osnoise_top_parse_args(argc, argv);
+ if (!params)
+ exit(1);
+
+ tool = osnoise_init_top(params);
+ if (!tool) {
+ err_msg("Could not init osnoise top\n");
+ goto out_exit;
+ }
+
+ retval = osnoise_top_apply_config(tool, params);
+ if (retval) {
+ err_msg("Could not apply config\n");
+ goto out_top;
+ }
+
+ trace = &tool->trace;
+
+ retval = enable_osnoise(trace);
+ if (retval) {
+ err_msg("Failed to enable osnoise tracer\n");
+ goto out_top;
+ }
+
+ if (params->set_sched) {
+ retval = set_comm_sched_attr("osnoise/", &params->sched_param);
+ if (retval) {
+ err_msg("Failed to set sched parameters\n");
+ goto out_top;
+ }
+ }
+
+ trace_instance_start(trace);
+
+ if (params->trace_output) {
+ record = osnoise_init_trace_tool("osnoise");
+ if (!record) {
+ err_msg("Failed to enable the trace instance\n");
+ goto out_top;
+ }
+ trace_instance_start(&record->trace);
+ }
+
+ tool->start_time = time(NULL);
+ osnoise_top_set_signals(params);
+
+ do {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_top;
+ }
+
+ if (!params->quiet)
+ osnoise_print_stats(params, tool);
+
+ if (!tracefs_trace_is_on(trace->inst))
+ break;
+
+ } while (!stop_tracing);
+
+ osnoise_print_stats(params, tool);
+
+ return_value = 0;
+
+ if (!tracefs_trace_is_on(trace->inst)) {
+ printf("osnoise hit stop tracing\n");
+ if (params->trace_output) {
+ printf(" Saving trace to %s\n", params->trace_output);
+ save_trace_to_file(record->trace.inst, params->trace_output);
+ }
+ }
+
+out_top:
+ osnoise_free_top(tool->data);
+ osnoise_destroy_tool(tool);
+ if (params->trace_output)
+ osnoise_destroy_tool(record);
+out_exit:
+ exit(return_value);
+}
diff --git a/tools/tracing/rtla/src/rtla.c b/tools/tracing/rtla/src/rtla.c
new file mode 100644
index 000000000000..09bd21b8af81
--- /dev/null
+++ b/tools/tracing/rtla/src/rtla.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "osnoise.h"
+#include "timerlat.h"
+
+/*
+ * rtla_usage - print rtla usage
+ */
+static void rtla_usage(void)
+{
+ int i;
+
+ static const char *msg[] = {
+ "",
+ "rtla version " VERSION,
+ "",
+ " usage: rtla COMMAND ...",
+ "",
+ " commands:",
+ " osnoise - gives information about the operating system noise (osnoise)",
+ " timerlat - measures the timer irq and thread latency",
+ "",
+ NULL,
+ };
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * run_command - try to run an rtla tool command
+ *
+ * Returns 0 if no command matches. The tools' main functions
+ * generally do not return, as they call exit() themselves.
+ */
+int run_command(int argc, char **argv, int start_position)
+{
+ if (strcmp(argv[start_position], "osnoise") == 0) {
+ osnoise_main(argc-start_position, &argv[start_position]);
+ goto ran;
+ } else if (strcmp(argv[start_position], "timerlat") == 0) {
+ timerlat_main(argc-start_position, &argv[start_position]);
+ goto ran;
+ }
+
+ return 0;
+ran:
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ int retval;
+
+	/* invoked via an alias, e.g., an osnoise or timerlat link to rtla? */
+ retval = run_command(argc, argv, 0);
+ if (retval)
+ exit(0);
+
+ if (argc < 2)
+ goto usage;
+
+ if (strcmp(argv[1], "-h") == 0) {
+ rtla_usage();
+ exit(0);
+ } else if (strcmp(argv[1], "--help") == 0) {
+ rtla_usage();
+ exit(0);
+ }
+
+ retval = run_command(argc, argv, 1);
+ if (retval)
+ exit(0);
+
+usage:
+ rtla_usage();
+ exit(1);
+}
diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c
new file mode 100644
index 000000000000..97abbf494fee
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+#include "timerlat.h"
+
+static void timerlat_usage(void)
+{
+ int i;
+
+ static const char * const msg[] = {
+ "",
+ "timerlat version " VERSION,
+ "",
+ " usage: [rtla] timerlat [MODE] ...",
+ "",
+ " modes:",
+ " top - prints the summary from timerlat tracer",
+ " hist - prints a histogram of timer latencies",
+ "",
+ "if no MODE is given, the top mode is called, passing the arguments",
+ NULL,
+ };
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+int timerlat_main(int argc, char *argv[])
+{
+ if (argc == 0)
+ goto usage;
+
+ /*
+ * if timerlat was called without any argument, run the
+ * default cmdline.
+ */
+ if (argc == 1) {
+ timerlat_top_main(argc, argv);
+ exit(0);
+ }
+
+ if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
+ timerlat_usage();
+ exit(0);
+ } else if (strncmp(argv[1], "-", 1) == 0) {
+ /* the user skipped the tool, call the default one */
+ timerlat_top_main(argc, argv);
+ exit(0);
+ } else if (strcmp(argv[1], "top") == 0) {
+ timerlat_top_main(argc-1, &argv[1]);
+ exit(0);
+ } else if (strcmp(argv[1], "hist") == 0) {
+ timerlat_hist_main(argc-1, &argv[1]);
+ exit(0);
+ }
+
+usage:
+ timerlat_usage();
+ exit(1);
+}
diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h
new file mode 100644
index 000000000000..88561bfd14f3
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat.h
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+int timerlat_hist_main(int argc, char *argv[]);
+int timerlat_top_main(int argc, char *argv[]);
+int timerlat_main(int argc, char *argv[]);
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
new file mode 100644
index 000000000000..235f9620ef3d
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -0,0 +1,822 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "utils.h"
+#include "osnoise.h"
+#include "timerlat.h"
+
+struct timerlat_hist_params {
+ char *cpus;
+ char *monitored_cpus;
+ char *trace_output;
+ unsigned long long runtime;
+ long long stop_us;
+ long long stop_total_us;
+ long long timerlat_period_us;
+ long long print_stack;
+ int sleep_time;
+ int output_divisor;
+ int duration;
+ int set_sched;
+ struct sched_attr sched_param;
+
+ char no_irq;
+ char no_thread;
+ char no_header;
+ char no_summary;
+ char no_index;
+ char with_zeros;
+ int bucket_size;
+ int entries;
+};
+
+struct timerlat_hist_cpu {
+ int *irq;
+ int *thread;
+
+ int irq_count;
+ int thread_count;
+
+ unsigned long long min_irq;
+ unsigned long long sum_irq;
+ unsigned long long max_irq;
+
+ unsigned long long min_thread;
+ unsigned long long sum_thread;
+ unsigned long long max_thread;
+};
+
+struct timerlat_hist_data {
+ struct timerlat_hist_cpu *hist;
+ int entries;
+ int bucket_size;
+ int nr_cpus;
+};
+
+/*
+ * timerlat_free_histogram - free runtime data
+ */
+static void
+timerlat_free_histogram(struct timerlat_hist_data *data)
+{
+ int cpu;
+
+ /* one histogram for IRQ and one for thread, per CPU */
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (data->hist[cpu].irq)
+ free(data->hist[cpu].irq);
+
+ if (data->hist[cpu].thread)
+ free(data->hist[cpu].thread);
+ }
+
+ /* one set of histograms per CPU */
+ if (data->hist)
+ free(data->hist);
+
+ free(data);
+}
+
+/*
+ * timerlat_alloc_histogram - alloc runtime data
+ */
+static struct timerlat_hist_data
+*timerlat_alloc_histogram(int nr_cpus, int entries, int bucket_size)
+{
+ struct timerlat_hist_data *data;
+ int cpu;
+
+ data = calloc(1, sizeof(*data));
+ if (!data)
+ return NULL;
+
+ data->entries = entries;
+ data->bucket_size = bucket_size;
+ data->nr_cpus = nr_cpus;
+
+ /* one set of histograms per CPU */
+ data->hist = calloc(1, sizeof(*data->hist) * nr_cpus);
+ if (!data->hist)
+ goto cleanup;
+
+ /* one histogram for IRQ and one for thread, per cpu */
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1));
+ if (!data->hist[cpu].irq)
+ goto cleanup;
+ data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1));
+ if (!data->hist[cpu].thread)
+ goto cleanup;
+ }
+
+ /* set the min to max */
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ data->hist[cpu].min_irq = ~0;
+ data->hist[cpu].min_thread = ~0;
+ }
+
+ return data;
+
+cleanup:
+ timerlat_free_histogram(data);
+ return NULL;
+}
+
+/*
+ * timerlat_hist_update - record a new timerlat occurrence on a cpu, updating data
+ */
+static void
+timerlat_hist_update(struct osnoise_tool *tool, int cpu,
+ unsigned long long thread,
+ unsigned long long latency)
+{
+ struct timerlat_hist_params *params = tool->params;
+ struct timerlat_hist_data *data = tool->data;
+ int entries = data->entries;
+ int bucket;
+ int *hist;
+
+ if (params->output_divisor)
+ latency = latency / params->output_divisor;
+
+ if (data->bucket_size)
+ bucket = latency / data->bucket_size;
+
+ if (!thread) {
+ hist = data->hist[cpu].irq;
+ data->hist[cpu].irq_count++;
+ update_min(&data->hist[cpu].min_irq, &latency);
+ update_sum(&data->hist[cpu].sum_irq, &latency);
+ update_max(&data->hist[cpu].max_irq, &latency);
+ } else {
+ hist = data->hist[cpu].thread;
+ data->hist[cpu].thread_count++;
+ update_min(&data->hist[cpu].min_thread, &latency);
+ update_sum(&data->hist[cpu].sum_thread, &latency);
+ update_max(&data->hist[cpu].max_thread, &latency);
+ }
+
+ if (bucket < entries)
+ hist[bucket]++;
+ else
+ hist[entries]++;
+}
+
+/*
+ * timerlat_hist_handler - this is the handler for timerlat tracer events
+ */
+static int
+timerlat_hist_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *data)
+{
+ struct trace_instance *trace = data;
+ unsigned long long thread, latency;
+ struct osnoise_tool *tool;
+ int cpu = record->cpu;
+
+ tool = container_of(trace, struct osnoise_tool, trace);
+
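+	/* context is 0 for a timer IRQ sample, non-zero for a thread sample */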
+ tep_get_field_val(s, event, "context", record, &thread, 1);
+ tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
+
+ timerlat_hist_update(tool, cpu, thread, latency);
+
+ return 0;
+}
+
+/*
+ * timerlat_hist_header - print the header of the tracer to the output
+ */
+static void timerlat_hist_header(struct osnoise_tool *tool)
+{
+ struct timerlat_hist_params *params = tool->params;
+ struct timerlat_hist_data *data = tool->data;
+ struct trace_seq *s = tool->trace.seq;
+ char duration[26];
+ int cpu;
+
+ if (params->no_header)
+ return;
+
+ get_duration(tool->start_time, duration, sizeof(duration));
+ trace_seq_printf(s, "# RTLA timerlat histogram\n");
+ trace_seq_printf(s, "# Time unit is %s (%s)\n",
+ params->output_divisor == 1 ? "nanoseconds" : "microseconds",
+ params->output_divisor == 1 ? "ns" : "us");
+
+ trace_seq_printf(s, "# Duration: %s\n", duration);
+
+ if (!params->no_index)
+ trace_seq_printf(s, "Index");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(s, " IRQ-%03d", cpu);
+
+ if (!params->no_thread)
+ trace_seq_printf(s, " Thr-%03d", cpu);
+ }
+ trace_seq_printf(s, "\n");
+
+ trace_seq_do_printf(s);
+ trace_seq_reset(s);
+}
+
+/*
+ * timerlat_print_summary - print the summary of the hist data to the output
+ */
+static void
+timerlat_print_summary(struct timerlat_hist_params *params,
+ struct trace_instance *trace,
+ struct timerlat_hist_data *data)
+{
+ int cpu;
+
+ if (params->no_summary)
+ return;
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "count:");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].irq_count);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].thread_count);
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "min: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].min_irq);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].min_thread);
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "avg: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq) {
+ if (data->hist[cpu].irq_count)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].sum_irq / data->hist[cpu].irq_count);
+ else
+ trace_seq_printf(trace->seq, " - ");
+ }
+
+ if (!params->no_thread) {
+ if (data->hist[cpu].thread_count)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].sum_thread / data->hist[cpu].thread_count);
+ else
+ trace_seq_printf(trace->seq, " - ");
+ }
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "max: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].max_irq);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].max_thread);
+ }
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
+/*
+ * timerlat_print_stats - print data for all CPUs
+ */
+static void
+timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool)
+{
+ struct timerlat_hist_data *data = tool->data;
+ struct trace_instance *trace = &tool->trace;
+ int bucket, cpu;
+ int total;
+
+ timerlat_hist_header(tool);
+
+ for (bucket = 0; bucket < data->entries; bucket++) {
+ total = 0;
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "%-6d",
+ bucket * data->bucket_size);
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq) {
+ total += data->hist[cpu].irq[bucket];
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].irq[bucket]);
+ }
+
+ if (!params->no_thread) {
+ total += data->hist[cpu].thread[bucket];
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].thread[bucket]);
+ }
+
+ }
+
+ if (total == 0 && !params->with_zeros) {
+ trace_seq_reset(trace->seq);
+ continue;
+ }
+
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+ }
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "over: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].irq[data->entries]);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].thread[data->entries]);
+ }
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+
+ timerlat_print_summary(params, trace, data);
+}
+
+/*
+ * timerlat_hist_usage - prints timerlat hist usage message
+ */
+static void timerlat_hist_usage(char *usage)
+{
+ int i;
+
+	static const char * const msg[] = {
+ "",
+ " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\",
+ " [-c cpu-list] [-P priority] [-e N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\",
+ " [--no-index] [--with-zeros]",
+ "",
+ " -h/--help: print this menu",
+ " -p/--period us: timerlat period in us",
+ " -i/--irq us: stop trace if the irq latency is higher than the argument in us",
+ " -T/--thread us: stop trace if the thread latency is higher than the argument in us",
+ " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us",
+ " -c/--cpus cpus: run the tracer only on the given cpus",
+ " -d/--duration time[m|h|d]: duration of the session in seconds",
+ " -D/--debug: print debug info",
+ " -T/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
+ " -n/--nano: display data in nanoseconds",
+ " -b/--bucket-size N: set the histogram bucket size (default 1)",
+ " -e/--entries N: set the number of entries of the histogram (default 256)",
+ " --no-irq: ignore IRQ latencies",
+ " --no-thread: ignore thread latencies",
+ " --no-header: do not print header",
+ " --no-summary: do not print summary",
+ " --no-index: do not print index",
+ " --with-zeros: print zero only entries",
+ " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
+ " o:prio - use SCHED_OTHER with prio",
+ " r:prio - use SCHED_RR with prio",
+ " f:prio - use SCHED_FIFO with prio",
+ " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
+ " in nanoseconds",
+ NULL,
+ };
+
+ if (usage)
+ fprintf(stderr, "%s\n", usage);
+
+ fprintf(stderr, "rtla timerlat hist: a per-cpu histogram of the timer latency (version %s)\n",
+ VERSION);
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * timerlat_hist_parse_args - allocate, parse and fill the command line parameters
+ */
+static struct timerlat_hist_params
+*timerlat_hist_parse_args(int argc, char *argv[])
+{
+ struct timerlat_hist_params *params;
+ int retval;
+ int c;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ exit(1);
+
+ /* display data in microseconds */
+ params->output_divisor = 1000;
+ params->bucket_size = 1;
+ params->entries = 256;
+
+ while (1) {
+ static struct option long_options[] = {
+ {"cpus", required_argument, 0, 'c'},
+ {"bucket-size", required_argument, 0, 'b'},
+ {"debug", no_argument, 0, 'D'},
+ {"entries", required_argument, 0, 'e'},
+ {"duration", required_argument, 0, 'd'},
+ {"help", no_argument, 0, 'h'},
+ {"irq", required_argument, 0, 'i'},
+ {"nano", no_argument, 0, 'n'},
+ {"period", required_argument, 0, 'p'},
+ {"priority", required_argument, 0, 'P'},
+ {"stack", required_argument, 0, 's'},
+ {"thread", required_argument, 0, 'T'},
+ {"trace", optional_argument, 0, 't'},
+ {"no-irq", no_argument, 0, '0'},
+ {"no-thread", no_argument, 0, '1'},
+ {"no-header", no_argument, 0, '2'},
+ {"no-summary", no_argument, 0, '3'},
+ {"no-index", no_argument, 0, '4'},
+ {"with-zeros", no_argument, 0, '5'},
+ {0, 0, 0, 0}
+ };
+
+ /* getopt_long stores the option index here. */
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "c:b:d:e:Dhi:np:P:s:t::T:012345",
+ long_options, &option_index);
+
+ /* detect the end of the options. */
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'c':
+ retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ if (retval)
+ timerlat_hist_usage("\nInvalid -c cpu list\n");
+ params->cpus = optarg;
+ break;
+ case 'b':
+ params->bucket_size = get_llong_from_str(optarg);
+ if ((params->bucket_size == 0) || (params->bucket_size >= 1000000))
+ timerlat_hist_usage("Bucket size needs to be > 0 and <= 1000000\n");
+ break;
+ case 'D':
+ config_debug = 1;
+ break;
+ case 'd':
+ params->duration = parse_seconds_duration(optarg);
+ if (!params->duration)
+ timerlat_hist_usage("Invalid -D duration\n");
+ break;
+ case 'e':
+ params->entries = get_llong_from_str(optarg);
+ if ((params->entries < 10) || (params->entries > 9999999))
+ timerlat_hist_usage("Entries must be > 10 and < 9999999\n");
+ break;
+ case 'h':
+ case '?':
+ timerlat_hist_usage(NULL);
+ break;
+ case 'i':
+ params->stop_us = get_llong_from_str(optarg);
+ break;
+ case 'n':
+ params->output_divisor = 1;
+ break;
+ case 'p':
+ params->timerlat_period_us = get_llong_from_str(optarg);
+ if (params->timerlat_period_us > 1000000)
+ timerlat_hist_usage("Period longer than 1 s\n");
+ break;
+ case 'P':
+ retval = parse_prio(optarg, &params->sched_param);
+ if (retval == -1)
+ timerlat_hist_usage("Invalid -P priority");
+ params->set_sched = 1;
+ break;
+ case 's':
+ params->print_stack = get_llong_from_str(optarg);
+ break;
+ case 'T':
+ params->stop_total_us = get_llong_from_str(optarg);
+ break;
+ case 't':
+			if (optarg)
+				/* skip the leading = of the -t=file form; --trace=file has none */
+				params->trace_output = optarg[0] == '=' ? &optarg[1] : optarg;
+ else
+ params->trace_output = "timerlat_trace.txt";
+ break;
+ case '0': /* no irq */
+ params->no_irq = 1;
+ break;
+ case '1': /* no thread */
+ params->no_thread = 1;
+ break;
+ case '2': /* no header */
+ params->no_header = 1;
+ break;
+ case '3': /* no summary */
+ params->no_summary = 1;
+ break;
+ case '4': /* no index */
+ params->no_index = 1;
+ break;
+ case '5': /* with zeros */
+ params->with_zeros = 1;
+ break;
+ default:
+ timerlat_hist_usage("Invalid option");
+ }
+ }
+
+ if (geteuid()) {
+ err_msg("rtla needs root permission\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (params->no_irq && params->no_thread)
+ timerlat_hist_usage("no-irq and no-thread set, there is nothing to do here");
+
+ if (params->no_index && !params->with_zeros)
+ timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense");
+
+ return params;
+}
+
+/*
+ * timerlat_hist_apply_config - apply the hist configs to the initialized tool
+ */
+static int
+timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ if (params->cpus) {
+ retval = osnoise_set_cpus(tool->context, params->cpus);
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_us) {
+ retval = osnoise_set_stop_us(tool->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_total_us) {
+ retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->timerlat_period_us) {
+ retval = osnoise_set_timerlat_period_us(tool->context, params->timerlat_period_us);
+ if (retval) {
+ err_msg("Failed to set timerlat period\n");
+ goto out_err;
+ }
+ }
+
+ if (params->print_stack) {
+ retval = osnoise_set_print_stack(tool->context, params->print_stack);
+ if (retval) {
+ err_msg("Failed to set print stack\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+/*
+ * timerlat_init_hist - initialize a timerlat hist tool with parameters
+ */
+static struct osnoise_tool
+*timerlat_init_hist(struct timerlat_hist_params *params)
+{
+ struct osnoise_tool *tool;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ tool = osnoise_init_tool("timerlat_hist");
+ if (!tool)
+ return NULL;
+
+ tool->data = timerlat_alloc_histogram(nr_cpus, params->entries, params->bucket_size);
+ if (!tool->data)
+ goto out_err;
+
+ tool->params = params;
+
+ tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
+ timerlat_hist_handler, tool);
+
+ return tool;
+
+out_err:
+ osnoise_destroy_tool(tool);
+ return NULL;
+}
+
+static int stop_tracing;
+static void stop_hist(int sig)
+{
+ stop_tracing = 1;
+}
+
+/*
+ * timerlat_hist_set_signals - handles the signal to stop the tool
+ */
+static void
+timerlat_hist_set_signals(struct timerlat_hist_params *params)
+{
+ signal(SIGINT, stop_hist);
+ if (params->duration) {
+ signal(SIGALRM, stop_hist);
+ alarm(params->duration);
+ }
+}
+
+int timerlat_hist_main(int argc, char *argv[])
+{
+ struct timerlat_hist_params *params;
+ struct trace_instance *trace;
+ struct osnoise_tool *record;
+ struct osnoise_tool *tool;
+ int return_value = 1;
+ int retval;
+
+ params = timerlat_hist_parse_args(argc, argv);
+ if (!params)
+ exit(1);
+
+ tool = timerlat_init_hist(params);
+ if (!tool) {
+ err_msg("Could not init osnoise hist\n");
+ goto out_exit;
+ }
+
+ retval = timerlat_hist_apply_config(tool, params);
+ if (retval) {
+ err_msg("Could not apply config\n");
+ goto out_hist;
+ }
+
+ trace = &tool->trace;
+
+ retval = enable_timerlat(trace);
+ if (retval) {
+ err_msg("Failed to enable timerlat tracer\n");
+ goto out_hist;
+ }
+
+ if (params->set_sched) {
+ retval = set_comm_sched_attr("timerlat/", &params->sched_param);
+ if (retval) {
+ err_msg("Failed to set sched parameters\n");
+ goto out_hist;
+ }
+ }
+
+ trace_instance_start(trace);
+
+ if (params->trace_output) {
+ record = osnoise_init_trace_tool("timerlat");
+ if (!record) {
+ err_msg("Failed to enable the trace instance\n");
+ goto out_hist;
+ }
+ trace_instance_start(&record->trace);
+ }
+
+ tool->start_time = time(NULL);
+ timerlat_hist_set_signals(params);
+
+ while (!stop_tracing) {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_hist;
+ }
+
+ if (!tracefs_trace_is_on(trace->inst))
+ break;
+	}
+
+ timerlat_print_stats(params, tool);
+
+ return_value = 0;
+
+ if (!tracefs_trace_is_on(trace->inst)) {
+ printf("rtla timelat hit stop tracing\n");
+ if (params->trace_output) {
+ printf(" Saving trace to %s\n", params->trace_output);
+ save_trace_to_file(record->trace.inst, params->trace_output);
+ }
+ }
+
+out_hist:
+ timerlat_free_histogram(tool->data);
+ osnoise_destroy_tool(tool);
+ if (params->trace_output)
+ osnoise_destroy_tool(record);
+ free(params);
+out_exit:
+ exit(return_value);
+}
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
new file mode 100644
index 000000000000..1ebd5291539c
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "utils.h"
+#include "osnoise.h"
+#include "timerlat.h"
+
+struct timerlat_top_params {
+ char *cpus;
+ char *monitored_cpus;
+ char *trace_output;
+ unsigned long long runtime;
+ long long stop_us;
+ long long stop_total_us;
+ long long timerlat_period_us;
+ long long print_stack;
+ int sleep_time;
+ int output_divisor;
+ int duration;
+ int quiet;
+ int set_sched;
+ struct sched_attr sched_param;
+};
+
+struct timerlat_top_cpu {
+ int irq_count;
+ int thread_count;
+
+ unsigned long long cur_irq;
+ unsigned long long min_irq;
+ unsigned long long sum_irq;
+ unsigned long long max_irq;
+
+ unsigned long long cur_thread;
+ unsigned long long min_thread;
+ unsigned long long sum_thread;
+ unsigned long long max_thread;
+};
+
+struct timerlat_top_data {
+ struct timerlat_top_cpu *cpu_data;
+ int nr_cpus;
+};
+
+/*
+ * timerlat_free_top - free runtime data
+ */
+static void
+timerlat_free_top(struct timerlat_top_data *data)
+{
+ free(data->cpu_data);
+ free(data);
+}
+
+/*
+ * timerlat_alloc_top - alloc runtime data
+ */
+static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus)
+{
+ struct timerlat_top_data *data;
+ int cpu;
+
+ data = calloc(1, sizeof(*data));
+ if (!data)
+ return NULL;
+
+ data->nr_cpus = nr_cpus;
+
+	/* one set of counters per CPU */
+ data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus);
+ if (!data->cpu_data)
+ goto cleanup;
+
+ /* set the min to max */
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ data->cpu_data[cpu].min_irq = ~0;
+ data->cpu_data[cpu].min_thread = ~0;
+ }
+
+ return data;
+
+cleanup:
+ timerlat_free_top(data);
+ return NULL;
+}
+
+/*
+ * timerlat_top_update - record a new timerlat occurrence on cpu, updating data
+ */
+static void
+timerlat_top_update(struct osnoise_tool *tool, int cpu,
+ unsigned long long thread,
+ unsigned long long latency)
+{
+ struct timerlat_top_data *data = tool->data;
+ struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu];
+
+ if (!thread) {
+ cpu_data->irq_count++;
+ cpu_data->cur_irq = latency;
+ update_min(&cpu_data->min_irq, &latency);
+ update_sum(&cpu_data->sum_irq, &latency);
+ update_max(&cpu_data->max_irq, &latency);
+ } else {
+ cpu_data->thread_count++;
+ cpu_data->cur_thread = latency;
+ update_min(&cpu_data->min_thread, &latency);
+ update_sum(&cpu_data->sum_thread, &latency);
+ update_max(&cpu_data->max_thread, &latency);
+ }
+}
+
+/*
+ * timerlat_top_handler - this is the handler for timerlat tracer events
+ */
+static int
+timerlat_top_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ struct trace_instance *trace = context;
+ unsigned long long latency, thread;
+ struct osnoise_tool *top;
+ int cpu = record->cpu;
+
+ top = container_of(trace, struct osnoise_tool, trace);
+
+ tep_get_field_val(s, event, "context", record, &thread, 1);
+ tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
+
+ timerlat_top_update(top, cpu, thread, latency);
+
+ return 0;
+}
+
+/*
+ * timerlat_top_header - print the header of the tool output
+ */
+static void timerlat_top_header(struct osnoise_tool *top)
+{
+ struct timerlat_top_params *params = top->params;
+ struct trace_seq *s = top->trace.seq;
+ char duration[26];
+
+ get_duration(top->start_time, duration, sizeof(duration));
+
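+	/* dim white-on-black banner via ANSI SGR escape sequences */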
+ trace_seq_printf(s, "\033[2;37;40m");
+ trace_seq_printf(s, " Timer Latency ");
+ trace_seq_printf(s, "\033[0;0;0m");
+ trace_seq_printf(s, "\n");
+
+ trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)\n", duration,
+ params->output_divisor == 1 ? "ns" : "us",
+ params->output_divisor == 1 ? "ns" : "us");
+
+ trace_seq_printf(s, "\033[2;30;47m");
+ trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max");
+ trace_seq_printf(s, "\033[0;0;0m");
+ trace_seq_printf(s, "\n");
+}
+
+/*
+ * timerlat_top_print - prints the output of a given CPU
+ */
+static void timerlat_top_print(struct osnoise_tool *top, int cpu)
+{
+ struct timerlat_top_params *params = top->params;
+ struct timerlat_top_data *data = top->data;
+ struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu];
+ int divisor = params->output_divisor;
+ struct trace_seq *s = top->trace.seq;
+
+ if (divisor == 0)
+ return;
+
+ /*
+ * Skip if no data is available: is this cpu offline?
+ */
+ if (!cpu_data->irq_count && !cpu_data->thread_count)
+ return;
+
+ /*
+	 * Unless trace data is being lost, the IRQ counter is always >= the thread counter.
+ */
+ trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count);
+
+ if (!cpu_data->irq_count) {
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - |");
+ } else {
+ trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor);
+ trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor);
+ trace_seq_printf(s, "%9llu ", (cpu_data->sum_irq / cpu_data->irq_count) / divisor);
+ trace_seq_printf(s, "%9llu |", cpu_data->max_irq / divisor);
+ }
+
+ if (!cpu_data->thread_count) {
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " -\n");
+ } else {
+ trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor);
+ trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor);
+ trace_seq_printf(s, "%9llu ",
+ (cpu_data->sum_thread / cpu_data->thread_count) / divisor);
+ trace_seq_printf(s, "%9llu\n", cpu_data->max_thread / divisor);
+ }
+}
+
+/*
+ * clear_terminal - clears the output terminal
+ */
+static void clear_terminal(struct trace_seq *seq)
+{
+ if (!config_debug)
+ trace_seq_printf(seq, "\033c");
+}
+
+/*
+ * timerlat_print_stats - print data for all cpus
+ */
+static void
+timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top)
+{
+ struct trace_instance *trace = &top->trace;
+ static int nr_cpus = -1;
+ int i;
+
+ if (nr_cpus == -1)
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ if (!params->quiet)
+ clear_terminal(trace->seq);
+
+ timerlat_top_header(top);
+
+ for (i = 0; i < nr_cpus; i++) {
+ if (params->cpus && !params->monitored_cpus[i])
+ continue;
+ timerlat_top_print(top, i);
+ }
+
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
+/*
+ * timerlat_top_usage - prints timerlat top usage message
+ */
+static void timerlat_top_usage(char *usage)
+{
+ int i;
+
+ static const char *const msg[] = {
+ "",
+ " usage: rtla timerlat [top] [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\",
+ " [-c cpu-list] [-P priority]",
+ "",
+ " -h/--help: print this menu",
+ " -p/--period us: timerlat period in us",
+ " -i/--irq us: stop trace if the irq latency is higher than the argument in us",
+ " -T/--thread us: stop trace if the thread latency is higher than the argument in us",
+ " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us",
+ " -c/--cpus cpus: run the tracer only on the given cpus",
+ " -d/--duration time[m|h|d]: duration of the session in seconds",
+ " -D/--debug: print debug info",
+ " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
+ " -n/--nano: display data in nanoseconds",
+ " -q/--quiet print only a summary at the end",
+ " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
+ " o:prio - use SCHED_OTHER with prio",
+ " r:prio - use SCHED_RR with prio",
+ " f:prio - use SCHED_FIFO with prio",
+ " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
+ " in nanoseconds",
+ NULL,
+ };
+
+ if (usage)
+ fprintf(stderr, "%s\n", usage);
+
+ fprintf(stderr, "rtla timerlat top: a per-cpu summary of the timer latency (version %s)\n",
+ VERSION);
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * timerlat_top_parse_args - allocs, parses and fills the cmd line parameters
+ */
+static struct timerlat_top_params
+*timerlat_top_parse_args(int argc, char **argv)
+{
+ struct timerlat_top_params *params;
+ int retval;
+ int c;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ exit(1);
+
+ /* display data in microseconds */
+ params->output_divisor = 1000;
+
+ while (1) {
+ static struct option long_options[] = {
+ {"cpus", required_argument, 0, 'c'},
+ {"debug", no_argument, 0, 'D'},
+ {"duration", required_argument, 0, 'd'},
+ {"help", no_argument, 0, 'h'},
+ {"irq", required_argument, 0, 'i'},
+ {"nano", no_argument, 0, 'n'},
+ {"period", required_argument, 0, 'p'},
+ {"priority", required_argument, 0, 'P'},
+ {"quiet", no_argument, 0, 'q'},
+ {"stack", required_argument, 0, 's'},
+ {"thread", required_argument, 0, 'T'},
+ {"trace", optional_argument, 0, 't'},
+ {0, 0, 0, 0}
+ };
+
+ /* getopt_long stores the option index here. */
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "c:d:Dhi:np:P:qs:t::T:",
+ long_options, &option_index);
+
+ /* detect the end of the options. */
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'c':
+ retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ if (retval)
+ timerlat_top_usage("\nInvalid -c cpu list\n");
+ params->cpus = optarg;
+ break;
+ case 'D':
+ config_debug = 1;
+ break;
+ case 'd':
+ params->duration = parse_seconds_duration(optarg);
+ if (!params->duration)
+ timerlat_top_usage("Invalid -D duration\n");
+ break;
+ case 'h':
+ case '?':
+ timerlat_top_usage(NULL);
+ break;
+ case 'i':
+ params->stop_us = get_llong_from_str(optarg);
+ break;
+ case 'n':
+ params->output_divisor = 1;
+ break;
+ case 'p':
+ params->timerlat_period_us = get_llong_from_str(optarg);
+ if (params->timerlat_period_us > 1000000)
+ timerlat_top_usage("Period longer than 1 s\n");
+ break;
+ case 'P':
+ retval = parse_prio(optarg, &params->sched_param);
+ if (retval == -1)
+ timerlat_top_usage("Invalid -P priority");
+ params->set_sched = 1;
+ break;
+ case 'q':
+ params->quiet = 1;
+ break;
+ case 's':
+ params->print_stack = get_llong_from_str(optarg);
+ break;
+ case 'T':
+ params->stop_total_us = get_llong_from_str(optarg);
+ break;
+ case 't':
+ if (optarg)
+ /* skip = */
+ params->trace_output = &optarg[1];
+ else
+ params->trace_output = "timerlat_trace.txt";
+ break;
+ default:
+ timerlat_top_usage("Invalid option");
+ }
+ }
+
+ if (geteuid()) {
+ err_msg("rtla needs root permission\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return params;
+}
+
+/*
+ * timerlat_top_apply_config - apply the top configs to the initialized tool
+ */
+static int
+timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ if (params->cpus) {
+ retval = osnoise_set_cpus(top->context, params->cpus);
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_us) {
+ retval = osnoise_set_stop_us(top->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_total_us) {
+ retval = osnoise_set_stop_total_us(top->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->timerlat_period_us) {
+ retval = osnoise_set_timerlat_period_us(top->context, params->timerlat_period_us);
+ if (retval) {
+ err_msg("Failed to set timerlat period\n");
+ goto out_err;
+ }
+ }
+
+ if (params->print_stack) {
+ retval = osnoise_set_print_stack(top->context, params->print_stack);
+ if (retval) {
+ err_msg("Failed to set print stack\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+/*
+ * timerlat_init_top - initialize a timerlat top tool with parameters
+ */
+static struct osnoise_tool
+*timerlat_init_top(struct timerlat_top_params *params)
+{
+ struct osnoise_tool *top;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ top = osnoise_init_tool("timerlat_top");
+ if (!top)
+ return NULL;
+
+ top->data = timerlat_alloc_top(nr_cpus);
+ if (!top->data)
+ goto out_err;
+
+ top->params = params;
+
+ tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat",
+ timerlat_top_handler, top);
+
+ return top;
+
+out_err:
+ osnoise_destroy_tool(top);
+ return NULL;
+}
+
+static int stop_tracing;
+static void stop_top(int sig)
+{
+ stop_tracing = 1;
+}
+
+/*
+ * timerlat_top_set_signals - set the signals to stop the tool
+ */
+static void
+timerlat_top_set_signals(struct timerlat_top_params *params)
+{
+ signal(SIGINT, stop_top);
+ if (params->duration) {
+ signal(SIGALRM, stop_top);
+ alarm(params->duration);
+ }
+}
+
+int timerlat_top_main(int argc, char *argv[])
+{
+ struct timerlat_top_params *params;
+ struct trace_instance *trace;
+ struct osnoise_tool *record;
+ struct osnoise_tool *top;
+ int return_value = 1;
+ int retval;
+
+ params = timerlat_top_parse_args(argc, argv);
+ if (!params)
+ exit(1);
+
+ top = timerlat_init_top(params);
+ if (!top) {
+ err_msg("Could not init osnoise top\n");
+ goto out_exit;
+ }
+
+ retval = timerlat_top_apply_config(top, params);
+ if (retval) {
+ err_msg("Could not apply config\n");
+ goto out_top;
+ }
+
+ trace = &top->trace;
+
+ retval = enable_timerlat(trace);
+ if (retval) {
+ err_msg("Failed to enable timerlat tracer\n");
+ goto out_top;
+ }
+
+ if (params->set_sched) {
+ retval = set_comm_sched_attr("timerlat/", &params->sched_param);
+ if (retval) {
+ err_msg("Failed to set sched parameters\n");
+ goto out_top;
+ }
+ }
+
+ trace_instance_start(trace);
+
+ if (params->trace_output) {
+ record = osnoise_init_trace_tool("timerlat");
+ if (!record) {
+ err_msg("Failed to enable the trace instance\n");
+ goto out_top;
+ }
+ trace_instance_start(&record->trace);
+ }
+
+ top->start_time = time(NULL);
+ timerlat_top_set_signals(params);
+
+ while (!stop_tracing) {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_top;
+ }
+
+ if (!params->quiet)
+ timerlat_print_stats(params, top);
+
+ if (!tracefs_trace_is_on(trace->inst))
+ break;
+	}
+
+ timerlat_print_stats(params, top);
+
+ return_value = 0;
+
+ if (!tracefs_trace_is_on(trace->inst)) {
+ printf("rtla timelat hit stop tracing\n");
+ if (params->trace_output) {
+ printf(" Saving trace to %s\n", params->trace_output);
+ save_trace_to_file(record->trace.inst, params->trace_output);
+ }
+ }
+
+out_top:
+ timerlat_free_top(top->data);
+ osnoise_destroy_tool(top);
+ if (params->trace_output)
+ osnoise_destroy_tool(record);
+ free(params);
+out_exit:
+ exit(return_value);
+}
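+
+/*
+ * Illustrative invocation (not part of the code): with the options
+ * parsed above, a typical run could be:
+ *
+ *	# rtla timerlat top -c 0-3 -T 50 -t=timerlat_trace.txt -d 1m
+ *
+ * monitoring CPUs 0-3, stopping once a thread latency crosses 50 us,
+ * saving the stopped trace to timerlat_trace.txt, for at most 1 minute.
+ */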
diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c
new file mode 100644
index 000000000000..107a0c6387f7
--- /dev/null
+++ b/tools/tracing/rtla/src/trace.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/sendfile.h>
+#include <tracefs.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include "trace.h"
+#include "utils.h"
+
+/*
+ * enable_tracer_by_name - enable a tracer on the given instance
+ */
+int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name)
+{
+ enum tracefs_tracers tracer;
+ int retval;
+
+ tracer = TRACEFS_TRACER_CUSTOM;
+
+ debug_msg("enabling %s tracer\n", tracer_name);
+
+ retval = tracefs_tracer_set(inst, tracer, tracer_name);
+ if (retval < 0) {
+ if (errno == ENODEV)
+ err_msg("tracer %s not found!\n", tracer_name);
+
+ err_msg("failed to enable the tracer %s\n", tracer_name);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * disable_tracer - set the nop tracer on the given instance
+ */
+void disable_tracer(struct tracefs_instance *inst)
+{
+ enum tracefs_tracers t = TRACEFS_TRACER_NOP;
+ int retval;
+
+ retval = tracefs_tracer_set(inst, t);
+ if (retval < 0)
+ err_msg("oops, error disabling tracer\n");
+}
+
+/*
+ * create_instance - create a trace instance with *instance_name
+ */
+struct tracefs_instance *create_instance(char *instance_name)
+{
+ return tracefs_instance_create(instance_name);
+}
+
+/*
+ * destroy_instance - remove a trace instance and free the data
+ */
+void destroy_instance(struct tracefs_instance *inst)
+{
+ tracefs_instance_destroy(inst);
+ tracefs_instance_free(inst);
+}
+
+/*
+ * save_trace_to_file - save the trace output of the instance to the file
+ */
+int save_trace_to_file(struct tracefs_instance *inst, const char *filename)
+{
+ const char *file = "trace";
+ mode_t mode = 0644;
+ char buffer[4096];
+ int out_fd, in_fd;
+ int retval = -1;
+
+ in_fd = tracefs_instance_file_open(inst, file, O_RDONLY);
+ if (in_fd < 0) {
+ err_msg("Failed to open trace file\n");
+ return -1;
+ }
+
+ out_fd = creat(filename, mode);
+ if (out_fd < 0) {
+ err_msg("Failed to create output file %s\n", filename);
+ goto out_close_in;
+ }
+
+ do {
+ retval = read(in_fd, buffer, sizeof(buffer));
+ if (retval <= 0)
+ goto out_close;
+
+ retval = write(out_fd, buffer, retval);
+ if (retval < 0)
+ goto out_close;
+ } while (retval > 0);
+
+ retval = 0;
+out_close:
+ close(out_fd);
+out_close_in:
+ close(in_fd);
+ return retval;
+}
+
+/*
+ * collect_registered_events - call the existing callback function for the event
+ *
+ * If an event has a registered callback function, call it.
+ * Otherwise, ignore the event.
+ */
+int
+collect_registered_events(struct tep_event *event, struct tep_record *record,
+ int cpu, void *context)
+{
+ struct trace_instance *trace = context;
+ struct trace_seq *s = trace->seq;
+
+ if (!event->handler)
+ return 0;
+
+ event->handler(s, record, event, context);
+
+ return 0;
+}
+
+/*
+ * trace_instance_destroy - destroy and free a rtla trace instance
+ */
+void trace_instance_destroy(struct trace_instance *trace)
+{
+ if (trace->inst) {
+ disable_tracer(trace->inst);
+ destroy_instance(trace->inst);
+ }
+
+ if (trace->seq)
+ free(trace->seq);
+
+ if (trace->tep)
+ tep_free(trace->tep);
+}
+
+/*
+ * trace_instance_init - create an rtla trace instance
+ *
+ * It is more than the tracefs instance, as it contains other
+ * things required for the tracing, such as the local events and
+ * a seq file.
+ *
+ * Note that the trace instance is returned disabled. This allows
+ * the tool to apply some other configs, like setting priority
+ * to the kernel threads, before it starts generating trace entries.
+ */
+int trace_instance_init(struct trace_instance *trace, char *tool_name)
+{
+ trace->seq = calloc(1, sizeof(*trace->seq));
+ if (!trace->seq)
+ goto out_err;
+
+ trace_seq_init(trace->seq);
+
+ trace->inst = create_instance(tool_name);
+ if (!trace->inst)
+ goto out_err;
+
+ trace->tep = tracefs_local_events(NULL);
+ if (!trace->tep)
+ goto out_err;
+
+ /*
+ * Let the main enable the record after setting some other
+ * things such as the priority of the tracer's threads.
+ */
+ tracefs_trace_off(trace->inst);
+
+ return 0;
+
+out_err:
+ trace_instance_destroy(trace);
+ return 1;
+}
+
+/*
+ * trace_instance_start - start tracing a given rtla instance
+ */
+int trace_instance_start(struct trace_instance *trace)
+{
+ return tracefs_trace_on(trace->inst);
+}
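+
+/*
+ * A minimal lifecycle sketch for these helpers (illustrative only):
+ *
+ *	struct trace_instance ti;
+ *
+ *	if (trace_instance_init(&ti, "my_tool"))
+ *		return 1;
+ *	... apply configs while tracing is still off ...
+ *	trace_instance_start(&ti);
+ *	... iterate and collect events ...
+ *	trace_instance_destroy(&ti);
+ */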
diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h
new file mode 100644
index 000000000000..0ea1df0ad9a7
--- /dev/null
+++ b/tools/tracing/rtla/src/trace.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <tracefs.h>
+#include <stddef.h>
+
+struct trace_instance {
+ struct tracefs_instance *inst;
+ struct tep_handle *tep;
+ struct trace_seq *seq;
+};
+
+int trace_instance_init(struct trace_instance *trace, char *tool_name);
+int trace_instance_start(struct trace_instance *trace);
+void trace_instance_destroy(struct trace_instance *trace);
+
+struct trace_seq *get_trace_seq(void);
+int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name);
+void disable_tracer(struct tracefs_instance *inst);
+
+int enable_osnoise(struct trace_instance *trace);
+int enable_timerlat(struct trace_instance *trace);
+
+struct tracefs_instance *create_instance(char *instance_name);
+void destroy_instance(struct tracefs_instance *inst);
+
+int save_trace_to_file(struct tracefs_instance *inst, const char *filename);
+int collect_registered_events(struct tep_event *event, struct tep_record *record,
+			      int cpu, void *context);
diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c
new file mode 100644
index 000000000000..1c9f0eea6166
--- /dev/null
+++ b/tools/tracing/rtla/src/utils.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <proc/readproc.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+
+#include "utils.h"
+
+#define MAX_MSG_LENGTH 1024
+int config_debug;
+
+/*
+ * err_msg - print an error message to stderr
+ */
+void err_msg(const char *fmt, ...)
+{
+ char message[MAX_MSG_LENGTH];
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsnprintf(message, sizeof(message), fmt, ap);
+ va_end(ap);
+
+ fprintf(stderr, "%s", message);
+}
+
+/*
+ * debug_msg - print a debug message to stderr if debug is set
+ */
+void debug_msg(const char *fmt, ...)
+{
+ char message[MAX_MSG_LENGTH];
+ va_list ap;
+
+ if (!config_debug)
+ return;
+
+ va_start(ap, fmt);
+ vsnprintf(message, sizeof(message), fmt, ap);
+ va_end(ap);
+
+ fprintf(stderr, "%s", message);
+}
+
+/*
+ * get_llong_from_str - get a long long int from a string
+ */
+long long get_llong_from_str(char *start)
+{
+ long long value;
+ char *end;
+
+ errno = 0;
+ value = strtoll(start, &end, 10);
+ if (errno || start == end)
+ return -1;
+
+ return value;
+}
+
+/*
+ * get_duration - fill output with a human readable duration since start_time
+ */
+void get_duration(time_t start_time, char *output, int output_size)
+{
+ time_t now = time(NULL);
+ struct tm *tm_info;
+ time_t duration;
+
+ duration = difftime(now, start_time);
+	/* use gmtime() so the duration is not shifted by the local timezone */
+	tm_info = gmtime(&duration);
+
+	snprintf(output, output_size, "%3d %02d:%02d:%02d",
+		 tm_info->tm_yday,
+		 tm_info->tm_hour,
+		 tm_info->tm_min,
+		 tm_info->tm_sec);
+}
+
+/*
+ * parse_cpu_list - parse a cpu_list filling a char vector with cpus set
+ *
+ * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then sets
+ * the corresponding entries of monitored_cpus.
+ *
+ * XXX: convert to a bitmask.
+ */
+int parse_cpu_list(char *cpu_list, char **monitored_cpus)
+{
+ char *mon_cpus;
+ const char *p;
+ int end_cpu;
+ int nr_cpus;
+ int cpu;
+ int i;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+	mon_cpus = calloc(nr_cpus, sizeof(char));
+	if (!mon_cpus)
+		return 1;
+
+ for (p = cpu_list; *p; ) {
+ cpu = atoi(p);
+ if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus)
+ goto err;
+
+ while (isdigit(*p))
+ p++;
+ if (*p == '-') {
+ p++;
+ end_cpu = atoi(p);
+ if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus)
+ goto err;
+ while (isdigit(*p))
+ p++;
+ } else
+ end_cpu = cpu;
+
+ if (cpu == end_cpu) {
+ debug_msg("cpu_list: adding cpu %d\n", cpu);
+ mon_cpus[cpu] = 1;
+ } else {
+ for (i = cpu; i <= end_cpu; i++) {
+ debug_msg("cpu_list: adding cpu %d\n", i);
+ mon_cpus[i] = 1;
+ }
+ }
+
+ if (*p == ',')
+ p++;
+ }
+
+ *monitored_cpus = mon_cpus;
+
+ return 0;
+
+err:
+ debug_msg("Error parsing the cpu list %s", cpu_list);
+ return 1;
+}
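+
+/*
+ * For example, on a system with at least five CPUs,
+ * parse_cpu_list("0-2,4", &mask) sets mask[0], mask[1], mask[2] and
+ * mask[4] to 1 and leaves every other entry zeroed.
+ */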
+
+/*
+ * parse_seconds_duration - parse duration with s/m/h/d suffix converting it to seconds
+ */
+long parse_seconds_duration(char *val)
+{
+ char *end;
+ long t;
+
+ t = strtol(val, &end, 10);
+
+ if (end) {
+ switch (*end) {
+ case 's':
+ case 'S':
+ break;
+ case 'm':
+ case 'M':
+ t *= 60;
+ break;
+ case 'h':
+ case 'H':
+ t *= 60 * 60;
+ break;
+ case 'd':
+ case 'D':
+ t *= 24 * 60 * 60;
+ break;
+ }
+ }
+
+ return t;
+}
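+
+/*
+ * For instance, parse_seconds_duration("90") returns 90,
+ * parse_seconds_duration("2m") returns 120 and
+ * parse_seconds_duration("1h") returns 3600.
+ */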
+
+/*
+ * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
+ */
+long parse_ns_duration(char *val)
+{
+ char *end;
+ long t;
+
+ t = strtol(val, &end, 10);
+
+ if (end) {
+ if (!strncmp(end, "ns", 2)) {
+ return t;
+ } else if (!strncmp(end, "us", 2)) {
+ t *= 1000;
+ return t;
+ } else if (!strncmp(end, "ms", 2)) {
+ t *= 1000 * 1000;
+ return t;
+ } else if (!strncmp(end, "s", 1)) {
+ t *= 1000 * 1000 * 1000;
+ return t;
+ }
+ return -1;
+ }
+
+ return t;
+}
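+
+/*
+ * For instance, parse_ns_duration("100us") returns 100000 and
+ * parse_ns_duration("2ms") returns 2000000; a missing or unknown
+ * suffix yields -1.
+ */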
+
+/*
+ * This is a set of helper functions to use SCHED_DEADLINE.
+ */
+#ifdef __x86_64__
+# define __NR_sched_setattr 314
+# define __NR_sched_getattr 315
+#elif __i386__
+# define __NR_sched_setattr 351
+# define __NR_sched_getattr 352
+#elif __arm__
+# define __NR_sched_setattr 380
+# define __NR_sched_getattr 381
+#elif __aarch64__
+# define __NR_sched_setattr 274
+# define __NR_sched_getattr 275
+#elif __powerpc__
+# define __NR_sched_setattr 355
+# define __NR_sched_getattr 356
+#elif __s390x__
+# define __NR_sched_setattr 345
+# define __NR_sched_getattr 346
+#endif
+
+#define SCHED_DEADLINE 6
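+
+/*
+ * glibc does not provide wrappers for sched_setattr()/sched_getattr(),
+ * so invoke the syscalls directly with the numbers defined above.
+ */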
+
+static inline int sched_setattr(pid_t pid, const struct sched_attr *attr,
+				unsigned int flags)
+{
+	return syscall(__NR_sched_setattr, pid, attr, flags);
+}
+
+static inline int sched_getattr(pid_t pid, struct sched_attr *attr,
+ unsigned int size, unsigned int flags)
+{
+ return syscall(__NR_sched_getattr, pid, attr, size, flags);
+}
+
+int __set_sched_attr(int pid, struct sched_attr *attr)
+{
+ int flags = 0;
+ int retval;
+
+ retval = sched_setattr(pid, attr, flags);
+ if (retval < 0) {
+ err_msg("boost_with_deadline failed to boost pid %d: %s\n",
+ pid, strerror(errno));
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * set_comm_sched_attr - set sched params to threads starting with char *comm
+ *
+ * This function uses procps to list the currently running threads and
+ * then sets the sched_attr *attr on the threads whose name starts with
+ * char *comm. It is mainly used to set the priority of the kernel
+ * threads created by the tracers.
+ */
+int set_comm_sched_attr(const char *comm, struct sched_attr *attr)
+{
+ int flags = PROC_FILLCOM | PROC_FILLSTAT;
+ PROCTAB *ptp;
+ proc_t task;
+ int retval;
+
+ ptp = openproc(flags);
+ if (!ptp) {
+ err_msg("error openproc()\n");
+ return -ENOENT;
+ }
+
+ memset(&task, 0, sizeof(task));
+
+ while (readproc(ptp, &task)) {
+ retval = strncmp(comm, task.cmd, strlen(comm));
+ if (retval)
+ continue;
+ retval = __set_sched_attr(task.tid, attr);
+ if (retval)
+ goto out_err;
+ }
+
+ closeproc(ptp);
+ return 0;
+
+out_err:
+ closeproc(ptp);
+ return 1;
+}
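+
+/*
+ * For example, both timerlat modes call
+ * set_comm_sched_attr("timerlat/", &params->sched_param) to adjust the
+ * scheduling parameters of the kernel threads created by the timerlat
+ * tracer.
+ */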
+
+#define INVALID_VAL (~0L)
+static long get_long_ns_after_colon(char *start)
+{
+ long val = INVALID_VAL;
+
+ /* find the ":" */
+ start = strstr(start, ":");
+ if (!start)
+ return -1;
+
+ /* skip ":" */
+ start++;
+ val = parse_ns_duration(start);
+
+ return val;
+}
+
+static long get_long_after_colon(char *start)
+{
+ long val = INVALID_VAL;
+
+ /* find the ":" */
+ start = strstr(start, ":");
+ if (!start)
+ return -1;
+
+ /* skip ":" */
+ start++;
+ val = get_llong_from_str(start);
+
+ return val;
+}
+
+/*
+ * parse priority in the format:
+ * SCHED_OTHER:
+ * o:<prio>
+ * O:<prio>
+ * SCHED_RR:
+ * r:<prio>
+ * R:<prio>
+ * SCHED_FIFO:
+ * f:<prio>
+ * F:<prio>
+ * SCHED_DEADLINE:
+ * d:runtime:period
+ * D:runtime:period
+ */
+int parse_prio(char *arg, struct sched_attr *sched_param)
+{
+ long prio;
+ long runtime;
+ long period;
+
+ memset(sched_param, 0, sizeof(*sched_param));
+ sched_param->size = sizeof(*sched_param);
+
+ switch (arg[0]) {
+ case 'd':
+ case 'D':
+ /* d:runtime:period */
+ if (strlen(arg) < 4)
+ return -1;
+
+ runtime = get_long_ns_after_colon(arg);
+ if (runtime == INVALID_VAL)
+ return -1;
+
+ period = get_long_ns_after_colon(&arg[2]);
+ if (period == INVALID_VAL)
+ return -1;
+
+ if (runtime > period)
+ return -1;
+
+ sched_param->sched_policy = SCHED_DEADLINE;
+ sched_param->sched_runtime = runtime;
+ sched_param->sched_deadline = period;
+ sched_param->sched_period = period;
+ break;
+ case 'f':
+ case 'F':
+ /* f:prio */
+ prio = get_long_after_colon(arg);
+ if (prio == INVALID_VAL)
+ return -1;
+
+ if (prio < sched_get_priority_min(SCHED_FIFO))
+ return -1;
+ if (prio > sched_get_priority_max(SCHED_FIFO))
+ return -1;
+
+ sched_param->sched_policy = SCHED_FIFO;
+ sched_param->sched_priority = prio;
+ break;
+ case 'r':
+ case 'R':
+ /* r:prio */
+ prio = get_long_after_colon(arg);
+ if (prio == INVALID_VAL)
+ return -1;
+
+ if (prio < sched_get_priority_min(SCHED_RR))
+ return -1;
+ if (prio > sched_get_priority_max(SCHED_RR))
+ return -1;
+
+ sched_param->sched_policy = SCHED_RR;
+ sched_param->sched_priority = prio;
+ break;
+ case 'o':
+ case 'O':
+ /* o:prio */
+ prio = get_long_after_colon(arg);
+ if (prio == INVALID_VAL)
+ return -1;
+
+ if (prio < sched_get_priority_min(SCHED_OTHER))
+ return -1;
+ if (prio > sched_get_priority_max(SCHED_OTHER))
+ return -1;
+
+ sched_param->sched_policy = SCHED_OTHER;
+ sched_param->sched_priority = prio;
+ break;
+ default:
+ return -1;
+ }
+ return 0;
+}
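+
+/*
+ * Examples (illustrative only):
+ *
+ *	parse_prio("f:95", &attr) selects SCHED_FIFO with priority 95;
+ *	parse_prio("d:500us:1ms", &attr) selects SCHED_DEADLINE with a
+ *	500000 ns runtime and a 1000000 ns period.
+ */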
diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h
new file mode 100644
index 000000000000..9aa962319ca2
--- /dev/null
+++ b/tools/tracing/rtla/src/utils.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdint.h>
+#include <stddef.h>
+#include <time.h>
+
+/*
+ * '18446744073709551615\0'
+ */
+#define BUFF_U64_STR_SIZE 24
+
+#define container_of(ptr, type, member) ({			\
+	const typeof(((type *)0)->member) *__mptr = (ptr);	\
+	(type *)((char *)__mptr - offsetof(type, member)); })
+
+extern int config_debug;
+void debug_msg(const char *fmt, ...);
+void err_msg(const char *fmt, ...);
+
+long parse_seconds_duration(char *val);
+void get_duration(time_t start_time, char *output, int output_size);
+
+int parse_cpu_list(char *cpu_list, char **monitored_cpus);
+long long get_llong_from_str(char *start);
+
+static inline void
+update_min(unsigned long long *a, unsigned long long *b)
+{
+ if (*a > *b)
+ *a = *b;
+}
+
+static inline void
+update_max(unsigned long long *a, unsigned long long *b)
+{
+ if (*a < *b)
+ *a = *b;
+}
+
+static inline void
+update_sum(unsigned long long *a, unsigned long long *b)
+{
+ *a += *b;
+}
+
+struct sched_attr {
+ uint32_t size;
+ uint32_t sched_policy;
+ uint64_t sched_flags;
+ int32_t sched_nice;
+ uint32_t sched_priority;
+ uint64_t sched_runtime;
+ uint64_t sched_deadline;
+ uint64_t sched_period;
+};
+
+int parse_prio(char *arg, struct sched_attr *sched_param);
+int set_comm_sched_attr(const char *comm, struct sched_attr *attr);