aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile14
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h2
-rw-r--r--tools/arch/powerpc/include/uapi/asm/perf_regs.h28
-rw-r--r--tools/arch/s390/include/uapi/asm/ptrace.h5
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h19
-rw-r--r--tools/arch/x86/include/asm/insn.h45
-rw-r--r--tools/arch/x86/include/asm/orc_types.h10
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--tools/arch/x86/include/uapi/asm/vmx.h4
-rw-r--r--tools/arch/x86/lib/insn.c119
-rw-r--r--tools/bpf/Makefile.helpers60
-rw-r--r--tools/bpf/bpf_dbg.c2
-rw-r--r--tools/bpf/bpf_exp.y14
-rw-r--r--tools/bpf/bpftool/.gitignore1
-rw-r--r--tools/bpf/bpftool/Documentation/Makefile11
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst78
-rw-r--r--tools/bpf/bpftool/Makefile2
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool17
-rw-r--r--tools/bpf/bpftool/btf.c11
-rw-r--r--tools/bpf/bpftool/btf_dumper.c1
-rw-r--r--tools/bpf/bpftool/common.c1
-rw-r--r--tools/bpf/bpftool/feature.c4
-rw-r--r--tools/bpf/bpftool/gen.c72
-rw-r--r--tools/bpf/bpftool/main.c3
-rw-r--r--tools/bpf/bpftool/map.c2
-rw-r--r--tools/bpf/bpftool/prog.c1
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c3
-rw-r--r--tools/bpf/resolve_btfids/main.c16
-rw-r--r--tools/bpf/runqslower/Makefile9
-rw-r--r--tools/bpf/runqslower/runqslower.bpf.c33
-rw-r--r--tools/build/Makefile8
-rw-r--r--tools/build/Makefile.feature4
-rw-r--r--tools/build/feature/test-libopencsd.c4
-rw-r--r--tools/gpio/gpio-utils.c89
-rw-r--r--tools/gpio/gpio-utils.h6
-rw-r--r--tools/include/linux/coresight-pmu.h20
-rw-r--r--tools/include/linux/export.h3
-rw-r--r--tools/include/linux/objtool.h13
-rw-r--r--tools/include/uapi/asm-generic/unistd.h4
-rw-r--r--tools/include/uapi/drm/drm.h97
-rw-r--r--tools/include/uapi/drm/i915_drm.h3
-rw-r--r--tools/include/uapi/linux/bpf.h768
-rw-r--r--tools/include/uapi/linux/btf.h5
-rw-r--r--tools/include/uapi/linux/kvm.h74
-rw-r--r--tools/include/uapi/linux/mount.h16
-rw-r--r--tools/include/uapi/linux/openat2.h4
-rw-r--r--tools/include/uapi/linux/perf_event.h96
-rw-r--r--tools/include/uapi/linux/prctl.h3
-rw-r--r--tools/lib/api/fs/cgroup.c95
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile5
-rw-r--r--tools/lib/bpf/bpf_helpers.h21
-rw-r--r--tools/lib/bpf/btf.c763
-rw-r--r--tools/lib/bpf/btf.h9
-rw-r--r--tools/lib/bpf/btf_dump.c14
-rw-r--r--tools/lib/bpf/libbpf.c520
-rw-r--r--tools/lib/bpf/libbpf.h19
-rw-r--r--tools/lib/bpf/libbpf.map11
-rw-r--r--tools/lib/bpf/libbpf_internal.h40
-rw-r--r--tools/lib/bpf/libbpf_util.h47
-rw-r--r--tools/lib/bpf/linker.c1963
-rw-r--r--tools/lib/bpf/netlink.c2
-rw-r--r--tools/lib/bpf/strset.c176
-rw-r--r--tools/lib/bpf/strset.h21
-rw-r--r--tools/lib/bpf/xsk.c263
-rw-r--r--tools/lib/bpf/xsk.h87
-rw-r--r--tools/lib/perf/evlist.c13
-rw-r--r--tools/lib/perf/include/internal/evlist.h2
-rw-r--r--tools/lib/perf/include/perf/event.h18
-rw-r--r--tools/objtool/.gitignore2
-rw-r--r--tools/objtool/Documentation/stack-validation.txt16
-rw-r--r--tools/objtool/Makefile5
-rw-r--r--tools/objtool/arch/x86/decode.c54
-rw-r--r--tools/objtool/arch/x86/include/arch/cfi_regs.h (renamed from tools/objtool/arch/x86/include/cfi_regs.h)0
-rw-r--r--tools/objtool/arch/x86/include/arch/elf.h (renamed from tools/objtool/arch/x86/include/arch_elf.h)0
-rw-r--r--tools/objtool/arch/x86/include/arch/endianness.h9
-rw-r--r--tools/objtool/arch/x86/include/arch/special.h (renamed from tools/objtool/arch/x86/include/arch_special.h)0
-rw-r--r--tools/objtool/arch/x86/special.c6
-rw-r--r--tools/objtool/builtin-check.c14
-rw-r--r--tools/objtool/builtin-orc.c10
-rw-r--r--tools/objtool/check.c516
-rw-r--r--tools/objtool/elf.c40
-rw-r--r--tools/objtool/include/objtool/arch.h (renamed from tools/objtool/arch.h)8
-rw-r--r--tools/objtool/include/objtool/builtin.h (renamed from tools/objtool/builtin.h)2
-rw-r--r--tools/objtool/include/objtool/cfi.h (renamed from tools/objtool/cfi.h)2
-rw-r--r--tools/objtool/include/objtool/check.h (renamed from tools/objtool/check.h)39
-rw-r--r--tools/objtool/include/objtool/elf.h (renamed from tools/objtool/elf.h)0
-rw-r--r--tools/objtool/include/objtool/endianness.h38
-rw-r--r--tools/objtool/include/objtool/objtool.h (renamed from tools/objtool/objtool.h)6
-rw-r--r--tools/objtool/include/objtool/special.h (renamed from tools/objtool/special.h)4
-rw-r--r--tools/objtool/include/objtool/warn.h (renamed from tools/objtool/warn.h)2
-rw-r--r--tools/objtool/objtool.c7
-rw-r--r--tools/objtool/orc_dump.c11
-rw-r--r--tools/objtool/orc_gen.c315
-rw-r--r--tools/objtool/special.c14
-rw-r--r--tools/objtool/weak.c9
-rw-r--r--tools/perf/Build1
-rw-r--r--tools/perf/Documentation/examples.txt2
-rw-r--r--tools/perf/Documentation/itrace.txt2
-rw-r--r--tools/perf/Documentation/perf-buildid-cache.txt6
-rw-r--r--tools/perf/Documentation/perf-config.txt24
-rw-r--r--tools/perf/Documentation/perf-daemon.txt208
-rw-r--r--tools/perf/Documentation/perf-evlist.txt2
-rw-r--r--tools/perf/Documentation/perf-ftrace.txt4
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt89
-rw-r--r--tools/perf/Documentation/perf-kallsyms.txt2
-rw-r--r--tools/perf/Documentation/perf-mem.txt3
-rw-r--r--tools/perf/Documentation/perf-record.txt21
-rw-r--r--tools/perf/Documentation/perf-report.txt10
-rw-r--r--tools/perf/Documentation/perf-script.txt25
-rw-r--r--tools/perf/Documentation/perf-stat.txt32
-rw-r--r--tools/perf/Documentation/perf-trace.txt4
-rw-r--r--tools/perf/Documentation/topdown.txt78
-rw-r--r--tools/perf/MANIFEST2
-rw-r--r--tools/perf/Makefile.config9
-rw-r--r--tools/perf/Makefile.perf59
-rw-r--r--tools/perf/arch/arm/include/perf_regs.h2
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c12
-rw-r--r--tools/perf/arch/arm64/include/perf_regs.h2
-rw-r--r--tools/perf/arch/arm64/util/machine.c3
-rw-r--r--tools/perf/arch/arm64/util/perf_regs.c94
-rw-r--r--tools/perf/arch/csky/include/perf_regs.h2
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl21
-rw-r--r--tools/perf/arch/powerpc/include/perf_regs.h8
-rw-r--r--tools/perf/arch/powerpc/util/Build1
-rw-r--r--tools/perf/arch/powerpc/util/machine.c25
-rw-r--r--tools/perf/arch/powerpc/util/perf_regs.c6
-rw-r--r--tools/perf/arch/riscv/include/perf_regs.h2
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/s390/include/perf_regs.h2
-rw-r--r--tools/perf/arch/s390/util/machine.c3
-rw-r--r--tools/perf/arch/x86/Makefile11
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h1
-rw-r--r--tools/perf/arch/x86/include/perf_regs.h2
-rw-r--r--tools/perf/arch/x86/tests/Build1
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c4
-rw-r--r--tools/perf/arch/x86/tests/insn-x86.c3
-rw-r--r--tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c4
-rw-r--r--tools/perf/arch/x86/tests/sample-parsing.c121
-rw-r--r--tools/perf/arch/x86/util/Build3
-rw-r--r--tools/perf/arch/x86/util/archinsn.c2
-rw-r--r--tools/perf/arch/x86/util/event.c25
-rw-r--r--tools/perf/arch/x86/util/evlist.c15
-rw-r--r--tools/perf/arch/x86/util/evsel.c8
-rw-r--r--tools/perf/arch/x86/util/mem-events.c44
-rw-r--r--tools/perf/bench/epoll-ctl.c1
-rw-r--r--tools/perf/bench/epoll-wait.c1
-rw-r--r--tools/perf/bench/futex-hash.c1
-rw-r--r--tools/perf/bench/futex-lock-pi.c1
-rw-r--r--tools/perf/bench/futex-requeue.c1
-rw-r--r--tools/perf/bench/futex-wake-parallel.c1
-rw-r--r--tools/perf/bench/futex-wake.c1
-rw-r--r--tools/perf/bench/numa.c42
-rw-r--r--tools/perf/bench/sched-messaging.c4
-rw-r--r--tools/perf/bench/sched-pipe.c4
-rw-r--r--tools/perf/bench/syscall.c4
-rw-r--r--tools/perf/builtin-buildid-cache.c28
-rw-r--r--tools/perf/builtin-buildid-list.c3
-rw-r--r--tools/perf/builtin-c2c.c171
-rw-r--r--tools/perf/builtin-daemon.c1521
-rw-r--r--tools/perf/builtin-diff.c3
-rw-r--r--tools/perf/builtin-inject.c4
-rw-r--r--tools/perf/builtin-mem.c113
-rw-r--r--tools/perf/builtin-record.c39
-rw-r--r--tools/perf/builtin-script.c37
-rw-r--r--tools/perf/builtin-stat.c124
-rw-r--r--tools/perf/builtin-trace.c5
-rw-r--r--tools/perf/builtin.h1
-rw-r--r--tools/perf/command-list.txt1
-rw-r--r--tools/perf/perf-archive.sh3
-rw-r--r--tools/perf/perf.c1
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json8
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json5
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json58
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json4
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json10
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json34
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json11
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json12
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json19
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json118
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json10
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json45
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json6
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json4
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json12
-rw-r--r--tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json248
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json4
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/ddrc.json37
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/metrics.json18
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/ddrc.json37
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/metrics.json466
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/ddrc.json37
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/metrics.json18
-rw-r--r--tools/perf/tests/Build1
-rw-r--r--tools/perf/tests/attr.c8
-rw-r--r--tools/perf/tests/builtin-test.c4
-rw-r--r--tools/perf/tests/code-reading.c20
-rw-r--r--tools/perf/tests/cpumap.c2
-rw-r--r--tools/perf/tests/demangle-ocaml-test.c43
-rw-r--r--tools/perf/tests/keep-tracking.c5
-rw-r--r--tools/perf/tests/mmap-basic.c2
-rw-r--r--tools/perf/tests/openat-syscall-all-cpus.c1
-rw-r--r--tools/perf/tests/parse-metric.c24
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c2
-rw-r--r--tools/perf/tests/sample-parsing.c16
-rwxr-xr-xtools/perf/tests/shell/buildid.sh6
-rwxr-xr-xtools/perf/tests/shell/daemon.sh475
-rwxr-xr-xtools/perf/tests/shell/test_arm_coresight.sh45
-rw-r--r--tools/perf/tests/sw-clock.c12
-rw-r--r--tools/perf/tests/switch-tracking.c5
-rw-r--r--tools/perf/tests/task-exit.c10
-rw-r--r--tools/perf/tests/tests.h1
-rw-r--r--tools/perf/tests/thread-map.c8
-rw-r--r--tools/perf/ui/browsers/annotate.c2
-rw-r--r--tools/perf/util/Build2
-rw-r--r--tools/perf/util/annotate.c8
-rw-r--r--tools/perf/util/annotate.h1
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c10
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.h8
-rw-r--r--tools/perf/util/arm-spe.c133
-rw-r--r--tools/perf/util/auxtrace.c15
-rw-r--r--tools/perf/util/auxtrace.h6
-rw-r--r--tools/perf/util/bpf_counter.c314
-rw-r--r--tools/perf/util/bpf_counter.h72
-rw-r--r--tools/perf/util/bpf_skel/.gitignore3
-rw-r--r--tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c93
-rw-r--r--tools/perf/util/build-id.c5
-rw-r--r--tools/perf/util/build-id.h4
-rw-r--r--tools/perf/util/cgroup.c8
-rw-r--r--tools/perf/util/config.c123
-rw-r--r--tools/perf/util/config.h7
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c15
-rw-r--r--tools/perf/util/data-convert-bt.c2
-rw-r--r--tools/perf/util/db-export.c2
-rw-r--r--tools/perf/util/debug.c34
-rw-r--r--tools/perf/util/debug.h1
-rw-r--r--tools/perf/util/demangle-ocaml.c80
-rw-r--r--tools/perf/util/demangle-ocaml.h7
-rw-r--r--tools/perf/util/event.c67
-rw-r--r--tools/perf/util/event.h18
-rw-r--r--tools/perf/util/evlist.c126
-rw-r--r--tools/perf/util/evlist.h12
-rw-r--r--tools/perf/util/evsel.c81
-rw-r--r--tools/perf/util/evsel.h13
-rw-r--r--tools/perf/util/evsel_fprintf.c2
-rw-r--r--tools/perf/util/header.c6
-rw-r--r--tools/perf/util/hist.c15
-rw-r--r--tools/perf/util/hist.h4
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c334
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h7
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c15
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h1
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c12
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h2
-rw-r--r--tools/perf/util/intel-pt.c214
-rw-r--r--tools/perf/util/intlist.c27
-rw-r--r--tools/perf/util/intlist.h10
-rw-r--r--tools/perf/util/jit.h2
-rw-r--r--tools/perf/util/jitdump.c84
-rw-r--r--tools/perf/util/machine.c51
-rw-r--r--tools/perf/util/machine.h2
-rw-r--r--tools/perf/util/map.c15
-rw-r--r--tools/perf/util/map.h3
-rw-r--r--tools/perf/util/mem-events.c36
-rw-r--r--tools/perf/util/mem-events.h5
-rw-r--r--tools/perf/util/metricgroup.c2
-rw-r--r--tools/perf/util/namespaces.c23
-rw-r--r--tools/perf/util/namespaces.h3
-rw-r--r--tools/perf/util/parse-events.l1
-rw-r--r--tools/perf/util/parse-events.y6
-rw-r--r--tools/perf/util/perf_api_probe.c10
-rw-r--r--tools/perf/util/perf_api_probe.h1
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c5
-rw-r--r--tools/perf/util/perf_regs.h7
-rw-r--r--tools/perf/util/probe-event.c12
-rw-r--r--tools/perf/util/probe-file.c38
-rw-r--r--tools/perf/util/probe-finder.c8
-rw-r--r--tools/perf/util/python-ext-sources2
-rw-r--r--tools/perf/util/python.c21
-rw-r--r--tools/perf/util/record.c9
-rw-r--r--tools/perf/util/record.h2
-rw-r--r--tools/perf/util/session.c54
-rw-r--r--tools/perf/util/setup.py2
-rw-r--r--tools/perf/util/sort.c113
-rw-r--r--tools/perf/util/sort.h6
-rw-r--r--tools/perf/util/stat-display.c6
-rw-r--r--tools/perf/util/stat-shadow.c92
-rw-r--r--tools/perf/util/stat.c53
-rw-r--r--tools/perf/util/stat.h9
-rw-r--r--tools/perf/util/string.c9
-rw-r--r--tools/perf/util/string2.h2
-rw-r--r--tools/perf/util/symbol-elf.c25
-rw-r--r--tools/perf/util/symbol.c73
-rw-r--r--tools/perf/util/symbol_conf.h7
-rw-r--r--tools/perf/util/synthetic-events.c225
-rw-r--r--tools/perf/util/target.c34
-rw-r--r--tools/perf/util/target.h10
-rw-r--r--tools/perf/util/trace-event-info.c10
-rw-r--r--tools/perf/util/trace-event-read.c1
-rw-r--r--tools/perf/util/unwind-libdw.c11
-rw-r--r--tools/perf/util/xyarray.c33
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c115
-rw-r--r--tools/power/x86/intel-speed-select/isst-core.c11
-rw-r--r--tools/power/x86/intel-speed-select/isst-display.c18
-rw-r--r--tools/power/x86/intel-speed-select/isst.h2
-rw-r--r--tools/scripts/Makefile.include1
-rw-r--r--tools/testing/kunit/configs/broken_on_uml.config2
-rwxr-xr-xtools/testing/kunit/kunit.py30
-rw-r--r--tools/testing/kunit/kunit_config.py15
-rw-r--r--tools/testing/kunit/kunit_kernel.py18
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py204
-rw-r--r--tools/testing/scatterlist/main.c1
-rw-r--r--tools/testing/selftests/Makefile10
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c2
-rw-r--r--tools/testing/selftests/arm64/fp/sve-test.S22
-rw-r--r--tools/testing/selftests/arm64/mte/check_buffer_fill.c2
-rw-r--r--tools/testing/selftests/bpf/.gitignore2
-rw-r--r--tools/testing/selftests/bpf/Makefile61
-rw-r--r--tools/testing/selftests/bpf/Makefile.docs82
-rw-r--r--tools/testing/selftests/bpf/README.rst62
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h29
-rw-r--r--tools/testing/selftests/bpf/btf_helpers.c4
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_user.c6
-rw-r--r--tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c5
-rw-r--r--tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c158
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c176
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_sleep.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c130
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c59
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c51
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c83
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c144
-rw-r--r--tools/testing/selftests/bpf/prog_tests/static_linked.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_storage.c92
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_ima.c6
-rw-r--r--tools/testing/selftests/bpf/progs/bind4_prog.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bind6_prog.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c36
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c22
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c15
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h5
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_test.c2
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_sleep.c31
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_test.c4
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_array_map_elem.c61
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c95
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test.c47
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c42
-rw-r--r--tools/testing/selftests/bpf/progs/loop6.c99
-rw-r--r--tools/testing/selftests/bpf/progs/netif_receive_skb.c13
-rw-r--r--tools/testing/selftests/bpf/progs/skb_pkt_end.c1
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_storage.c64
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c32
-rw-r--r--tools/testing/selftests/bpf/progs/task_ls_recursion.c70
-rw-r--r--tools/testing/selftests/bpf/progs/test_check_mtu.c92
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_size.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func10.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func11.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup.c62
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked1.c30
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked2.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c113
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c12
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh21
-rw-r--r--tools/testing/selftests/bpf/test_btf.h3
-rwxr-xr-xtools/testing/selftests/bpf/test_doc_build.sh13
-rw-r--r--tools/testing/selftests/bpf/test_progs.h11
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh15
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c4
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh138
-rw-r--r--tools/testing/selftests/bpf/verifier/array_access.c3
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_and.c23
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c25
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_or.c25
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds_deduction.c27
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c12
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c1
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c10
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/unpriv.c15
-rw-r--r--tools/testing/selftests/bpf/verifier/value_ptr_arith.c23
-rwxr-xr-xtools/testing/selftests/bpf/vmtest.sh59
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.c862
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.h98
-rwxr-xr-xtools/testing/selftests/bpf/xsk_prereqs.sh30
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test_arm64.c4
-rw-r--r--tools/testing/selftests/dma/dma_map_benchmark.c21
-rw-r--r--tools/testing/selftests/dmabuf-heaps/Makefile2
-rw-r--r--tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c149
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh31
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh82
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh77
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh21
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_sample.sh657
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh110
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/nexthop.sh620
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/psample.sh181
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc35
-rw-r--r--tools/testing/selftests/gpio/.gitignore2
-rw-r--r--tools/testing/selftests/gpio/Makefile26
-rw-r--r--tools/testing/selftests/gpio/config1
-rw-r--r--tools/testing/selftests/gpio/gpio-mockup-cdev.c198
-rw-r--r--tools/testing/selftests/gpio/gpio-mockup-chardev.c323
-rwxr-xr-xtools/testing/selftests/gpio/gpio-mockup-sysfs.sh168
-rwxr-xr-xtools/testing/selftests/gpio/gpio-mockup.sh497
-rw-r--r--tools/testing/selftests/ipc/msgque.c6
-rwxr-xr-xtools/testing/selftests/kselftest_deps.sh4
-rw-r--r--tools/testing/selftests/kselftest_harness.h26
-rw-r--r--tools/testing/selftests/kselftest_module.h18
-rw-r--r--tools/testing/selftests/kvm/.gitignore10
-rw-r--r--tools/testing/selftests/kvm/Makefile10
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c43
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c92
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c165
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h8
-rw-r--r--tools/testing/selftests/kvm/include/numaif.h55
-rw-r--r--tools/testing/selftests/kvm/include/perf_test_util.h7
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h16
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h41
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c14
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h2
-rw-r--r--tools/testing/selftests/kvm/lib/perf_test_util.c31
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c31
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c145
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/svm.c8
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c212
-rw-r--r--tools/testing/selftests/kvm/settings1
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/get_cpuid_test.c175
-rw-r--r--tools/testing/selftests/kvm/x86_64/get_msr_index_features.c134
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_clock.c260
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c31
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c166
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c131
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c544
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c320
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c149
-rw-r--r--tools/testing/selftests/mount_setattr/.gitignore1
-rw-r--r--tools/testing/selftests/mount_setattr/Makefile7
-rw-r--r--tools/testing/selftests/mount_setattr/config1
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c1424
-rw-r--r--tools/testing/selftests/net/Makefile1
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh557
-rwxr-xr-xtools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh366
-rw-r--r--tools/testing/selftests/net/forwarding/fib_offload_lib.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh361
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh14
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh9
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh400
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_police.sh56
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh2
-rw-r--r--tools/testing/selftests/net/ipsec.c2
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh55
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c10
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh22
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh278
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh6
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c34
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh13
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c32
-rw-r--r--tools/testing/selftests/net/so_txtime.c247
-rwxr-xr-xtools/testing/selftests/net/so_txtime.sh97
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh251
-rw-r--r--tools/testing/selftests/netfilter/Makefile2
-rwxr-xr-xtools/testing/selftests/netfilter/nf_nat_edemux.sh99
-rwxr-xr-xtools/testing/selftests/netfilter/nft_flowtable.sh82
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-basic.sh41
-rw-r--r--[-rwxr-xr-x]tools/testing/selftests/powerpc/eeh/eeh-functions.sh168
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh45
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh35
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c10
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json48
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/simple.json83
-rw-r--r--tools/testing/selftests/timens/.gitignore1
-rw-r--r--tools/testing/selftests/vDSO/vdso_config.h4
-rw-r--r--tools/testing/selftests/vm/Makefile4
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh15
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c2
-rw-r--r--tools/tracing/Makefile19
-rw-r--r--tools/tracing/latency/.gitignore2
-rw-r--r--tools/tracing/latency/Makefile24
-rw-r--r--tools/tracing/latency/latency-collector.c2108
494 files changed, 28013 insertions, 4521 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 85af6ebbce91..7e9d34ddd74c 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -31,6 +31,7 @@ help:
@echo ' bootconfig - boot config tool'
@echo ' spi - spi tools'
@echo ' tmon - thermal monitoring and tuning tool'
+ @echo ' tracing - misc tracing tools'
@echo ' turbostat - Intel CPU idle stats and freq reporting tool'
@echo ' usb - USB testing tools'
@echo ' virtio - vhost test module'
@@ -64,7 +65,7 @@ acpi: FORCE
cpupower: FORCE
$(call descend,power/$@)
-cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging: FORCE
+cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
$(call descend,$@)
bpf/%: FORCE
@@ -103,7 +104,7 @@ all: acpi cgroup cpupower gpio hv firewire liblockdep \
perf selftests bootconfig spi turbostat usb \
virtio vm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
- pci debugging
+ pci debugging tracing
acpi_install:
$(call descend,power/$(@:_install=),install)
@@ -111,7 +112,7 @@ acpi_install:
cpupower_install:
$(call descend,power/$(@:_install=),install)
-cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install:
+cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
$(call descend,$(@:_install=),install)
liblockdep_install:
@@ -137,7 +138,8 @@ install: acpi_install cgroup_install cpupower_install gpio_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install vm_install bpf_install x86_energy_perf_policy_install \
tmon_install freefall_install objtool_install kvm_stat_install \
- wmi_install pci_install debugging_install intel-speed-select_install
+ wmi_install pci_install debugging_install intel-speed-select_install \
+ tracing_install
acpi_clean:
$(call descend,power/acpi,clean)
@@ -145,7 +147,7 @@ acpi_clean:
cpupower_clean:
$(call descend,power/cpupower,clean)
-cgroup_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean:
+cgroup_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
$(call descend,$(@:_clean=),clean)
liblockdep_clean:
@@ -184,6 +186,6 @@ clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \
vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \
gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \
- intel-speed-select_clean
+ intel-speed-select_clean tracing_clean
.PHONY: FORCE
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index c3af3f324c5a..9f18fa090f1f 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
+#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
index bdf5f10f8b9f..578b3ee86105 100644
--- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -55,17 +55,33 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_MMCR3,
PERF_REG_POWERPC_SIER2,
PERF_REG_POWERPC_SIER3,
+ PERF_REG_POWERPC_PMC1,
+ PERF_REG_POWERPC_PMC2,
+ PERF_REG_POWERPC_PMC3,
+ PERF_REG_POWERPC_PMC4,
+ PERF_REG_POWERPC_PMC5,
+ PERF_REG_POWERPC_PMC6,
/* Max regs without the extended regs */
PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
};
#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
-/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
-#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK)
-/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */
-#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK)
+/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */
+#define PERF_EXCLUDE_REG_EXT_300 (7ULL << PERF_REG_POWERPC_MMCR3)
-#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1)
-#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1)
+/*
+ * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300
+ * includes 9 SPRS from MMCR0 to PMC6 excluding the
+ * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300.
+ */
+#define PERF_REG_PMU_MASK_300 ((0xfffULL << PERF_REG_POWERPC_MMCR0) - PERF_EXCLUDE_REG_EXT_300)
+
+/*
+ * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31
+ * includes 12 SPRs from MMCR0 to PMC6.
+ */
+#define PERF_REG_PMU_MASK_31 (0xfffULL << PERF_REG_POWERPC_MMCR0)
+
+#define PERF_REG_EXTENDED_MAX (PERF_REG_POWERPC_PMC6 + 1)
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h
index 543dd70e12c8..ad64d673b5e6 100644
--- a/tools/arch/s390/include/uapi/asm/ptrace.h
+++ b/tools/arch/s390/include/uapi/asm/ptrace.h
@@ -179,8 +179,9 @@
#define ACR_SIZE 4
-#define PTRACE_OLDSETOPTIONS 21
-
+#define PTRACE_OLDSETOPTIONS 21
+#define PTRACE_SYSEMU 31
+#define PTRACE_SYSEMU_SINGLESTEP 32
#ifndef __ASSEMBLY__
#include <linux/stddef.h>
#include <linux/types.h>
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 84b887825f12..cc96e26d69f7 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 19 /* N 32-bit words worth of info */
+#define NCAPINTS 20 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -96,7 +96,7 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */
+/* FREE! ( 3*32+17) */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -201,7 +201,7 @@
#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
+/* FREE! ( 7*32+10) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
@@ -211,7 +211,7 @@
#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
-#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
+/* FREE! ( 7*32+20) */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
@@ -236,8 +236,6 @@
#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
-#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
-#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -294,6 +292,7 @@
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
@@ -337,6 +336,7 @@
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
+#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
@@ -385,6 +385,13 @@
#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
+/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */
+#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */
+#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */
+#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */
+#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
+
/*
* BUG word(s)
*/
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index 52c6262e6bfd..cc777c185212 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -7,9 +7,12 @@
* Copyright (C) IBM Corporation, 2009
*/
+#include <asm/byteorder.h>
/* insn_attr_t is defined in inat.h */
#include "inat.h"
+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
+
struct insn_field {
union {
insn_value_t value;
@@ -20,6 +23,48 @@ struct insn_field {
unsigned char nbytes;
};
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+ p->value = v;
+ p->nbytes = n;
+}
+
+static inline void insn_set_byte(struct insn_field *p, unsigned char n,
+ insn_byte_t v)
+{
+ p->bytes[n] = v;
+}
+
+#else
+
+struct insn_field {
+ insn_value_t value;
+ union {
+ insn_value_t little;
+ insn_byte_t bytes[4];
+ };
+ /* !0 if we've run insn_get_xxx() for this field */
+ unsigned char got;
+ unsigned char nbytes;
+};
+
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+ p->value = v;
+ p->little = __cpu_to_le32(v);
+ p->nbytes = n;
+}
+
+static inline void insn_set_byte(struct insn_field *p, unsigned char n,
+ insn_byte_t v)
+{
+ p->bytes[n] = v;
+ p->value = __le32_to_cpu(p->little);
+}
+#endif
+
struct insn {
struct insn_field prefixes; /*
* Prefixes
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index fdbffec4cfde..5a2baf28a1dc 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -40,6 +40,8 @@
#define ORC_REG_MAX 15
#ifndef __ASSEMBLY__
+#include <asm/byteorder.h>
+
/*
* This struct is more or less a vastly simplified version of the DWARF Call
* Frame Information standard. It contains only the necessary parts of DWARF
@@ -51,10 +53,18 @@
struct orc_entry {
s16 sp_offset;
s16 bp_offset;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
unsigned sp_reg:4;
unsigned bp_reg:4;
unsigned type:2;
unsigned end:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ unsigned bp_reg:4;
+ unsigned sp_reg:4;
+ unsigned unused:5;
+ unsigned end:1;
+ unsigned type:2;
+#endif
} __packed;
#endif /* __ASSEMBLY__ */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 8e76d3701db3..5a3022c8af82 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -112,6 +112,7 @@ struct kvm_ioapic_state {
#define KVM_NR_IRQCHIPS 3
#define KVM_RUN_X86_SMM (1 << 0)
+#define KVM_RUN_X86_BUS_LOCK (1 << 1)
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index ada955c5ebb6..b8e650a985e3 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -89,6 +89,7 @@
#define EXIT_REASON_XRSTORS 64
#define EXIT_REASON_UMWAIT 67
#define EXIT_REASON_TPAUSE 68
+#define EXIT_REASON_BUS_LOCK 74
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -150,7 +151,8 @@
{ EXIT_REASON_XSAVES, "XSAVES" }, \
{ EXIT_REASON_XRSTORS, "XRSTORS" }, \
{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
- { EXIT_REASON_TPAUSE, "TPAUSE" }
+ { EXIT_REASON_TPAUSE, "TPAUSE" }, \
+ { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }
#define VMX_EXIT_REASON_FLAGS \
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
index 0151dfc6da61..3d9355ed1246 100644
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -5,6 +5,7 @@
* Copyright (C) IBM Corporation, 2002, 2004, 2009
*/
+#include <linux/kernel.h>
#ifdef __KERNEL__
#include <linux/string.h>
#else
@@ -15,15 +16,28 @@
#include "../include/asm/emulate_prefix.h"
+#define leXX_to_cpu(t, r) \
+({ \
+ __typeof__(t) v; \
+ switch (sizeof(t)) { \
+ case 4: v = le32_to_cpu(r); break; \
+ case 2: v = le16_to_cpu(r); break; \
+ case 1: v = r; break; \
+ default: \
+ BUILD_BUG(); break; \
+ } \
+ v; \
+})
+
/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n) \
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
#define __get_next(t, insn) \
- ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })
#define __peek_nbyte_next(t, insn, n) \
- ({ t r = *(t*)((insn)->next_byte + n); r; })
+ ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); })
#define get_next(t, insn) \
({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
@@ -147,9 +161,9 @@ found:
b = insn->prefixes.bytes[3];
for (i = 0; i < nb; i++)
if (prefixes->bytes[i] == lb)
- prefixes->bytes[i] = b;
+ insn_set_byte(prefixes, i, b);
}
- insn->prefixes.bytes[3] = lb;
+ insn_set_byte(&insn->prefixes, 3, lb);
}
/* Decode REX prefix */
@@ -157,8 +171,7 @@ found:
b = peek_next(insn_byte_t, insn);
attr = inat_get_opcode_attribute(b);
if (inat_is_rex_prefix(attr)) {
- insn->rex_prefix.value = b;
- insn->rex_prefix.nbytes = 1;
+ insn_field_set(&insn->rex_prefix, b, 1);
insn->next_byte++;
if (X86_REX_W(b))
/* REX.W overrides opnd_size */
@@ -181,13 +194,13 @@ found:
if (X86_MODRM_MOD(b2) != 3)
goto vex_end;
}
- insn->vex_prefix.bytes[0] = b;
- insn->vex_prefix.bytes[1] = b2;
+ insn_set_byte(&insn->vex_prefix, 0, b);
+ insn_set_byte(&insn->vex_prefix, 1, b2);
if (inat_is_evex_prefix(attr)) {
b2 = peek_nbyte_next(insn_byte_t, insn, 2);
- insn->vex_prefix.bytes[2] = b2;
+ insn_set_byte(&insn->vex_prefix, 2, b2);
b2 = peek_nbyte_next(insn_byte_t, insn, 3);
- insn->vex_prefix.bytes[3] = b2;
+ insn_set_byte(&insn->vex_prefix, 3, b2);
insn->vex_prefix.nbytes = 4;
insn->next_byte += 4;
if (insn->x86_64 && X86_VEX_W(b2))
@@ -195,7 +208,7 @@ found:
insn->opnd_bytes = 8;
} else if (inat_is_vex3_prefix(attr)) {
b2 = peek_nbyte_next(insn_byte_t, insn, 2);
- insn->vex_prefix.bytes[2] = b2;
+ insn_set_byte(&insn->vex_prefix, 2, b2);
insn->vex_prefix.nbytes = 3;
insn->next_byte += 3;
if (insn->x86_64 && X86_VEX_W(b2))
@@ -207,7 +220,7 @@ found:
* Makes it easier to decode vex.W, vex.vvvv,
* vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
*/
- insn->vex_prefix.bytes[2] = b2 & 0x7f;
+ insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f);
insn->vex_prefix.nbytes = 2;
insn->next_byte += 2;
}
@@ -243,7 +256,7 @@ void insn_get_opcode(struct insn *insn)
/* Get first opcode */
op = get_next(insn_byte_t, insn);
- opcode->bytes[0] = op;
+ insn_set_byte(opcode, 0, op);
opcode->nbytes = 1;
/* Check if there is VEX prefix or not */
@@ -295,8 +308,7 @@ void insn_get_modrm(struct insn *insn)
if (inat_has_modrm(insn->attr)) {
mod = get_next(insn_byte_t, insn);
- modrm->value = mod;
- modrm->nbytes = 1;
+ insn_field_set(modrm, mod, 1);
if (inat_is_group(insn->attr)) {
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_group_attribute(mod, pfx_id,
@@ -334,7 +346,7 @@ int insn_rip_relative(struct insn *insn)
* For rip-relative instructions, the mod field (top 2 bits)
* is zero and the r/m field (bottom 3 bits) is 0x5.
*/
- return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+ return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5);
}
/**
@@ -353,11 +365,11 @@ void insn_get_sib(struct insn *insn)
if (!insn->modrm.got)
insn_get_modrm(insn);
if (insn->modrm.nbytes) {
- modrm = (insn_byte_t)insn->modrm.value;
+ modrm = insn->modrm.bytes[0];
if (insn->addr_bytes != 2 &&
X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
- insn->sib.value = get_next(insn_byte_t, insn);
- insn->sib.nbytes = 1;
+ insn_field_set(&insn->sib,
+ get_next(insn_byte_t, insn), 1);
}
}
insn->sib.got = 1;
@@ -407,19 +419,18 @@ void insn_get_displacement(struct insn *insn)
if (mod == 3)
goto out;
if (mod == 1) {
- insn->displacement.value = get_next(signed char, insn);
- insn->displacement.nbytes = 1;
+ insn_field_set(&insn->displacement,
+ get_next(signed char, insn), 1);
} else if (insn->addr_bytes == 2) {
if ((mod == 0 && rm == 6) || mod == 2) {
- insn->displacement.value =
- get_next(short, insn);
- insn->displacement.nbytes = 2;
+ insn_field_set(&insn->displacement,
+ get_next(short, insn), 2);
}
} else {
if ((mod == 0 && rm == 5) || mod == 2 ||
(mod == 0 && base == 5)) {
- insn->displacement.value = get_next(int, insn);
- insn->displacement.nbytes = 4;
+ insn_field_set(&insn->displacement,
+ get_next(int, insn), 4);
}
}
}
@@ -435,18 +446,14 @@ static int __get_moffset(struct insn *insn)
{
switch (insn->addr_bytes) {
case 2:
- insn->moffset1.value = get_next(short, insn);
- insn->moffset1.nbytes = 2;
+ insn_field_set(&insn->moffset1, get_next(short, insn), 2);
break;
case 4:
- insn->moffset1.value = get_next(int, insn);
- insn->moffset1.nbytes = 4;
+ insn_field_set(&insn->moffset1, get_next(int, insn), 4);
break;
case 8:
- insn->moffset1.value = get_next(int, insn);
- insn->moffset1.nbytes = 4;
- insn->moffset2.value = get_next(int, insn);
- insn->moffset2.nbytes = 4;
+ insn_field_set(&insn->moffset1, get_next(int, insn), 4);
+ insn_field_set(&insn->moffset2, get_next(int, insn), 4);
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
@@ -464,13 +471,11 @@ static int __get_immv32(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
- insn->immediate.value = get_next(short, insn);
- insn->immediate.nbytes = 2;
+ insn_field_set(&insn->immediate, get_next(short, insn), 2);
break;
case 4:
case 8:
- insn->immediate.value = get_next(int, insn);
- insn->immediate.nbytes = 4;
+ insn_field_set(&insn->immediate, get_next(int, insn), 4);
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
@@ -487,18 +492,15 @@ static int __get_immv(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
- insn->immediate1.value = get_next(short, insn);
- insn->immediate1.nbytes = 2;
+ insn_field_set(&insn->immediate1, get_next(short, insn), 2);
break;
case 4:
- insn->immediate1.value = get_next(int, insn);
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
insn->immediate1.nbytes = 4;
break;
case 8:
- insn->immediate1.value = get_next(int, insn);
- insn->immediate1.nbytes = 4;
- insn->immediate2.value = get_next(int, insn);
- insn->immediate2.nbytes = 4;
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
+ insn_field_set(&insn->immediate2, get_next(int, insn), 4);
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
@@ -515,12 +517,10 @@ static int __get_immptr(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
- insn->immediate1.value = get_next(short, insn);
- insn->immediate1.nbytes = 2;
+ insn_field_set(&insn->immediate1, get_next(short, insn), 2);
break;
case 4:
- insn->immediate1.value = get_next(int, insn);
- insn->immediate1.nbytes = 4;
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
break;
case 8:
/* ptr16:64 is not exist (no segment) */
@@ -528,8 +528,7 @@ static int __get_immptr(struct insn *insn)
default: /* opnd_bytes must be modified manually */
goto err_out;
}
- insn->immediate2.value = get_next(unsigned short, insn);
- insn->immediate2.nbytes = 2;
+ insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2);
insn->immediate1.got = insn->immediate2.got = 1;
return 1;
@@ -565,22 +564,17 @@ void insn_get_immediate(struct insn *insn)
switch (inat_immediate_size(insn->attr)) {
case INAT_IMM_BYTE:
- insn->immediate.value = get_next(signed char, insn);
- insn->immediate.nbytes = 1;
+ insn_field_set(&insn->immediate, get_next(signed char, insn), 1);
break;
case INAT_IMM_WORD:
- insn->immediate.value = get_next(short, insn);
- insn->immediate.nbytes = 2;
+ insn_field_set(&insn->immediate, get_next(short, insn), 2);
break;
case INAT_IMM_DWORD:
- insn->immediate.value = get_next(int, insn);
- insn->immediate.nbytes = 4;
+ insn_field_set(&insn->immediate, get_next(int, insn), 4);
break;
case INAT_IMM_QWORD:
- insn->immediate1.value = get_next(int, insn);
- insn->immediate1.nbytes = 4;
- insn->immediate2.value = get_next(int, insn);
- insn->immediate2.nbytes = 4;
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
+ insn_field_set(&insn->immediate2, get_next(int, insn), 4);
break;
case INAT_IMM_PTR:
if (!__get_immptr(insn))
@@ -599,8 +593,7 @@ void insn_get_immediate(struct insn *insn)
goto err_out;
}
if (inat_has_second_immediate(insn->attr)) {
- insn->immediate2.value = get_next(signed char, insn);
- insn->immediate2.nbytes = 1;
+ insn_field_set(&insn->immediate2, get_next(signed char, insn), 1);
}
done:
insn->immediate.got = 1;
diff --git a/tools/bpf/Makefile.helpers b/tools/bpf/Makefile.helpers
deleted file mode 100644
index 854d084026dd..000000000000
--- a/tools/bpf/Makefile.helpers
+++ /dev/null
@@ -1,60 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-ifndef allow-override
- include ../scripts/Makefile.include
- include ../scripts/utilities.mak
-else
- # Assume Makefile.helpers is being run from bpftool/Documentation
- # subdirectory. Go up two more directories to fetch bpf.h header and
- # associated script.
- UP2DIR := ../../
-endif
-
-INSTALL ?= install
-RM ?= rm -f
-RMDIR ?= rmdir --ignore-fail-on-non-empty
-
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
-prefix ?= /usr/local
-mandir ?= $(prefix)/man
-man7dir = $(mandir)/man7
-
-HELPERS_RST = bpf-helpers.rst
-MAN7_RST = $(HELPERS_RST)
-
-_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
-DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
-
-helpers: man7
-man7: $(DOC_MAN7)
-
-RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
-
-$(OUTPUT)$(HELPERS_RST): $(UP2DIR)../../include/uapi/linux/bpf.h
- $(QUIET_GEN)$(UP2DIR)../../scripts/bpf_helpers_doc.py --filename $< > $@
-
-$(OUTPUT)%.7: $(OUTPUT)%.rst
-ifndef RST2MAN_DEP
- $(error "rst2man not found, but required to generate man pages")
-endif
- $(QUIET_GEN)rst2man $< > $@
-
-helpers-clean:
- $(call QUIET_CLEAN, eBPF_helpers-manpage)
- $(Q)$(RM) $(DOC_MAN7) $(OUTPUT)$(HELPERS_RST)
-
-helpers-install: helpers
- $(call QUIET_INSTALL, eBPF_helpers-manpage)
- $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
- $(Q)$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
-
-helpers-uninstall:
- $(call QUIET_UNINST, eBPF_helpers-manpage)
- $(Q)$(RM) $(addprefix $(DESTDIR)$(man7dir)/,$(_DOC_MAN7))
- $(Q)$(RMDIR) $(DESTDIR)$(man7dir)
-
-.PHONY: helpers helpers-clean helpers-install helpers-uninstall
diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c
index a07dfc479270..00e560a17baf 100644
--- a/tools/bpf/bpf_dbg.c
+++ b/tools/bpf/bpf_dbg.c
@@ -1198,7 +1198,7 @@ static int cmd_run(char *num)
else
return CMD_OK;
bpf_reset();
- } while (pcap_next_pkt() && (!has_limit || (has_limit && ++i < pkts)));
+ } while (pcap_next_pkt() && (!has_limit || (++i < pkts)));
rl_printf("bpf passes:%u fails:%u\n", pass, fail);
diff --git a/tools/bpf/bpf_exp.y b/tools/bpf/bpf_exp.y
index 8d48e896be50..dfb7254a24e8 100644
--- a/tools/bpf/bpf_exp.y
+++ b/tools/bpf/bpf_exp.y
@@ -185,13 +185,13 @@ ldx
| OP_LDXB number '*' '(' '[' number ']' '&' number ')' {
if ($2 != 4 || $9 != 0xf) {
fprintf(stderr, "ldxb offset not supported!\n");
- exit(0);
+ exit(1);
} else {
bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
| OP_LDX number '*' '(' '[' number ']' '&' number ')' {
if ($2 != 4 || $9 != 0xf) {
fprintf(stderr, "ldxb offset not supported!\n");
- exit(0);
+ exit(1);
} else {
bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
;
@@ -472,7 +472,7 @@ static void bpf_assert_max(void)
{
if (curr_instr >= BPF_MAXINSNS) {
fprintf(stderr, "only max %u insns allowed!\n", BPF_MAXINSNS);
- exit(0);
+ exit(1);
}
}
@@ -522,7 +522,7 @@ static int bpf_find_insns_offset(const char *label)
if (ret == -ENOENT) {
fprintf(stderr, "no such label \'%s\'!\n", label);
- exit(0);
+ exit(1);
}
return ret;
@@ -549,9 +549,11 @@ static uint8_t bpf_encode_jt_jf_offset(int off, int i)
{
int delta = off - i - 1;
- if (delta < 0 || delta > 255)
- fprintf(stderr, "warning: insn #%d jumps to insn #%d, "
+ if (delta < 0 || delta > 255) {
+ fprintf(stderr, "error: insn #%d jumps to insn #%d, "
"which is out of range\n", i, off);
+ exit(1);
+ }
return (uint8_t) delta;
}
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 944cb4b7c95d..05ce4446b780 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -3,7 +3,6 @@
/bootstrap/
/bpftool
bpftool*.8
-bpf-helpers.*
FEATURE-DUMP.bpftool
feature
libbpf
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index f33cb02de95c..c49487905ceb 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -16,15 +16,12 @@ prefix ?= /usr/local
mandir ?= $(prefix)/man
man8dir = $(mandir)/man8
-# Load targets for building eBPF helpers man page.
-include ../../Makefile.helpers
-
MAN8_RST = $(wildcard bpftool*.rst)
_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
-man: man8 helpers
+man: man8
man8: $(DOC_MAN8)
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
@@ -46,16 +43,16 @@ ifndef RST2MAN_DEP
endif
$(QUIET_GEN)( cat $< ; printf "%b" $(call see_also,$<) ) | rst2man $(RST2MAN_OPTS) > $@
-clean: helpers-clean
+clean:
$(call QUIET_CLEAN, Documentation)
$(Q)$(RM) $(DOC_MAN8)
-install: man helpers-install
+install: man
$(call QUIET_INSTALL, Documentation-man)
$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
$(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir)
-uninstall: helpers-uninstall
+uninstall:
$(call QUIET_UNINST, Documentation-man)
$(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8))
$(Q)$(RMDIR) $(DESTDIR)$(man8dir)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 84cf0639696f..7cd6681137f3 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -14,16 +14,37 @@ SYNOPSIS
*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
- *COMMAND* := { **skeleton** | **help** }
+ *COMMAND* := { **object** | **skeleton** | **help** }
GEN COMMANDS
=============
-| **bpftool** **gen skeleton** *FILE*
+| **bpftool** **gen object** *OUTPUT_FILE* *INPUT_FILE* [*INPUT_FILE*...]
+| **bpftool** **gen skeleton** *FILE* [**name** *OBJECT_NAME*]
| **bpftool** **gen help**
DESCRIPTION
===========
+ **bpftool gen object** *OUTPUT_FILE* *INPUT_FILE* [*INPUT_FILE*...]
+ Statically link (combine) together one or more *INPUT_FILE*'s
+ into a single resulting *OUTPUT_FILE*. All the files involved
+ are BPF ELF object files.
+
+ The rules of BPF static linking are mostly the same as for
+ user-space object files, but in addition to combining data
+ and instruction sections, .BTF and .BTF.ext (if present in
+ any of the input files) data are combined together. .BTF
+ data is deduplicated, so all the common types across
+ *INPUT_FILE*'s will only be represented once in the resulting
+ BTF information.
+
+ BPF static linking allows to partition BPF source code into
+ individually compiled files that are then linked into
+ a single resulting BPF object file, which can be used to
+ generated BPF skeleton (with **gen skeleton** command) or
+ passed directly into **libbpf** (using **bpf_object__open()**
+ family of APIs).
+
**bpftool gen skeleton** *FILE*
Generate BPF skeleton C header file for a given *FILE*.
@@ -75,10 +96,13 @@ DESCRIPTION
specific maps, programs, etc.
As part of skeleton, few custom functions are generated.
- Each of them is prefixed with object name, derived from
- object file name. I.e., if BPF object file name is
- **example.o**, BPF object name will be **example**. The
- following custom functions are provided in such case:
+ Each of them is prefixed with object name. Object name can
+ either be derived from object file name, i.e., if BPF object
+ file name is **example.o**, BPF object name will be
+ **example**. Object name can be also specified explicitly
+ through **name** *OBJECT_NAME* parameter. The following
+ custom functions are provided (assuming **example** as
+ the object name):
- **example__open** and **example__open_opts**.
These functions are used to instantiate skeleton. It
@@ -130,26 +154,19 @@ OPTIONS
EXAMPLES
========
-**$ cat example.c**
+**$ cat example1.bpf.c**
::
#include <stdbool.h>
#include <linux/ptrace.h>
#include <linux/bpf.h>
- #include "bpf_helpers.h"
+ #include <bpf/bpf_helpers.h>
const volatile int param1 = 42;
bool global_flag = true;
struct { int x; } data = {};
- struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __uint(max_entries, 128);
- __type(key, int);
- __type(value, long);
- } my_map SEC(".maps");
-
SEC("raw_tp/sys_enter")
int handle_sys_enter(struct pt_regs *ctx)
{
@@ -161,6 +178,21 @@ EXAMPLES
return 0;
}
+**$ cat example2.bpf.c**
+
+::
+
+ #include <linux/ptrace.h>
+ #include <linux/bpf.h>
+ #include <bpf/bpf_helpers.h>
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 128);
+ __type(key, int);
+ __type(value, long);
+ } my_map SEC(".maps");
+
SEC("raw_tp/sys_exit")
int handle_sys_exit(struct pt_regs *ctx)
{
@@ -170,9 +202,17 @@ EXAMPLES
}
This is example BPF application with two BPF programs and a mix of BPF maps
-and global variables.
+and global variables. Source code is split across two source code files.
-**$ bpftool gen skeleton example.o**
+**$ clang -target bpf -g example1.bpf.c -o example1.bpf.o**
+**$ clang -target bpf -g example2.bpf.c -o example2.bpf.o**
+**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o**
+
+This set of commands compiles *example1.bpf.c* and *example2.bpf.c*
+individually and then statically links respective object files into the final
+BPF ELF object file *example.bpf.o*.
+
+**$ bpftool gen skeleton example.bpf.o name example | tee example.skel.h**
::
@@ -227,7 +267,7 @@ and global variables.
#endif /* __EXAMPLE_SKEL_H__ */
-**$ cat example_user.c**
+**$ cat example.c**
::
@@ -270,7 +310,7 @@ and global variables.
return err;
}
-**# ./example_user**
+**# ./example**
::
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 8ced1655fea6..b3073ae84018 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -146,6 +146,8 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
/boot/vmlinux-$(shell uname -r)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+bootstrap: $(BPFTOOL_BOOTSTRAP)
+
ifneq ($(VMLINUX_BTF)$(VMLINUX_H),)
ifeq ($(feature-clang-bpf-co-re),1)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index fdffbc64c65c..d67518bcbd44 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -981,12 +981,25 @@ _bpftool()
;;
gen)
case $command in
- skeleton)
+ object)
_filedir
+ return 0
+ ;;
+ skeleton)
+ case $prev in
+ $command)
+ _filedir
+ return 0
+ ;;
+ *)
+ _bpftool_once_attr 'name'
+ return 0
+ ;;
+ esac
;;
*)
[[ $prev == $object ]] && \
- COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) )
+ COMPREPLY=( $( compgen -W 'object skeleton help' -- "$cur" ) )
;;
esac
;;
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index fe9e7b3a4b50..62953bbf68b4 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -36,6 +36,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
[BTF_KIND_VAR] = "VAR",
[BTF_KIND_DATASEC] = "DATASEC",
+ [BTF_KIND_FLOAT] = "FLOAT",
};
struct btf_attach_table {
@@ -327,6 +328,13 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
jsonw_end_array(w);
break;
}
+ case BTF_KIND_FLOAT: {
+ if (json_output)
+ jsonw_uint_field(w, "size", t->size);
+ else
+ printf(" size=%u", t->size);
+ break;
+ }
default:
break;
}
@@ -538,6 +546,7 @@ static int do_dump(int argc, char **argv)
NEXT_ARG();
if (argc < 1) {
p_err("expecting value for 'format' option\n");
+ err = -EINVAL;
goto done;
}
if (strcmp(*argv, "c") == 0) {
@@ -547,11 +556,13 @@ static int do_dump(int argc, char **argv)
} else {
p_err("unrecognized format specifier: '%s', possible values: raw, c",
*argv);
+ err = -EINVAL;
goto done;
}
NEXT_ARG();
} else {
p_err("unrecognized option: '%s'", *argv);
+ err = -EINVAL;
goto done;
}
}
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 0e9310727281..7ca54d046362 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -596,6 +596,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
switch (BTF_INFO_KIND(t->info)) {
case BTF_KIND_INT:
case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FLOAT:
BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off));
break;
case BTF_KIND_STRUCT:
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 65303664417e..1828bba19020 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -57,6 +57,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
[BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
+ [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
[BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
[BPF_LIRC_MODE2] = "lirc_mode2",
[BPF_FLOW_DISSECTOR] = "flow_dissector",
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 359960a8f1de..40a88df275f9 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -336,6 +336,10 @@ static void probe_kernel_image_config(const char *define_prefix)
{ "CONFIG_BPF_JIT", },
/* Avoid compiling eBPF interpreter (use JIT only) */
{ "CONFIG_BPF_JIT_ALWAYS_ON", },
+ /* Kernel BTF debug information available */
+ { "CONFIG_DEBUG_INFO_BTF", },
+ /* Kernel module BTF debug information available */
+ { "CONFIG_DEBUG_INFO_BTF_MODULES", },
/* cgroups */
{ "CONFIG_CGROUPS", },
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 4033c46d83e7..31ade77f5ef8 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -273,7 +273,7 @@ static int do_skeleton(int argc, char **argv)
char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")];
size_t i, map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz;
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
- char obj_name[MAX_OBJ_NAME_LEN], *obj_data;
+ char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data;
struct bpf_object *obj = NULL;
const char *file, *ident;
struct bpf_program *prog;
@@ -288,6 +288,28 @@ static int do_skeleton(int argc, char **argv)
}
file = GET_ARG();
+ while (argc) {
+ if (!REQ_ARGS(2))
+ return -1;
+
+ if (is_prefix(*argv, "name")) {
+ NEXT_ARG();
+
+ if (obj_name[0] != '\0') {
+ p_err("object name already specified");
+ return -1;
+ }
+
+ strncpy(obj_name, *argv, MAX_OBJ_NAME_LEN - 1);
+ obj_name[MAX_OBJ_NAME_LEN - 1] = '\0';
+ } else {
+ p_err("unknown arg %s", *argv);
+ return -1;
+ }
+
+ NEXT_ARG();
+ }
+
if (argc) {
p_err("extra unknown arguments");
return -1;
@@ -310,7 +332,8 @@ static int do_skeleton(int argc, char **argv)
p_err("failed to mmap() %s: %s", file, strerror(errno));
goto out;
}
- get_obj_name(obj_name, file);
+ if (obj_name[0] == '\0')
+ get_obj_name(obj_name, file);
opts.object_name = obj_name;
obj = bpf_object__open_mem(obj_data, file_sz, &opts);
if (IS_ERR(obj)) {
@@ -591,6 +614,47 @@ out:
return err;
}
+static int do_object(int argc, char **argv)
+{
+ struct bpf_linker *linker;
+ const char *output_file, *file;
+ int err = 0;
+
+ if (!REQ_ARGS(2)) {
+ usage();
+ return -1;
+ }
+
+ output_file = GET_ARG();
+
+ linker = bpf_linker__new(output_file, NULL);
+ if (!linker) {
+ p_err("failed to create BPF linker instance");
+ return -1;
+ }
+
+ while (argc) {
+ file = GET_ARG();
+
+ err = bpf_linker__add_file(linker, file);
+ if (err) {
+ p_err("failed to link '%s': %s (%d)", file, strerror(err), err);
+ goto out;
+ }
+ }
+
+ err = bpf_linker__finalize(linker);
+ if (err) {
+ p_err("failed to finalize ELF file: %s (%d)", strerror(err), err);
+ goto out;
+ }
+
+ err = 0;
+out:
+ bpf_linker__free(linker);
+ return err;
+}
+
static int do_help(int argc, char **argv)
{
if (json_output) {
@@ -599,7 +663,8 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %1$s %2$s skeleton FILE\n"
+ "Usage: %1$s %2$s object OUTPUT_FILE INPUT_FILE [INPUT_FILE...]\n"
+ " %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_OPTIONS "\n"
@@ -610,6 +675,7 @@ static int do_help(int argc, char **argv)
}
static const struct cmd cmds[] = {
+ { "object", do_object },
{ "skeleton", do_skeleton },
{ "help", do_help },
{ 0 }
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index b86f450e6fce..d9afb730136a 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -276,7 +276,7 @@ static int do_batch(int argc, char **argv)
int n_argc;
FILE *fp;
char *cp;
- int err;
+ int err = 0;
int i;
if (argc < 2) {
@@ -370,7 +370,6 @@ static int do_batch(int argc, char **argv)
} else {
if (!json_output)
printf("processed %d commands\n", lines);
- err = 0;
}
err_close:
if (fp != stdin)
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index b400364ee054..09ae0381205b 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -100,7 +100,7 @@ static int do_dump_btf(const struct btf_dumper *d,
void *value)
{
__u32 value_id;
- int ret;
+ int ret = 0;
/* start of key-value pair */
jsonw_start_object(d->jw);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f2b915b20546..3f067d2d7584 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -76,6 +76,7 @@ enum dump_mode {
static const char * const attach_type_strings[] = {
[BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
[BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
+ [BPF_SK_SKB_VERDICT] = "skb_verdict",
[BPF_SK_MSG_VERDICT] = "msg_verdict",
[BPF_FLOW_DISSECTOR] = "flow_dissector",
[__MAX_BPF_ATTACH_TYPE] = NULL,
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 8608cd68cdd0..6fc3e6f7f40c 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -196,6 +196,9 @@ static const char *print_imm(void *private_data,
else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
+ else if (insn->src_reg == BPF_PSEUDO_FUNC)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "subprog[%+d]", insn->imm);
else
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"0x%llx", (unsigned long long)full_imm);
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 7409d7860aa6..7550fd9c3188 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -115,10 +115,10 @@ struct object {
static int verbose;
-int eprintf(int level, int var, const char *fmt, ...)
+static int eprintf(int level, int var, const char *fmt, ...)
{
va_list args;
- int ret;
+ int ret = 0;
if (var >= level) {
va_start(args, fmt);
@@ -260,6 +260,11 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
return btf_id__add(root, id, false);
}
+/* Older libelf.h and glibc elf.h might not yet define the ELF compression types. */
+#ifndef SHF_COMPRESSED
+#define SHF_COMPRESSED (1 << 11) /* Section with compressed data. */
+#endif
+
/*
* The data of compressed section should be aligned to 4
* (for 32bit) or 8 (for 64 bit) bytes. The binutils ld
@@ -380,7 +385,7 @@ static int elf_collect(struct object *obj)
static int symbols_collect(struct object *obj)
{
Elf_Scn *scn = NULL;
- int n, i, err = 0;
+ int n, i;
GElf_Shdr sh;
char *name;
@@ -397,11 +402,10 @@ static int symbols_collect(struct object *obj)
* Scan symbols and look for the ones starting with
* __BTF_ID__* over .BTF_ids section.
*/
- for (i = 0; !err && i < n; i++) {
- char *tmp, *prefix;
+ for (i = 0; i < n; i++) {
+ char *prefix;
struct btf_id *id;
GElf_Sym sym;
- int err = -1;
if (!gelf_getsym(obj->efile.symbols, i, &sym))
return -1;
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index 9d9fb6209be1..3818ec511fd2 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -16,7 +16,10 @@ CFLAGS := -g -Wall
# Try to detect best kernel BTF source
KERNEL_REL := $(shell uname -r)
-VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL)
+VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../vmlinux /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(KERNEL_REL)
VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \
$(wildcard $(VMLINUX_BTF_PATHS))))
@@ -66,12 +69,16 @@ $(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
$(QUIET_MKDIR)mkdir -p $@
$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
+ifeq ($(VMLINUX_H),)
$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \
"specify its location." >&2; \
exit 1;\
fi
$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
+else
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
index 1f18a409f044..645530ca7e98 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -11,9 +11,9 @@ const volatile __u64 min_us = 0;
const volatile pid_t targ_pid = 0;
struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __uint(max_entries, 10240);
- __type(key, u32);
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
__type(value, u64);
} start SEC(".maps");
@@ -25,15 +25,20 @@ struct {
/* record enqueue timestamp */
__always_inline
-static int trace_enqueue(u32 tgid, u32 pid)
+static int trace_enqueue(struct task_struct *t)
{
- u64 ts;
+ u32 pid = t->pid;
+ u64 *ptr;
if (!pid || (targ_pid && targ_pid != pid))
return 0;
- ts = bpf_ktime_get_ns();
- bpf_map_update_elem(&start, &pid, &ts, 0);
+ ptr = bpf_task_storage_get(&start, t, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return 0;
+
+ *ptr = bpf_ktime_get_ns();
return 0;
}
@@ -43,7 +48,7 @@ int handle__sched_wakeup(u64 *ctx)
/* TP_PROTO(struct task_struct *p) */
struct task_struct *p = (void *)ctx[0];
- return trace_enqueue(p->tgid, p->pid);
+ return trace_enqueue(p);
}
SEC("tp_btf/sched_wakeup_new")
@@ -52,7 +57,7 @@ int handle__sched_wakeup_new(u64 *ctx)
/* TP_PROTO(struct task_struct *p) */
struct task_struct *p = (void *)ctx[0];
- return trace_enqueue(p->tgid, p->pid);
+ return trace_enqueue(p);
}
SEC("tp_btf/sched_switch")
@@ -70,12 +75,16 @@ int handle__sched_switch(u64 *ctx)
/* ivcsw: treat like an enqueue event and store timestamp */
if (prev->state == TASK_RUNNING)
- trace_enqueue(prev->tgid, prev->pid);
+ trace_enqueue(prev);
pid = next->pid;
+ /* For pid mismatch, save a bpf_task_storage_get */
+ if (!pid || (targ_pid && targ_pid != pid))
+ return 0;
+
/* fetch timestamp and calculate delta */
- tsp = bpf_map_lookup_elem(&start, &pid);
+ tsp = bpf_task_storage_get(&start, next, 0, 0);
if (!tsp)
return 0; /* missed enqueue */
@@ -91,7 +100,7 @@ int handle__sched_switch(u64 *ctx)
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
&event, sizeof(event));
- bpf_map_delete_elem(&start, &pid);
+ bpf_task_storage_delete(&start, next);
return 0;
}
diff --git a/tools/build/Makefile b/tools/build/Makefile
index bae48e6fa995..5ed41b96fcde 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -30,12 +30,18 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj
all: $(OUTPUT)fixdep
+# Make sure there's anything to clean,
+# feature contains check for existing OUTPUT
+TMP_O := $(if $(OUTPUT),$(OUTPUT)/feature,./)
+
clean:
$(call QUIET_CLEAN, fixdep)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)rm -f $(OUTPUT)fixdep
$(call QUIET_CLEAN, feature-detect)
- $(Q)$(MAKE) -C feature/ clean >/dev/null
+ifneq ($(wildcard $(TMP_O)),)
+ $(Q)$(MAKE) -C feature OUTPUT=$(TMP_O) clean >/dev/null
+endif
$(OUTPUT)fixdep-in.o: FORCE
$(Q)$(MAKE) $(build)=fixdep
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 97cbfb31b762..74e255d58d8d 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -99,7 +99,9 @@ FEATURE_TESTS_EXTRA := \
clang \
libbpf \
libpfm4 \
- libdebuginfod
+ libdebuginfod \
+ clang-bpf-co-re
+
FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index 1547bc2c0950..52c790b0317b 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -4,9 +4,9 @@
/*
* Check OpenCSD library version is sufficient to provide required features
*/
-#define OCSD_MIN_VER ((0 << 16) | (14 << 8) | (0))
+#define OCSD_MIN_VER ((1 << 16) | (0 << 8) | (0))
#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
-#error "OpenCSD >= 0.14.0 is required"
+#error "OpenCSD >= 1.0.0 is required"
#endif
int main(void)
diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c
index 37187e056c8b..1639b4d832cd 100644
--- a/tools/gpio/gpio-utils.c
+++ b/tools/gpio/gpio-utils.c
@@ -32,74 +32,6 @@
* following api will request gpio lines, do the operation and then
* release these lines.
*/
-/**
- * gpiotools_request_linehandle() - request gpio lines in a gpiochip
- * @device_name: The name of gpiochip without prefix "/dev/",
- * such as "gpiochip0"
- * @lines: An array desired lines, specified by offset
- * index for the associated GPIO device.
- * @num_lines: The number of lines to request.
- * @flag: The new flag for requsted gpio. Reference
- * "linux/gpio.h" for the meaning of flag.
- * @data: Default value will be set to gpio when flag is
- * GPIOHANDLE_REQUEST_OUTPUT.
- * @consumer_label: The name of consumer, such as "sysfs",
- * "powerkey". This is useful for other users to
- * know who is using.
- *
- * Request gpio lines through the ioctl provided by chardev. User
- * could call gpiotools_set_values() and gpiotools_get_values() to
- * read and write respectively through the returned fd. Call
- * gpiotools_release_linehandle() to release these lines after that.
- *
- * Return: On success return the fd;
- * On failure return the errno.
- */
-int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
- unsigned int num_lines, unsigned int flag,
- struct gpiohandle_data *data,
- const char *consumer_label)
-{
- struct gpiohandle_request req;
- char *chrdev_name;
- int fd;
- int i;
- int ret;
-
- ret = asprintf(&chrdev_name, "/dev/%s", device_name);
- if (ret < 0)
- return -ENOMEM;
-
- fd = open(chrdev_name, 0);
- if (fd == -1) {
- ret = -errno;
- fprintf(stderr, "Failed to open %s, %s\n",
- chrdev_name, strerror(errno));
- goto exit_free_name;
- }
-
- for (i = 0; i < num_lines; i++)
- req.lineoffsets[i] = lines[i];
-
- req.flags = flag;
- strcpy(req.consumer_label, consumer_label);
- req.lines = num_lines;
- if (flag & GPIOHANDLE_REQUEST_OUTPUT)
- memcpy(req.default_values, data, sizeof(req.default_values));
-
- ret = ioctl(fd, GPIO_GET_LINEHANDLE_IOCTL, &req);
- if (ret == -1) {
- ret = -errno;
- fprintf(stderr, "Failed to issue %s (%d), %s\n",
- "GPIO_GET_LINEHANDLE_IOCTL", ret, strerror(errno));
- }
-
- if (close(fd) == -1)
- perror("Failed to close GPIO character device file");
-exit_free_name:
- free(chrdev_name);
- return ret < 0 ? ret : req.fd;
-}
/**
* gpiotools_request_line() - request gpio lines in a gpiochip
@@ -216,27 +148,6 @@ int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values)
}
/**
- * gpiotools_release_linehandle(): Release the line(s) of gpiochip
- * @fd: The fd returned by
- * gpiotools_request_linehandle().
- *
- * Return: On success return 0;
- * On failure return the errno.
- */
-int gpiotools_release_linehandle(const int fd)
-{
- int ret;
-
- ret = close(fd);
- if (ret == -1) {
- perror("Failed to close GPIO LINEHANDLE device file");
- ret = -errno;
- }
-
- return ret;
-}
-
-/**
* gpiotools_release_line(): Release the line(s) of gpiochip
* @fd: The fd returned by
* gpiotools_request_line().
diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h
index 6c69a9f1c253..8af7c8ee19ce 100644
--- a/tools/gpio/gpio-utils.h
+++ b/tools/gpio/gpio-utils.h
@@ -24,12 +24,6 @@ static inline int check_prefix(const char *str, const char *prefix)
strncmp(str, prefix, strlen(prefix)) == 0;
}
-int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
- unsigned int num_lines, unsigned int flag,
- struct gpiohandle_data *data,
- const char *consumer_label);
-int gpiotools_release_linehandle(const int fd);
-
int gpiotools_request_line(const char *device_name,
unsigned int *lines,
unsigned int num_lines,
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h
index b0e35eec6499..4ac5c081af93 100644
--- a/tools/include/linux/coresight-pmu.h
+++ b/tools/include/linux/coresight-pmu.h
@@ -10,17 +10,27 @@
#define CORESIGHT_ETM_PMU_NAME "cs_etm"
#define CORESIGHT_ETM_PMU_SEED 0x10
-/* ETMv3.5/PTM's ETMCR config bit */
-#define ETM_OPT_CYCACC 12
-#define ETM_OPT_CTXTID 14
-#define ETM_OPT_TS 28
-#define ETM_OPT_RETSTK 29
+/*
+ * Below are the definition of bit offsets for perf option, and works as
+ * arbitrary values for all ETM versions.
+ *
+ * Most of them are orignally from ETMv3.5/PTM's ETMCR config, therefore,
+ * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and
+ * directly use below macros as config bits.
+ */
+#define ETM_OPT_CYCACC 12
+#define ETM_OPT_CTXTID 14
+#define ETM_OPT_CTXTID2 15
+#define ETM_OPT_TS 28
+#define ETM_OPT_RETSTK 29
/* ETMv4 CONFIGR programming bits for the ETM OPTs */
#define ETM4_CFG_BIT_CYCACC 4
#define ETM4_CFG_BIT_CTXTID 6
+#define ETM4_CFG_BIT_VMID 7
#define ETM4_CFG_BIT_TS 11
#define ETM4_CFG_BIT_RETSTK 12
+#define ETM4_CFG_BIT_VMID_OPT 15
static inline int coresight_get_trace_id(int cpu)
{
diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h
index d07e586b9ba0..acb6f4daa2f0 100644
--- a/tools/include/linux/export.h
+++ b/tools/include/linux/export.h
@@ -3,8 +3,5 @@
#define EXPORT_SYMBOL(sym)
#define EXPORT_SYMBOL_GPL(sym)
-#define EXPORT_SYMBOL_GPL_FUTURE(sym)
-#define EXPORT_UNUSED_SYMBOL(sym)
-#define EXPORT_UNUSED_SYMBOL_GPL(sym)
#endif
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
index 577f51436cf9..7e72d975cb76 100644
--- a/tools/include/linux/objtool.h
+++ b/tools/include/linux/objtool.h
@@ -29,11 +29,14 @@ struct unwind_hint {
*
* UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
* sp_reg+sp_offset points to the iret return frame.
+ *
+ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
+ * Useful for code which doesn't have an ELF function annotation.
*/
#define UNWIND_HINT_TYPE_CALL 0
#define UNWIND_HINT_TYPE_REGS 1
#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
-#define UNWIND_HINT_TYPE_RET_OFFSET 3
+#define UNWIND_HINT_TYPE_FUNC 3
#ifdef CONFIG_STACK_VALIDATION
@@ -109,6 +112,12 @@ struct unwind_hint {
.popsection
.endm
+.macro STACK_FRAME_NON_STANDARD func:req
+ .pushsection .discard.func_stack_frame_non_standard, "aw"
+ .long \func - .
+ .popsection
+.endm
+
#endif /* __ASSEMBLY__ */
#else /* !CONFIG_STACK_VALIDATION */
@@ -122,6 +131,8 @@ struct unwind_hint {
#define ANNOTATE_INTRA_FUNCTION_CALL
.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
.endm
+.macro STACK_FRAME_NON_STANDARD func:req
+.endm
#endif
#endif /* CONFIG_STACK_VALIDATION */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 728752917785..ce58cff99b66 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -861,9 +861,11 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2)
__SYSCALL(__NR_process_madvise, sys_process_madvise)
#define __NR_epoll_pwait2 441
__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
+#define __NR_mount_setattr 442
+__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
#undef __NR_syscalls
-#define __NR_syscalls 442
+#define __NR_syscalls 443
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 808b48a93330..0827037c5484 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -1,11 +1,10 @@
-/**
- * \file drm.h
+/*
* Header for the Direct Rendering Manager
*
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
+ * Author: Rickard E. (Rik) Faith <faith@valinux.com>
*
- * \par Acknowledgments:
- * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic \c cmpxchg.
+ * Acknowledgments:
+ * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic cmpxchg.
*/
/*
@@ -85,7 +84,7 @@ typedef unsigned int drm_context_t;
typedef unsigned int drm_drawable_t;
typedef unsigned int drm_magic_t;
-/**
+/*
* Cliprect.
*
* \warning: If you change this structure, make sure you change
@@ -101,7 +100,7 @@ struct drm_clip_rect {
unsigned short y2;
};
-/**
+/*
* Drawable information.
*/
struct drm_drawable_info {
@@ -109,7 +108,7 @@ struct drm_drawable_info {
struct drm_clip_rect *rects;
};
-/**
+/*
* Texture region,
*/
struct drm_tex_region {
@@ -120,7 +119,7 @@ struct drm_tex_region {
unsigned int age;
};
-/**
+/*
* Hardware lock.
*
* The lock structure is a simple cache-line aligned integer. To avoid
@@ -132,7 +131,7 @@ struct drm_hw_lock {
char padding[60]; /**< Pad to cache line */
};
-/**
+/*
* DRM_IOCTL_VERSION ioctl argument type.
*
* \sa drmGetVersion().
@@ -149,7 +148,7 @@ struct drm_version {
char __user *desc; /**< User-space buffer to hold desc */
};
-/**
+/*
* DRM_IOCTL_GET_UNIQUE ioctl argument type.
*
* \sa drmGetBusid() and drmSetBusId().
@@ -168,7 +167,7 @@ struct drm_block {
int unused;
};
-/**
+/*
* DRM_IOCTL_CONTROL ioctl argument type.
*
* \sa drmCtlInstHandler() and drmCtlUninstHandler().
@@ -183,7 +182,7 @@ struct drm_control {
int irq;
};
-/**
+/*
* Type of memory to map.
*/
enum drm_map_type {
@@ -195,7 +194,7 @@ enum drm_map_type {
_DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */
};
-/**
+/*
* Memory mapping flags.
*/
enum drm_map_flags {
@@ -214,7 +213,7 @@ struct drm_ctx_priv_map {
void *handle; /**< Handle of map */
};
-/**
+/*
* DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls
* argument type.
*
@@ -231,7 +230,7 @@ struct drm_map {
/* Private data */
};
-/**
+/*
* DRM_IOCTL_GET_CLIENT ioctl argument type.
*/
struct drm_client {
@@ -263,7 +262,7 @@ enum drm_stat_type {
/* Add to the *END* of the list */
};
-/**
+/*
* DRM_IOCTL_GET_STATS ioctl argument type.
*/
struct drm_stats {
@@ -274,7 +273,7 @@ struct drm_stats {
} data[15];
};
-/**
+/*
* Hardware locking flags.
*/
enum drm_lock_flags {
@@ -289,7 +288,7 @@ enum drm_lock_flags {
_DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */
};
-/**
+/*
* DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type.
*
* \sa drmGetLock() and drmUnlock().
@@ -299,7 +298,7 @@ struct drm_lock {
enum drm_lock_flags flags;
};
-/**
+/*
* DMA flags
*
* \warning
@@ -328,7 +327,7 @@ enum drm_dma_flags {
_DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */
};
-/**
+/*
* DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type.
*
* \sa drmAddBufs().
@@ -351,7 +350,7 @@ struct drm_buf_desc {
*/
};
-/**
+/*
* DRM_IOCTL_INFO_BUFS ioctl argument type.
*/
struct drm_buf_info {
@@ -359,7 +358,7 @@ struct drm_buf_info {
struct drm_buf_desc __user *list;
};
-/**
+/*
* DRM_IOCTL_FREE_BUFS ioctl argument type.
*/
struct drm_buf_free {
@@ -367,7 +366,7 @@ struct drm_buf_free {
int __user *list;
};
-/**
+/*
* Buffer information
*
* \sa drm_buf_map.
@@ -379,7 +378,7 @@ struct drm_buf_pub {
void __user *address; /**< Address of buffer */
};
-/**
+/*
* DRM_IOCTL_MAP_BUFS ioctl argument type.
*/
struct drm_buf_map {
@@ -392,7 +391,7 @@ struct drm_buf_map {
struct drm_buf_pub __user *list; /**< Buffer information */
};
-/**
+/*
* DRM_IOCTL_DMA ioctl argument type.
*
* Indices here refer to the offset into the buffer list in drm_buf_get.
@@ -417,7 +416,7 @@ enum drm_ctx_flags {
_DRM_CONTEXT_2DONLY = 0x02
};
-/**
+/*
* DRM_IOCTL_ADD_CTX ioctl argument type.
*
* \sa drmCreateContext() and drmDestroyContext().
@@ -427,7 +426,7 @@ struct drm_ctx {
enum drm_ctx_flags flags;
};
-/**
+/*
* DRM_IOCTL_RES_CTX ioctl argument type.
*/
struct drm_ctx_res {
@@ -435,14 +434,14 @@ struct drm_ctx_res {
struct drm_ctx __user *contexts;
};
-/**
+/*
* DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type.
*/
struct drm_draw {
drm_drawable_t handle;
};
-/**
+/*
* DRM_IOCTL_UPDATE_DRAW ioctl argument type.
*/
typedef enum {
@@ -456,14 +455,14 @@ struct drm_update_draw {
unsigned long long data;
};
-/**
+/*
* DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type.
*/
struct drm_auth {
drm_magic_t magic;
};
-/**
+/*
* DRM_IOCTL_IRQ_BUSID ioctl argument type.
*
* \sa drmGetInterruptFromBusID().
@@ -505,7 +504,7 @@ struct drm_wait_vblank_reply {
long tval_usec;
};
-/**
+/*
* DRM_IOCTL_WAIT_VBLANK ioctl argument type.
*
* \sa drmWaitVBlank().
@@ -518,7 +517,7 @@ union drm_wait_vblank {
#define _DRM_PRE_MODESET 1
#define _DRM_POST_MODESET 2
-/**
+/*
* DRM_IOCTL_MODESET_CTL ioctl argument type
*
* \sa drmModesetCtl().
@@ -528,7 +527,7 @@ struct drm_modeset_ctl {
__u32 cmd;
};
-/**
+/*
* DRM_IOCTL_AGP_ENABLE ioctl argument type.
*
* \sa drmAgpEnable().
@@ -537,7 +536,7 @@ struct drm_agp_mode {
unsigned long mode; /**< AGP mode */
};
-/**
+/*
* DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type.
*
* \sa drmAgpAlloc() and drmAgpFree().
@@ -549,7 +548,7 @@ struct drm_agp_buffer {
unsigned long physical; /**< Physical used by i810 */
};
-/**
+/*
* DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type.
*
* \sa drmAgpBind() and drmAgpUnbind().
@@ -559,7 +558,7 @@ struct drm_agp_binding {
unsigned long offset; /**< In bytes -- will round to page boundary */
};
-/**
+/*
* DRM_IOCTL_AGP_INFO ioctl argument type.
*
* \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(),
@@ -580,7 +579,7 @@ struct drm_agp_info {
unsigned short id_device;
};
-/**
+/*
* DRM_IOCTL_SG_ALLOC ioctl argument type.
*/
struct drm_scatter_gather {
@@ -588,7 +587,7 @@ struct drm_scatter_gather {
unsigned long handle; /**< Used for mapping / unmapping */
};
-/**
+/*
* DRM_IOCTL_SET_VERSION ioctl argument type.
*/
struct drm_set_version {
@@ -598,14 +597,14 @@ struct drm_set_version {
int drm_dd_minor;
};
-/** DRM_IOCTL_GEM_CLOSE ioctl argument type */
+/* DRM_IOCTL_GEM_CLOSE ioctl argument type */
struct drm_gem_close {
/** Handle of the object to be closed. */
__u32 handle;
__u32 pad;
};
-/** DRM_IOCTL_GEM_FLINK ioctl argument type */
+/* DRM_IOCTL_GEM_FLINK ioctl argument type */
struct drm_gem_flink {
/** Handle for the object being named */
__u32 handle;
@@ -614,7 +613,7 @@ struct drm_gem_flink {
__u32 name;
};
-/** DRM_IOCTL_GEM_OPEN ioctl argument type */
+/* DRM_IOCTL_GEM_OPEN ioctl argument type */
struct drm_gem_open {
/** Name of object being opened */
__u32 name;
@@ -652,7 +651,7 @@ struct drm_gem_open {
#define DRM_CAP_SYNCOBJ 0x13
#define DRM_CAP_SYNCOBJ_TIMELINE 0x14
-/** DRM_IOCTL_GET_CAP ioctl argument type */
+/* DRM_IOCTL_GET_CAP ioctl argument type */
struct drm_get_cap {
__u64 capability;
__u64 value;
@@ -678,7 +677,9 @@ struct drm_get_cap {
/**
* DRM_CLIENT_CAP_ATOMIC
*
- * If set to 1, the DRM core will expose atomic properties to userspace
+ * If set to 1, the DRM core will expose atomic properties to userspace. This
+ * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and
+ * &DRM_CLIENT_CAP_ASPECT_RATIO.
*/
#define DRM_CLIENT_CAP_ATOMIC 3
@@ -698,7 +699,7 @@ struct drm_get_cap {
*/
#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5
-/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
+/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
struct drm_set_client_cap {
__u64 capability;
__u64 value;
@@ -950,7 +951,7 @@ extern "C" {
#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
-/**
+/*
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x9f.
* Generic IOCTLS restart at 0xA0.
@@ -961,7 +962,7 @@ extern "C" {
#define DRM_COMMAND_BASE 0x40
#define DRM_COMMAND_END 0xA0
-/**
+/*
* Header for events written back to userspace on the drm fd. The
* type defines the type of event, the length specifies the total
* length of the event (including the header), and user_data is
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index fa1f3d62f9a6..1987e2ea79a3 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -177,8 +177,9 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2)
#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3)
+#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4)
-#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4c24daa43bac..69902603012c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -93,7 +93,717 @@ union bpf_iter_link_info {
} map;
};
-/* BPF syscall commands, see bpf(2) man-page for details. */
+/* BPF syscall commands, see bpf(2) man-page for more details. */
+/**
+ * DOC: eBPF Syscall Preamble
+ *
+ * The operation to be performed by the **bpf**\ () system call is determined
+ * by the *cmd* argument. Each operation takes an accompanying argument,
+ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
+ * below). The size argument is the size of the union pointed to by *attr*.
+ */
+/**
+ * DOC: eBPF Syscall Commands
+ *
+ * BPF_MAP_CREATE
+ * Description
+ * Create a map and return a file descriptor that refers to the
+ * map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
+ * is automatically enabled for the new file descriptor.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_MAP_CREATE** will delete the map (but see NOTES).
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_LOOKUP_ELEM
+ * Description
+ * Look up an element with a given *key* in the map referred to
+ * by the file descriptor *map_fd*.
+ *
+ * The *flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_UPDATE_ELEM
+ * Description
+ * Create or update an element (key/value pair) in a specified map.
+ *
+ * The *flags* argument should be specified as one of the
+ * following:
+ *
+ * **BPF_ANY**
+ * Create a new element or update an existing element.
+ * **BPF_NOEXIST**
+ * Create a new element only if it did not exist.
+ * **BPF_EXIST**
+ * Update an existing element.
+ * **BPF_F_LOCK**
+ * Update a spin_lock-ed map element.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
+ * **E2BIG**, **EEXIST**, or **ENOENT**.
+ *
+ * **E2BIG**
+ * The number of elements in the map reached the
+ * *max_entries* limit specified at map creation time.
+ * **EEXIST**
+ * If *flags* specifies **BPF_NOEXIST** and the element
+ * with *key* already exists in the map.
+ * **ENOENT**
+ * If *flags* specifies **BPF_EXIST** and the element with
+ * *key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_ELEM
+ * Description
+ * Look up and delete an element by key in a specified map.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_KEY
+ * Description
+ * Look up an element by key in a specified map and return the key
+ * of the next element. Can be used to iterate over all elements
+ * in the map.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * The following cases can be used to iterate over all elements of
+ * the map:
+ *
+ * * If *key* is not found, the operation returns zero and sets
+ * the *next_key* pointer to the key of the first element.
+ * * If *key* is found, the operation returns zero and sets the
+ * *next_key* pointer to the key of the next element.
+ * * If *key* is the last element, returns -1 and *errno* is set
+ * to **ENOENT**.
+ *
+ * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
+ * **EINVAL** on error.
+ *
+ * BPF_PROG_LOAD
+ * Description
+ * Verify and load an eBPF program, returning a new file
+ * descriptor associated with the program.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
+ *
+ * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
+ * automatically enabled for the new file descriptor.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_PIN
+ * Description
+ * Pin an eBPF program or map referred by the specified *bpf_fd*
+ * to the provided *pathname* on the filesystem.
+ *
+ * The *pathname* argument must not contain a dot (".").
+ *
+ * On success, *pathname* retains a reference to the eBPF object,
+ * preventing deallocation of the object when the original
+ * *bpf_fd* is closed. This allow the eBPF object to live beyond
+ * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
+ * process.
+ *
+ * Applying **unlink**\ (2) or similar calls to the *pathname*
+ * unpins the object from the filesystem, removing the reference.
+ * If no other file descriptors or filesystem nodes refer to the
+ * same object, it will be deallocated (see NOTES).
+ *
+ * The filesystem type for the parent directory of *pathname* must
+ * be **BPF_FS_MAGIC**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_OBJ_GET
+ * Description
+ * Open a file descriptor for the eBPF object pinned to the
+ * specified *pathname*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_PROG_ATTACH
+ * Description
+ * Attach an eBPF program to a *target_fd* at the specified
+ * *attach_type* hook.
+ *
+ * The *attach_type* specifies the eBPF attachment point to
+ * attach the program to, and must be one of *bpf_attach_type*
+ * (see below).
+ *
+ * The *attach_bpf_fd* must be a valid file descriptor for a
+ * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
+ * or sock_ops type corresponding to the specified *attach_type*.
+ *
+ * The *target_fd* must be a valid file descriptor for a kernel
+ * object which depends on the attach type of *attach_bpf_fd*:
+ *
+ * **BPF_PROG_TYPE_CGROUP_DEVICE**,
+ * **BPF_PROG_TYPE_CGROUP_SKB**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ * **BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ * **BPF_PROG_TYPE_SOCK_OPS**
+ *
+ * Control Group v2 hierarchy with the eBPF controller
+ * enabled. Requires the kernel to be compiled with
+ * **CONFIG_CGROUP_BPF**.
+ *
+ * **BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ * Network namespace (eg /proc/self/ns/net).
+ *
+ * **BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ * LIRC device path (eg /dev/lircN). Requires the kernel
+ * to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ * **BPF_PROG_TYPE_SK_SKB**,
+ * **BPF_PROG_TYPE_SK_MSG**
+ *
+ * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_DETACH
+ * Description
+ * Detach the eBPF program associated with the *target_fd* at the
+ * hook specified by *attach_type*. The program must have been
+ * previously attached using **BPF_PROG_ATTACH**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_TEST_RUN
+ * Description
+ * Run the eBPF program associated with the *prog_fd* a *repeat*
+ * number of times against a provided program context *ctx_in* and
+ * data *data_in*, and return the modified program context
+ * *ctx_out*, *data_out* (for example, packet data), result of the
+ * execution *retval*, and *duration* of the test run.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * **ENOSPC**
+ * Either *data_size_out* or *ctx_size_out* is too small.
+ * **ENOTSUPP**
+ * This command is not supported by the program type of
+ * the program referred to by *prog_fd*.
+ *
+ * BPF_PROG_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF program currently loaded into the kernel.
+ *
+ * Looks for the eBPF program with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF programs
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF map currently loaded into the kernel.
+ *
+ * Looks for the eBPF map with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF maps
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_PROG_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF program corresponding to
+ * *prog_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF map corresponding to
+ * *map_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_GET_INFO_BY_FD
+ * Description
+ * Obtain information about the eBPF object corresponding to
+ * *bpf_fd*.
+ *
+ * Populates up to *info_len* bytes of *info*, which will be in
+ * one of the following formats depending on the eBPF object type
+ * of *bpf_fd*:
+ *
+ * * **struct bpf_prog_info**
+ * * **struct bpf_map_info**
+ * * **struct bpf_btf_info**
+ * * **struct bpf_link_info**
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_QUERY
+ * Description
+ * Obtain information about eBPF programs associated with the
+ * specified *attach_type* hook.
+ *
+ * The *target_fd* must be a valid file descriptor for a kernel
+ * object which depends on the attach type of *attach_bpf_fd*:
+ *
+ * **BPF_PROG_TYPE_CGROUP_DEVICE**,
+ * **BPF_PROG_TYPE_CGROUP_SKB**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ * **BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ * **BPF_PROG_TYPE_SOCK_OPS**
+ *
+ * Control Group v2 hierarchy with the eBPF controller
+ * enabled. Requires the kernel to be compiled with
+ * **CONFIG_CGROUP_BPF**.
+ *
+ * **BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ * Network namespace (eg /proc/self/ns/net).
+ *
+ * **BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ * LIRC device path (eg /dev/lircN). Requires the kernel
+ * to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ * **BPF_PROG_QUERY** always fetches the number of programs
+ * attached and the *attach_flags* which were used to attach those
+ * programs. Additionally, if *prog_ids* is nonzero and the number
+ * of attached programs is less than *prog_cnt*, populates
+ * *prog_ids* with the eBPF program ids of the programs attached
+ * at *target_fd*.
+ *
+ * The following flags may alter the result:
+ *
+ * **BPF_F_QUERY_EFFECTIVE**
+ * Only return information regarding programs which are
+ * currently effective at the specified *target_fd*.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_RAW_TRACEPOINT_OPEN
+ * Description
+ * Attach an eBPF program to a tracepoint *name* to access kernel
+ * internal arguments of the tracepoint in their raw form.
+ *
+ * The *prog_fd* must be a valid file descriptor associated with
+ * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
+ *
+ * No ABI guarantees are made about the content of tracepoint
+ * arguments exposed to the corresponding eBPF program.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES).
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_LOAD
+ * Description
+ * Verify and load BPF Type Format (BTF) metadata into the kernel,
+ * returning a new file descriptor associated with the metadata.
+ * BTF is described in more detail at
+ * https://www.kernel.org/doc/html/latest/bpf/btf.html.
+ *
+ * The *btf* parameter must point to valid memory providing
+ * *btf_size* bytes of BTF binary metadata.
+ *
+ * The returned file descriptor can be passed to other **bpf**\ ()
+ * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
+ * associate the BTF with those objects.
+ *
+ * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
+ * parameters to specify a *btf_log_buf*, *btf_log_size* and
+ * *btf_log_level* which allow the kernel to return freeform log
+ * output regarding the BTF verification process.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the BPF Type Format (BTF)
+ * corresponding to *btf_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_TASK_FD_QUERY
+ * Description
+ * Obtain information about eBPF programs associated with the
+ * target process identified by *pid* and *fd*.
+ *
+ * If the *pid* and *fd* are associated with a tracepoint, kprobe
+ * or uprobe perf event, then the *prog_id* and *fd_type* will
+ * be populated with the eBPF program id and file descriptor type
+ * of type **bpf_task_fd_type**. If associated with a kprobe or
+ * uprobe, the *probe_offset* and *probe_addr* will also be
+ * populated. Optionally, if *buf* is provided, then up to
+ * *buf_len* bytes of *buf* will be populated with the name of
+ * the tracepoint, kprobe or uprobe.
+ *
+ * The resulting *prog_id* may be introspected in deeper detail
+ * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_ELEM
+ * Description
+ * Look up an element with the given *key* in the map referred to
+ * by the file descriptor *fd*, and if found, delete the element.
+ *
+ * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
+ * implement this command as a "pop" operation, deleting the top
+ * element rather than one corresponding to *key*.
+ * The *key* and *key_len* parameters should be zeroed when
+ * issuing this operation for these map types.
+ *
+ * This command is only valid for the following map types:
+ * * **BPF_MAP_TYPE_QUEUE**
+ * * **BPF_MAP_TYPE_STACK**
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_FREEZE
+ * Description
+ * Freeze the permissions of the specified map.
+ *
+ * Write permissions may be frozen by passing zero *flags*.
+ * Upon success, no future syscall invocations may alter the
+ * map state of *map_fd*. Write operations from eBPF programs
+ * are still possible for a frozen map.
+ *
+ * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_BTF_GET_NEXT_ID
+ * Description
+ * Fetch the next BPF Type Format (BTF) object currently loaded
+ * into the kernel.
+ *
+ * Looks for the BTF object with an id greater than *start_id*
+ * and updates *next_id* on success. If no other BTF objects
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_BATCH
+ * Description
+ * Iterate and fetch multiple elements in a map.
+ *
+ * Two opaque values are used to manage batch operations,
+ * *in_batch* and *out_batch*. Initially, *in_batch* must be set
+ * to NULL to begin the batched operation. After each subsequent
+ * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
+ * *out_batch* as the *in_batch* for the next operation to
+ * continue iteration from the current point.
+ *
+ * The *keys* and *values* are output parameters which must point
+ * to memory large enough to hold *count* items based on the key
+ * and value size of the map *map_fd*. The *keys* buffer must be
+ * of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * The *elem_flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * On success, *count* elements from the map are copied into the
+ * user buffer, with the keys copied into *keys* and the values
+ * copied into the corresponding indices in *values*.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **ENOSPC** to indicate that *keys* or
+ * *values* is too small to dump an entire bucket during
+ * iteration of a hash-based map type.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_BATCH
+ * Description
+ * Iterate and delete all elements in a map.
+ *
+ * This operation has the same behavior as
+ * **BPF_MAP_LOOKUP_BATCH** with two exceptions:
+ *
+ * * Every element that is successfully returned is also deleted
+ * from the map. This is at least *count* elements. Note that
+ * *count* is both an input and an output parameter.
+ * * Upon returning with *errno* set to **EFAULT**, up to
+ * *count* elements may be deleted without returning the keys
+ * and values of the deleted elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_UPDATE_BATCH
+ * Description
+ * Update multiple elements in a map by *key*.
+ *
+ * The *keys* and *values* are input parameters which must point
+ * to memory large enough to hold *count* items based on the key
+ * and value size of the map *map_fd*. The *keys* buffer must be
+ * of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * Each element specified in *keys* is sequentially updated to the
+ * value in the corresponding index in *values*. The *in_batch*
+ * and *out_batch* parameters are ignored and should be zeroed.
+ *
+ * The *elem_flags* argument should be specified as one of the
+ * following:
+ *
+ * **BPF_ANY**
+ * Create new elements or update a existing elements.
+ * **BPF_NOEXIST**
+ * Create new elements only if they do not exist.
+ * **BPF_EXIST**
+ * Update existing elements.
+ * **BPF_F_LOCK**
+ * Update spin_lock-ed map elements. This must be
+ * specified if the map value contains a spinlock.
+ *
+ * On success, *count* elements from the map are updated.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
+ * **E2BIG**. **E2BIG** indicates that the number of elements in
+ * the map reached the *max_entries* limit specified at map
+ * creation time.
+ *
+ * May set *errno* to one of the following error codes under
+ * specific circumstances:
+ *
+ * **EEXIST**
+ * If *flags* specifies **BPF_NOEXIST** and the element
+ * with *key* already exists in the map.
+ * **ENOENT**
+ * If *flags* specifies **BPF_EXIST** and the element with
+ * *key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_BATCH
+ * Description
+ * Delete multiple elements in a map by *key*.
+ *
+ * The *keys* parameter is an input parameter which must point
+ * to memory large enough to hold *count* items based on the key
+ * size of the map *map_fd*, that is, *key_size* * *count*.
+ *
+ * Each element specified in *keys* is sequentially deleted. The
+ * *in_batch*, *out_batch*, and *values* parameters are ignored
+ * and should be zeroed.
+ *
+ * The *elem_flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * On success, *count* elements from the map are updated.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements. If
+ * *errno* is **EFAULT**, up to *count* elements may be been
+ * deleted.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_LINK_CREATE
+ * Description
+ * Attach an eBPF program to a *target_fd* at the specified
+ * *attach_type* hook and return a file descriptor handle for
+ * managing the link.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_UPDATE
+ * Description
+ * Update the eBPF program in the specified *link_fd* to
+ * *new_prog_fd*.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_LINK_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF Link corresponding to
+ * *link_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF link currently loaded into the kernel.
+ *
+ * Looks for the eBPF link with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF links
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_ENABLE_STATS
+ * Description
+ * Enable eBPF runtime statistics gathering.
+ *
+ * Runtime statistics gathering for the eBPF runtime is disabled
+ * by default to minimize the corresponding performance overhead.
+ * This command enables statistics globally.
+ *
+ * Multiple programs may independently enable statistics.
+ * After gathering the desired statistics, eBPF runtime statistics
+ * may be disabled again by calling **close**\ (2) for the file
+ * descriptor returned by this function. Statistics will only be
+ * disabled system-wide when all outstanding file descriptors
+ * returned by prior calls for this subcommand are closed.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_ITER_CREATE
+ * Description
+ * Create an iterator on top of the specified *link_fd* (as
+ * previously created using **BPF_LINK_CREATE**) and return a
+ * file descriptor that can be used to trigger the iteration.
+ *
+ * If the resulting file descriptor is pinned to the filesystem
+ * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
+ * for that path will trigger the iterator to read kernel state
+ * using the eBPF program attached to *link_fd*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_DETACH
+ * Description
+ * Forcefully detach the specified *link_fd* from its
+ * corresponding attachment point.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_BIND_MAP
+ * Description
+ * Bind a map to the lifetime of an eBPF program.
+ *
+ * The map identified by *map_fd* is bound to the program
+ * identified by *prog_fd* and only released when *prog_fd* is
+ * released. This may be used in cases where metadata should be
+ * associated with a program which otherwise does not contain any
+ * references to the map (for example, embedded in the eBPF
+ * program instructions).
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * NOTES
+ * eBPF objects (maps and programs) can be shared between processes.
+ *
+ * * After **fork**\ (2), the child inherits file descriptors
+ * referring to the same eBPF objects.
+ * * File descriptors referring to eBPF objects can be transferred over
+ * **unix**\ (7) domain sockets.
+ * * File descriptors referring to eBPF objects can be duplicated in the
+ * usual way, using **dup**\ (2) and similar calls.
+ * * File descriptors referring to eBPF objects can be pinned to the
+ * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
+ *
+ * An eBPF object is deallocated only after all file descriptors referring
+ * to the object have been closed and no references remain pinned to the
+ * filesystem or attached (for example, bound to a program or device).
+ */
enum bpf_cmd {
BPF_MAP_CREATE,
BPF_MAP_LOOKUP_ELEM,
@@ -247,6 +957,7 @@ enum bpf_attach_type {
BPF_XDP_CPUMAP,
BPF_SK_LOOKUP,
BPF_XDP,
+ BPF_SK_SKB_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
@@ -393,11 +1104,24 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
+/* insn[0].src_reg: BPF_PSEUDO_FUNC
+ * insn[0].imm: insn offset to the func
+ * insn[1].imm: 0
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of the function
+ * verifier type: PTR_TO_FUNC.
+ */
+#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
*/
#define BPF_PSEUDO_CALL 1
+/* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL,
+ * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel
+ */
+#define BPF_PSEUDO_KFUNC_CALL 2
/* flags for BPF_MAP_UPDATE_ELEM command */
enum {
@@ -720,7 +1444,7 @@ union bpf_attr {
* parsed and used to produce a manual page. The workflow is the following,
* and requires the rst2man utility:
*
- * $ ./scripts/bpf_helpers_doc.py \
+ * $ ./scripts/bpf_doc.py \
* --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
* $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
* $ man /tmp/bpf-helpers.7
@@ -1765,6 +2489,10 @@ union bpf_attr {
* Use with ENCAP_L3/L4 flags to further specify the tunnel
* type; *len* is the length of the inner MAC header.
*
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ * L2 type as Ethernet.
+ *
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -3850,7 +4578,6 @@ union bpf_attr {
*
* long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
* Description
-
* Check ctx packet size against exceeding MTU of net device (based
* on *ifindex*). This helper will likely be used in combination
* with helpers that adjust/change the packet size.
@@ -3910,6 +4637,34 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
+ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
+ * Description
+ * For each element in **map**, call **callback_fn** function with
+ * **map**, **callback_ctx** and other map-specific parameters.
+ * The **callback_fn** should be a static function and
+ * the **callback_ctx** should be a pointer to the stack.
+ * The **flags** is used to control certain aspects of the helper.
+ * Currently, the **flags** must be 0.
+ *
+ * The following are a list of supported map types and their
+ * respective expected callback signatures:
+ *
+ * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
+ * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
+ *
+ * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
+ *
+ * For per_cpu maps, the map_value is the value on the cpu where the
+ * bpf_prog is running.
+ *
+ * If **callback_fn** return 0, the helper will continue to the next
+ * element. If return value is 1, the helper will skip the rest of
+ * elements and return. Other return values are not used now.
+ *
+ * Return
+ * The number of traversed map elements for success, **-EINVAL** for
+ * invalid **flags**.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4076,6 +4831,7 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
+ FN(for_each_map_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4169,6 +4925,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
+ BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
};
enum {
@@ -5206,7 +5963,10 @@ struct bpf_pidns_info {
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
struct bpf_sk_lookup {
- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+ union {
+ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
+ };
__u32 family; /* Protocol family (AF_INET, AF_INET6) */
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 5a667107ad2c..d27b1708efe9 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -52,7 +52,7 @@ struct btf_type {
};
};
-#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_INFO_KFLAG(info) ((info) >> 31)
@@ -72,7 +72,8 @@ struct btf_type {
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
#define BTF_KIND_VAR 14 /* Variable */
#define BTF_KIND_DATASEC 15 /* Section */
-#define BTF_KIND_MAX BTF_KIND_DATASEC
+#define BTF_KIND_FLOAT 16 /* Floating point */
+#define BTF_KIND_MAX BTF_KIND_FLOAT
#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
/* For some specific BTF_KIND, "struct btf_type" is immediately
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 374c67875cdb..8b281f722e5b 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -216,6 +216,20 @@ struct kvm_hyperv_exit {
} u;
};
+struct kvm_xen_exit {
+#define KVM_EXIT_XEN_HCALL 1
+ __u32 type;
+ union {
+ struct {
+ __u32 longmode;
+ __u32 cpl;
+ __u64 input;
+ __u64 result;
+ __u64 params[6];
+ } hcall;
+ } u;
+};
+
#define KVM_S390_GET_SKEYS_NONE 1
#define KVM_S390_SKEYS_MAX 1048576
@@ -252,6 +266,8 @@ struct kvm_hyperv_exit {
#define KVM_EXIT_X86_WRMSR 30
#define KVM_EXIT_DIRTY_RING_FULL 31
#define KVM_EXIT_AP_RESET_HOLD 32
+#define KVM_EXIT_X86_BUS_LOCK 33
+#define KVM_EXIT_XEN 34
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -428,6 +444,8 @@ struct kvm_run {
__u32 index; /* kernel -> user */
__u64 data; /* kernel <-> user */
} msr;
+ /* KVM_EXIT_XEN */
+ struct kvm_xen_exit xen;
/* Fix the size of the union. */
char padding[256];
};
@@ -1058,6 +1076,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
#define KVM_CAP_SYS_HYPERV_CPUID 191
#define KVM_CAP_DIRTY_LOG_RING 192
+#define KVM_CAP_X86_BUS_LOCK_EXIT 193
+#define KVM_CAP_PPC_DAWR1 194
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1131,6 +1151,10 @@ struct kvm_x86_mce {
#endif
#ifdef KVM_CAP_XEN_HVM
+#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
+
struct kvm_xen_hvm_config {
__u32 flags;
__u32 msr;
@@ -1565,6 +1589,45 @@ struct kvm_pv_cmd {
/* Available with KVM_CAP_DIRTY_LOG_RING */
#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7)
+/* Per-VM Xen attributes */
+#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr)
+#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr)
+
+struct kvm_xen_hvm_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u8 long_mode;
+ __u8 vector;
+ struct {
+ __u64 gfn;
+ } shared_info;
+ __u64 pad[8];
+ } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2
+
+/* Per-vCPU Xen attributes */
+#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
+#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr)
+
+struct kvm_xen_vcpu_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u64 gpa;
+ __u64 pad[8];
+ } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
+
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
/* Guest initialization commands */
@@ -1593,6 +1656,8 @@ enum sev_cmd_id {
KVM_SEV_DBG_ENCRYPT,
/* Guest certificates commands */
KVM_SEV_CERT_EXPORT,
+ /* Attestation report */
+ KVM_SEV_GET_ATTESTATION_REPORT,
KVM_SEV_NR_MAX,
};
@@ -1645,6 +1710,12 @@ struct kvm_sev_dbg {
__u32 len;
};
+struct kvm_sev_attestation_report {
+ __u8 mnonce[16];
+ __u64 uaddr;
+ __u32 len;
+};
+
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
@@ -1766,4 +1837,7 @@ struct kvm_dirty_gfn {
__u64 offset;
};
+#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0)
+#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1)
+
#endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index dd8306ea336c..e6524ead2b7b 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -1,6 +1,8 @@
#ifndef _UAPI_LINUX_MOUNT_H
#define _UAPI_LINUX_MOUNT_H
+#include <linux/types.h>
+
/*
* These are the fs-independent mount-flags: up to 32 flags are supported
*
@@ -117,5 +119,19 @@ enum fsconfig_command {
#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */
#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */
+#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
+
+/*
+ * mount_setattr()
+ */
+struct mount_attr {
+ __u64 attr_set;
+ __u64 attr_clr;
+ __u64 propagation;
+ __u64 userns_fd;
+};
+
+/* List of all mount_attr versions. */
+#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */
#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h
index 58b1eb711360..a5feb7604948 100644
--- a/tools/include/uapi/linux/openat2.h
+++ b/tools/include/uapi/linux/openat2.h
@@ -35,5 +35,9 @@ struct open_how {
#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
be scoped inside the dirfd
(similar to chroot(2)). */
+#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be
+ completed through cached lookup. May
+ return -EAGAIN if that's not
+ possible. */
#endif /* _UAPI_LINUX_OPENAT2_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index b15e3447cd9f..ad15e40d7f5d 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -145,12 +145,14 @@ enum perf_event_sample_format {
PERF_SAMPLE_CGROUP = 1U << 21,
PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22,
PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23,
+ PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24,
- PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */
+ PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
};
+#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
/*
* values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
*
@@ -386,7 +388,8 @@ struct perf_event_attr {
aux_output : 1, /* generate AUX records instead of events */
cgroup : 1, /* include cgroup events */
text_poke : 1, /* include text poke events */
- __reserved_1 : 30;
+ build_id : 1, /* use build id in mmap2 events */
+ __reserved_1 : 29;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -659,6 +662,22 @@ struct perf_event_mmap_page {
__u64 aux_size;
};
+/*
+ * The current state of perf_event_header::misc bits usage:
+ * ('|' used bit, '-' unused bit)
+ *
+ * 012 CDEF
+ * |||---------||||
+ *
+ * Where:
+ * 0-2 CPUMODE_MASK
+ *
+ * C PROC_MAP_PARSE_TIMEOUT
+ * D MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT
+ * E MMAP_BUILD_ID / EXACT_IP / SCHED_OUT_PREEMPT
+ * F (reserved)
+ */
+
#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0)
#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
#define PERF_RECORD_MISC_KERNEL (1 << 0)
@@ -690,6 +709,7 @@ struct perf_event_mmap_page {
*
* PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events
* PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
+ * PERF_RECORD_MISC_MMAP_BUILD_ID - PERF_RECORD_MMAP2 event
*
*
* PERF_RECORD_MISC_EXACT_IP:
@@ -699,9 +719,13 @@ struct perf_event_mmap_page {
*
* PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
* Indicates that thread was preempted in TASK_RUNNING state.
+ *
+ * PERF_RECORD_MISC_MMAP_BUILD_ID:
+ * Indicates that mmap2 event carries build id data.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
+#define PERF_RECORD_MISC_MMAP_BUILD_ID (1 << 14)
/*
* Reserve the last bit to indicate some extended misc field
*/
@@ -890,7 +914,24 @@ enum perf_event_type {
* char data[size];
* u64 dyn_size; } && PERF_SAMPLE_STACK_USER
*
- * { u64 weight; } && PERF_SAMPLE_WEIGHT
+ * { union perf_sample_weight
+ * {
+ * u64 full; && PERF_SAMPLE_WEIGHT
+ * #if defined(__LITTLE_ENDIAN_BITFIELD)
+ * struct {
+ * u32 var1_dw;
+ * u16 var2_w;
+ * u16 var3_w;
+ * } && PERF_SAMPLE_WEIGHT_STRUCT
+ * #elif defined(__BIG_ENDIAN_BITFIELD)
+ * struct {
+ * u16 var3_w;
+ * u16 var2_w;
+ * u32 var1_dw;
+ * } && PERF_SAMPLE_WEIGHT_STRUCT
+ * #endif
+ * }
+ * }
* { u64 data_src; } && PERF_SAMPLE_DATA_SRC
* { u64 transaction; } && PERF_SAMPLE_TRANSACTION
* { u64 abi; # enum perf_sample_regs_abi
@@ -915,10 +956,20 @@ enum perf_event_type {
* u64 addr;
* u64 len;
* u64 pgoff;
- * u32 maj;
- * u32 min;
- * u64 ino;
- * u64 ino_generation;
+ * union {
+ * struct {
+ * u32 maj;
+ * u32 min;
+ * u64 ino;
+ * u64 ino_generation;
+ * };
+ * struct {
+ * u8 build_id_size;
+ * u8 __reserved_1;
+ * u16 __reserved_2;
+ * u8 build_id[20];
+ * };
+ * };
* u32 prot, flags;
* char filename[];
* struct sample_id sample_id;
@@ -1127,14 +1178,16 @@ union perf_mem_data_src {
mem_lvl_num:4, /* memory hierarchy level number */
mem_remote:1, /* remote */
mem_snoopx:2, /* snoop mode, ext */
- mem_rsvd:24;
+ mem_blk:3, /* access blocked */
+ mem_rsvd:21;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd:24,
+ __u64 mem_rsvd:21,
+ mem_blk:3, /* access blocked */
mem_snoopx:2, /* snoop mode, ext */
mem_remote:1, /* remote */
mem_lvl_num:4, /* memory hierarchy level number */
@@ -1217,6 +1270,12 @@ union perf_mem_data_src {
#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
#define PERF_MEM_TLB_SHIFT 26
+/* Access blocked */
+#define PERF_MEM_BLK_NA 0x01 /* not available */
+#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */
+#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
+#define PERF_MEM_BLK_SHIFT 40
+
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
@@ -1248,4 +1307,23 @@ struct perf_branch_entry {
reserved:40;
};
+union perf_sample_weight {
+ __u64 full;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ struct {
+ __u32 var1_dw;
+ __u16 var2_w;
+ __u16 var3_w;
+ };
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ struct {
+ __u16 var3_w;
+ __u16 var2_w;
+ __u32 var1_dw;
+ };
+#else
+#error "Unknown endianness"
+#endif
+};
+
#endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 90deb41c8a34..667f1aed091c 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -251,5 +251,8 @@ struct prctl_mm_map {
#define PR_SET_SYSCALL_USER_DISPATCH 59
# define PR_SYS_DISPATCH_OFF 0
# define PR_SYS_DISPATCH_ON 1
+/* The control values for the user space selector when dispatch is enabled */
+# define SYSCALL_DISPATCH_FILTER_ALLOW 0
+# define SYSCALL_DISPATCH_FILTER_BLOCK 1
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/lib/api/fs/cgroup.c b/tools/lib/api/fs/cgroup.c
index 889a6eb4aaca..1573dae4259d 100644
--- a/tools/lib/api/fs/cgroup.c
+++ b/tools/lib/api/fs/cgroup.c
@@ -8,12 +8,29 @@
#include <string.h>
#include "fs.h"
+struct cgroupfs_cache_entry {
+ char subsys[32];
+ char mountpoint[PATH_MAX];
+};
+
+/* just cache last used one */
+static struct cgroupfs_cache_entry cached;
+
int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
{
FILE *fp;
- char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
- char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
- char *token, *saved_ptr = NULL;
+ char *line = NULL;
+ size_t len = 0;
+ char *p, *path;
+ char mountpoint[PATH_MAX];
+
+ if (!strcmp(cached.subsys, subsys)) {
+ if (strlen(cached.mountpoint) < maxlen) {
+ strcpy(buf, cached.mountpoint);
+ return 0;
+ }
+ return -1;
+ }
fp = fopen("/proc/mounts", "r");
if (!fp)
@@ -22,45 +39,63 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
/*
* in order to handle split hierarchy, we need to scan /proc/mounts
* and inspect every cgroupfs mount point to find one that has
- * perf_event subsystem
+ * the given subsystem. If we found v1, just use it. If not we can
+ * use v2 path as a fallback.
*/
- path_v1[0] = '\0';
- path_v2[0] = '\0';
+ mountpoint[0] = '\0';
- while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
- __stringify(PATH_MAX)"s %*d %*d\n",
- mountpoint, type, tokens) == 3) {
+ /*
+ * The /proc/mounts has the follow format:
+ *
+ * <devname> <mount point> <fs type> <options> ...
+ *
+ */
+ while (getline(&line, &len, fp) != -1) {
+ /* skip devname */
+ p = strchr(line, ' ');
+ if (p == NULL)
+ continue;
+
+ /* save the mount point */
+ path = ++p;
+ p = strchr(p, ' ');
+ if (p == NULL)
+ continue;
- if (!path_v1[0] && !strcmp(type, "cgroup")) {
+ *p++ = '\0';
- token = strtok_r(tokens, ",", &saved_ptr);
+ /* check filesystem type */
+ if (strncmp(p, "cgroup", 6))
+ continue;
- while (token != NULL) {
- if (subsys && !strcmp(token, subsys)) {
- strcpy(path_v1, mountpoint);
- break;
- }
- token = strtok_r(NULL, ",", &saved_ptr);
- }
+ if (p[6] == '2') {
+ /* save cgroup v2 path */
+ strcpy(mountpoint, path);
+ continue;
}
- if (!path_v2[0] && !strcmp(type, "cgroup2"))
- strcpy(path_v2, mountpoint);
+ /* now we have cgroup v1, check the options for subsystem */
+ p += 7;
- if (path_v1[0] && path_v2[0])
- break;
+ p = strstr(p, subsys);
+ if (p == NULL)
+ continue;
+
+ /* sanity check: it should be separated by a space or a comma */
+ if (!strchr(" ,", p[-1]) || !strchr(" ,", p[strlen(subsys)]))
+ continue;
+
+ strcpy(mountpoint, path);
+ break;
}
+ free(line);
fclose(fp);
- if (path_v1[0])
- path = path_v1;
- else if (path_v2[0])
- path = path_v2;
- else
- return -1;
+ strncpy(cached.subsys, subsys, sizeof(cached.subsys) - 1);
+ strcpy(cached.mountpoint, mountpoint);
- if (strlen(path) < maxlen) {
- strcpy(buf, path);
+ if (mountpoint[0] && strlen(mountpoint) < maxlen) {
+ strcpy(buf, mountpoint);
return 0;
}
return -1;
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 190366d05588..9b057cc7650a 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o ringbuf.o
+ btf_dump.o ringbuf.o strset.o linker.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 887a494ad5fc..e43e1896cb4b 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -158,7 +158,7 @@ $(BPF_IN_STATIC): force $(BPF_HELPER_DEFS)
$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
$(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
- $(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \
+ $(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \
--file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
@@ -215,7 +215,7 @@ define do_install
if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
fi; \
- $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+ $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
endef
install_lib: all_cmd
@@ -228,7 +228,6 @@ install_headers: $(BPF_HELPER_DEFS)
$(call do_install,bpf.h,$(prefix)/include/bpf,644); \
$(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
$(call do_install,btf.h,$(prefix)/include/bpf,644); \
- $(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \
$(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
$(call do_install,xsk.h,$(prefix)/include/bpf,644); \
$(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index ae6c975e0b87..cc2e51c64a54 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -29,9 +29,10 @@
*/
#define SEC(NAME) __attribute__((section(NAME), used))
-#ifndef __always_inline
+/* Avoid 'linux/stddef.h' definition of '__always_inline'. */
+#undef __always_inline
#define __always_inline inline __attribute__((always_inline))
-#endif
+
#ifndef __noinline
#define __noinline __attribute__((noinline))
#endif
@@ -39,8 +40,22 @@
#define __weak __attribute__((weak))
#endif
+/* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include
+ * any system-level headers (such as stddef.h, linux/version.h, etc), and
+ * commonly-used macros like NULL and KERNEL_VERSION aren't available through
+ * vmlinux.h. This just adds unnecessary hurdles and forces users to re-define
+ * them on their own. So as a convenience, provide such definitions here.
+ */
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+#ifndef KERNEL_VERSION
+#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c))
+#endif
+
/*
- * Helper macro to manipulate data structures
+ * Helper macros to manipulate data structures
*/
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER)
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d9c10830d749..d30e67e7e1e5 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -21,6 +21,7 @@
#include "libbpf.h"
#include "libbpf_internal.h"
#include "hashmap.h"
+#include "strset.h"
#define BTF_MAX_NR_TYPES 0x7fffffffU
#define BTF_MAX_STR_OFFSET 0x7fffffffU
@@ -67,7 +68,7 @@ struct btf {
* | | |
* hdr | |
* types_data----+ |
- * strs_data------------------+
+ * strset__data(strs_set)-----+
*
* +----------+---------+-----------+
* | Header | Types | Strings |
@@ -105,20 +106,15 @@ struct btf {
*/
int start_str_off;
+ /* only one of strs_data or strs_set can be non-NULL, depending on
+ * whether BTF is in a modifiable state (strs_set is used) or not
+ * (strs_data points inside raw_data)
+ */
void *strs_data;
- size_t strs_data_cap; /* used size stored in hdr->str_len */
-
- /* lookup index for each unique string in strings section */
- struct hashmap *strs_hash;
+ /* a set of unique strings */
+ struct strset *strs_set;
/* whether strings are already deduplicated */
bool strs_deduped;
- /* extra indirection layer to make strings hashmap work with stable
- * string offsets and ability to transparently choose between
- * btf->strs_data or btf_dedup->strs_data as a source of strings.
- * This is used for BTF strings dedup to transfer deduplicated strings
- * data back to struct btf without re-building strings index.
- */
- void **strs_data_ptr;
/* BTF object FD, if loaded into kernel */
int fd;
@@ -142,8 +138,8 @@ static inline __u64 ptr_to_u64(const void *ptr)
* On success, memory pointer to the beginning of unused memory is returned.
* On error, NULL is returned.
*/
-void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
- size_t cur_cnt, size_t max_cnt, size_t add_cnt)
+void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+ size_t cur_cnt, size_t max_cnt, size_t add_cnt)
{
size_t new_cnt;
void *new_data;
@@ -179,14 +175,14 @@ void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
/* Ensure given dynamically allocated memory region has enough allocated space
* to accommodate *need_cnt* elements of size *elem_sz* bytes each
*/
-int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
+int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
{
void *p;
if (need_cnt <= *cap_cnt)
return 0;
- p = btf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt);
+ p = libbpf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt);
if (!p)
return -ENOMEM;
@@ -197,8 +193,8 @@ static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
{
__u32 *p;
- p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
- btf->nr_types, BTF_MAX_NR_TYPES, 1);
+ p = libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
+ btf->nr_types, BTF_MAX_NR_TYPES, 1);
if (!p)
return -ENOMEM;
@@ -291,6 +287,7 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
return base_size;
case BTF_KIND_INT:
return base_size + sizeof(__u32);
@@ -338,6 +335,7 @@ static int btf_bswap_type_rest(struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
return 0;
case BTF_KIND_INT:
*(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
@@ -433,7 +431,7 @@ const struct btf *btf__base_btf(const struct btf *btf)
}
/* internal helper returning non-const pointer to a type */
-static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
+struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
{
if (type_id == 0)
return &btf_void;
@@ -578,6 +576,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
case BTF_KIND_DATASEC:
+ case BTF_KIND_FLOAT:
size = t->size;
goto done;
case BTF_KIND_PTR:
@@ -621,6 +620,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
switch (kind) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
+ case BTF_KIND_FLOAT:
return min(btf_ptr_sz(btf), (size_t)t->size);
case BTF_KIND_PTR:
return btf_ptr_sz(btf);
@@ -734,7 +734,7 @@ void btf__free(struct btf *btf)
*/
free(btf->hdr);
free(btf->types_data);
- free(btf->strs_data);
+ strset__free(btf->strs_set);
}
free(btf->raw_data);
free(btf->raw_data_swapped);
@@ -1242,6 +1242,11 @@ void btf__set_fd(struct btf *btf, int fd)
btf->fd = fd;
}
+static const void *btf_strs_data(const struct btf *btf)
+{
+ return btf->strs_data ? btf->strs_data : strset__data(btf->strs_set);
+}
+
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian)
{
struct btf_header *hdr = btf->hdr;
@@ -1282,7 +1287,7 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi
}
p += hdr->type_len;
- memcpy(p, btf->strs_data, hdr->str_len);
+ memcpy(p, btf_strs_data(btf), hdr->str_len);
p += hdr->str_len;
*size = data_sz;
@@ -1316,7 +1321,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
if (offset < btf->start_str_off)
return btf__str_by_offset(btf->base_btf, offset);
else if (offset - btf->start_str_off < btf->hdr->str_len)
- return btf->strs_data + (offset - btf->start_str_off);
+ return btf_strs_data(btf) + (offset - btf->start_str_off);
else
return NULL;
}
@@ -1470,25 +1475,6 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
return 0;
}
-static size_t strs_hash_fn(const void *key, void *ctx)
-{
- const struct btf *btf = ctx;
- const char *strs = *btf->strs_data_ptr;
- const char *str = strs + (long)key;
-
- return str_hash(str);
-}
-
-static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
-{
- const struct btf *btf = ctx;
- const char *strs = *btf->strs_data_ptr;
- const char *str1 = strs + (long)key1;
- const char *str2 = strs + (long)key2;
-
- return strcmp(str1, str2) == 0;
-}
-
static void btf_invalidate_raw_data(struct btf *btf)
{
if (btf->raw_data) {
@@ -1507,10 +1493,9 @@ static void btf_invalidate_raw_data(struct btf *btf)
*/
static int btf_ensure_modifiable(struct btf *btf)
{
- void *hdr, *types, *strs, *strs_end, *s;
- struct hashmap *hash = NULL;
- long off;
- int err;
+ void *hdr, *types;
+ struct strset *set = NULL;
+ int err = -ENOMEM;
if (btf_is_modifiable(btf)) {
/* any BTF modification invalidates raw_data */
@@ -1521,44 +1506,25 @@ static int btf_ensure_modifiable(struct btf *btf)
/* split raw data into three memory regions */
hdr = malloc(btf->hdr->hdr_len);
types = malloc(btf->hdr->type_len);
- strs = malloc(btf->hdr->str_len);
- if (!hdr || !types || !strs)
+ if (!hdr || !types)
goto err_out;
memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
memcpy(types, btf->types_data, btf->hdr->type_len);
- memcpy(strs, btf->strs_data, btf->hdr->str_len);
-
- /* make hashmap below use btf->strs_data as a source of strings */
- btf->strs_data_ptr = &btf->strs_data;
/* build lookup index for all strings */
- hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
- if (IS_ERR(hash)) {
- err = PTR_ERR(hash);
- hash = NULL;
+ set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr->str_len);
+ if (IS_ERR(set)) {
+ err = PTR_ERR(set);
goto err_out;
}
- strs_end = strs + btf->hdr->str_len;
- for (off = 0, s = strs; s < strs_end; off += strlen(s) + 1, s = strs + off) {
- /* hashmap__add() returns EEXIST if string with the same
- * content already is in the hash map
- */
- err = hashmap__add(hash, (void *)off, (void *)off);
- if (err == -EEXIST)
- continue; /* duplicate */
- if (err)
- goto err_out;
- }
-
/* only when everything was successful, update internal state */
btf->hdr = hdr;
btf->types_data = types;
btf->types_data_cap = btf->hdr->type_len;
- btf->strs_data = strs;
- btf->strs_data_cap = btf->hdr->str_len;
- btf->strs_hash = hash;
+ btf->strs_data = NULL;
+ btf->strs_set = set;
/* if BTF was created from scratch, all strings are guaranteed to be
* unique and deduplicated
*/
@@ -1573,17 +1539,10 @@ static int btf_ensure_modifiable(struct btf *btf)
return 0;
err_out:
- hashmap__free(hash);
+ strset__free(set);
free(hdr);
free(types);
- free(strs);
- return -ENOMEM;
-}
-
-static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
-{
- return btf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1,
- btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz);
+ return err;
}
/* Find an offset in BTF string section that corresponds to a given string *s*.
@@ -1594,34 +1553,23 @@ static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
*/
int btf__find_str(struct btf *btf, const char *s)
{
- long old_off, new_off, len;
- void *p;
+ int off;
if (btf->base_btf) {
- int ret;
-
- ret = btf__find_str(btf->base_btf, s);
- if (ret != -ENOENT)
- return ret;
+ off = btf__find_str(btf->base_btf, s);
+ if (off != -ENOENT)
+ return off;
}
/* BTF needs to be in a modifiable state to build string lookup index */
if (btf_ensure_modifiable(btf))
return -ENOMEM;
- /* see btf__add_str() for why we do this */
- len = strlen(s) + 1;
- p = btf_add_str_mem(btf, len);
- if (!p)
- return -ENOMEM;
-
- new_off = btf->hdr->str_len;
- memcpy(p, s, len);
-
- if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off))
- return btf->start_str_off + old_off;
+ off = strset__find_str(btf->strs_set, s);
+ if (off < 0)
+ return off;
- return -ENOENT;
+ return btf->start_str_off + off;
}
/* Add a string s to the BTF string section.
@@ -1631,56 +1579,30 @@ int btf__find_str(struct btf *btf, const char *s)
*/
int btf__add_str(struct btf *btf, const char *s)
{
- long old_off, new_off, len;
- void *p;
- int err;
+ int off;
if (btf->base_btf) {
- int ret;
-
- ret = btf__find_str(btf->base_btf, s);
- if (ret != -ENOENT)
- return ret;
+ off = btf__find_str(btf->base_btf, s);
+ if (off != -ENOENT)
+ return off;
}
if (btf_ensure_modifiable(btf))
return -ENOMEM;
- /* Hashmap keys are always offsets within btf->strs_data, so to even
- * look up some string from the "outside", we need to first append it
- * at the end, so that it can be addressed with an offset. Luckily,
- * until btf->hdr->str_len is incremented, that string is just a piece
- * of garbage for the rest of BTF code, so no harm, no foul. On the
- * other hand, if the string is unique, it's already appended and
- * ready to be used, only a simple btf->hdr->str_len increment away.
- */
- len = strlen(s) + 1;
- p = btf_add_str_mem(btf, len);
- if (!p)
- return -ENOMEM;
-
- new_off = btf->hdr->str_len;
- memcpy(p, s, len);
+ off = strset__add_str(btf->strs_set, s);
+ if (off < 0)
+ return off;
- /* Now attempt to add the string, but only if the string with the same
- * contents doesn't exist already (HASHMAP_ADD strategy). If such
- * string exists, we'll get its offset in old_off (that's old_key).
- */
- err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off,
- HASHMAP_ADD, (const void **)&old_off, NULL);
- if (err == -EEXIST)
- return btf->start_str_off + old_off; /* duplicated string, return existing offset */
- if (err)
- return err;
+ btf->hdr->str_len = strset__data_size(btf->strs_set);
- btf->hdr->str_len += len; /* new unique string, adjust data length */
- return btf->start_str_off + new_off;
+ return btf->start_str_off + off;
}
static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
{
- return btf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
- btf->hdr->type_len, UINT_MAX, add_sz);
+ return libbpf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
+ btf->hdr->type_len, UINT_MAX, add_sz);
}
static __u32 btf_type_info(int kind, int vlen, int kflag)
@@ -1707,6 +1629,54 @@ static int btf_commit_type(struct btf *btf, int data_sz)
return btf->start_id + btf->nr_types - 1;
}
+struct btf_pipe {
+ const struct btf *src;
+ struct btf *dst;
+};
+
+static int btf_rewrite_str(__u32 *str_off, void *ctx)
+{
+ struct btf_pipe *p = ctx;
+ int off;
+
+ if (!*str_off) /* nothing to do for empty strings */
+ return 0;
+
+ off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
+ if (off < 0)
+ return off;
+
+ *str_off = off;
+ return 0;
+}
+
+int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
+{
+ struct btf_pipe p = { .src = src_btf, .dst = btf };
+ struct btf_type *t;
+ int sz, err;
+
+ sz = btf_type_size(src_type);
+ if (sz < 0)
+ return sz;
+
+ /* deconstruct BTF, if necessary, and invalidate raw_data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ memcpy(t, src_type, sz);
+
+ err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
+ if (err)
+ return err;
+
+ return btf_commit_type(btf, sz);
+}
+
/*
* Append new BTF_KIND_INT type with:
* - *name* - non-empty, non-NULL type name;
@@ -1756,6 +1726,47 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
return btf_commit_type(btf, sz);
}
+/*
+ * Append new BTF_KIND_FLOAT type with:
+ * - *name* - non-empty, non-NULL type name;
+ * - *sz* - size of the type, in bytes;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
+{
+ struct btf_type *t;
+ int sz, name_off;
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return -EINVAL;
+
+ /* byte_sz must be one of the explicitly allowed values */
+ if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
+ byte_sz != 16)
+ return -EINVAL;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ t->name_off = name_off;
+ t->info = btf_type_info(BTF_KIND_FLOAT, 0, 0);
+ t->size = byte_sz;
+
+ return btf_commit_type(btf, sz);
+}
+
/* it's completely legal to append BTF types with type IDs pointing forward to
* types that haven't been appended yet, so we only make sure that id looks
* sane, we can't guarantee that ID will always be valid
@@ -1883,7 +1894,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
* - *byte_sz* - size of the struct, in bytes;
*
* Struct initially has no fields in it. Fields can be added by
- * btf__add_field() right after btf__add_struct() succeeds.
+ * btf__add_field() right after btf__add_struct() succeeds.
*
* Returns:
* - >0, type ID of newly added BTF type;
@@ -2971,10 +2982,7 @@ struct btf_dedup {
/* Various option modifying behavior of algorithm */
struct btf_dedup_opts opts;
/* temporary strings deduplication state */
- void *strs_data;
- size_t strs_cap;
- size_t strs_len;
- struct hashmap* strs_hash;
+ struct strset *strs_set;
};
static long hash_combine(long h, long value)
@@ -3110,95 +3118,28 @@ done:
return d;
}
-typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx);
-
/*
* Iterate over all possible places in .BTF and .BTF.ext that can reference
* string and pass pointer to it to a provided callback `fn`.
*/
-static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
+static int btf_for_each_str_off(struct btf_dedup *d, str_off_visit_fn fn, void *ctx)
{
- void *line_data_cur, *line_data_end;
- int i, j, r, rec_size;
- struct btf_type *t;
+ int i, r;
for (i = 0; i < d->btf->nr_types; i++) {
- t = btf_type_by_id(d->btf, d->btf->start_id + i);
- r = fn(&t->name_off, ctx);
+ struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i);
+
+ r = btf_type_visit_str_offs(t, fn, ctx);
if (r)
return r;
-
- switch (btf_kind(t)) {
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION: {
- struct btf_member *m = btf_members(t);
- __u16 vlen = btf_vlen(t);
-
- for (j = 0; j < vlen; j++) {
- r = fn(&m->name_off, ctx);
- if (r)
- return r;
- m++;
- }
- break;
- }
- case BTF_KIND_ENUM: {
- struct btf_enum *m = btf_enum(t);
- __u16 vlen = btf_vlen(t);
-
- for (j = 0; j < vlen; j++) {
- r = fn(&m->name_off, ctx);
- if (r)
- return r;
- m++;
- }
- break;
- }
- case BTF_KIND_FUNC_PROTO: {
- struct btf_param *m = btf_params(t);
- __u16 vlen = btf_vlen(t);
-
- for (j = 0; j < vlen; j++) {
- r = fn(&m->name_off, ctx);
- if (r)
- return r;
- m++;
- }
- break;
- }
- default:
- break;
- }
}
if (!d->btf_ext)
return 0;
- line_data_cur = d->btf_ext->line_info.info;
- line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
- rec_size = d->btf_ext->line_info.rec_size;
-
- while (line_data_cur < line_data_end) {
- struct btf_ext_info_sec *sec = line_data_cur;
- struct bpf_line_info_min *line_info;
- __u32 num_info = sec->num_info;
-
- r = fn(&sec->sec_name_off, ctx);
- if (r)
- return r;
-
- line_data_cur += sizeof(struct btf_ext_info_sec);
- for (i = 0; i < num_info; i++) {
- line_info = line_data_cur;
- r = fn(&line_info->file_name_off, ctx);
- if (r)
- return r;
- r = fn(&line_info->line_off, ctx);
- if (r)
- return r;
- line_data_cur += rec_size;
- }
- }
+ r = btf_ext_visit_str_offs(d->btf_ext, fn, ctx);
+ if (r)
+ return r;
return 0;
}
@@ -3207,10 +3148,8 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
{
struct btf_dedup *d = ctx;
__u32 str_off = *str_off_ptr;
- long old_off, new_off, len;
const char *s;
- void *p;
- int err;
+ int off, err;
/* don't touch empty string or string in main BTF */
if (str_off == 0 || str_off < d->btf->start_str_off)
@@ -3227,29 +3166,11 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
return err;
}
- len = strlen(s) + 1;
-
- new_off = d->strs_len;
- p = btf_add_mem(&d->strs_data, &d->strs_cap, 1, new_off, BTF_MAX_STR_OFFSET, len);
- if (!p)
- return -ENOMEM;
-
- memcpy(p, s, len);
+ off = strset__add_str(d->strs_set, s);
+ if (off < 0)
+ return off;
- /* Now attempt to add the string, but only if the string with the same
- * contents doesn't exist already (HASHMAP_ADD strategy). If such
- * string exists, we'll get its offset in old_off (that's old_key).
- */
- err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off,
- HASHMAP_ADD, (const void **)&old_off, NULL);
- if (err == -EEXIST) {
- *str_off_ptr = d->btf->start_str_off + old_off;
- } else if (err) {
- return err;
- } else {
- *str_off_ptr = d->btf->start_str_off + new_off;
- d->strs_len += len;
- }
+ *str_off_ptr = d->btf->start_str_off + off;
return 0;
}
@@ -3266,39 +3187,23 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
*/
static int btf_dedup_strings(struct btf_dedup *d)
{
- char *s;
int err;
if (d->btf->strs_deduped)
return 0;
- /* temporarily switch to use btf_dedup's strs_data for strings for hash
- * functions; later we'll just transfer hashmap to struct btf as is,
- * along the strs_data
- */
- d->btf->strs_data_ptr = &d->strs_data;
-
- d->strs_hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, d->btf);
- if (IS_ERR(d->strs_hash)) {
- err = PTR_ERR(d->strs_hash);
- d->strs_hash = NULL;
+ d->strs_set = strset__new(BTF_MAX_STR_OFFSET, NULL, 0);
+ if (IS_ERR(d->strs_set)) {
+ err = PTR_ERR(d->strs_set);
goto err_out;
}
if (!d->btf->base_btf) {
- s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
- if (!s)
- return -ENOMEM;
- /* initial empty string */
- s[0] = 0;
- d->strs_len = 1;
-
/* insert empty string; we won't be looking it up during strings
* dedup, but it's good to have it for generic BTF string lookups
*/
- err = hashmap__insert(d->strs_hash, (void *)0, (void *)0,
- HASHMAP_ADD, NULL, NULL);
- if (err)
+ err = strset__add_str(d->strs_set, "");
+ if (err < 0)
goto err_out;
}
@@ -3308,28 +3213,16 @@ static int btf_dedup_strings(struct btf_dedup *d)
goto err_out;
/* replace BTF string data and hash with deduped ones */
- free(d->btf->strs_data);
- hashmap__free(d->btf->strs_hash);
- d->btf->strs_data = d->strs_data;
- d->btf->strs_data_cap = d->strs_cap;
- d->btf->hdr->str_len = d->strs_len;
- d->btf->strs_hash = d->strs_hash;
- /* now point strs_data_ptr back to btf->strs_data */
- d->btf->strs_data_ptr = &d->btf->strs_data;
-
- d->strs_data = d->strs_hash = NULL;
- d->strs_len = d->strs_cap = 0;
+ strset__free(d->btf->strs_set);
+ d->btf->hdr->str_len = strset__data_size(d->strs_set);
+ d->btf->strs_set = d->strs_set;
+ d->strs_set = NULL;
d->btf->strs_deduped = true;
return 0;
err_out:
- free(d->strs_data);
- hashmap__free(d->strs_hash);
- d->strs_data = d->strs_hash = NULL;
- d->strs_len = d->strs_cap = 0;
-
- /* restore strings pointer for existing d->btf->strs_hash back */
- d->btf->strs_data_ptr = &d->strs_data;
+ strset__free(d->strs_set);
+ d->strs_set = NULL;
return err;
}
@@ -3626,6 +3519,7 @@ static int btf_dedup_prep(struct btf_dedup *d)
case BTF_KIND_FWD:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
h = btf_hash_common(t);
break;
case BTF_KIND_INT:
@@ -3722,6 +3616,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
break;
case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
@@ -3983,6 +3878,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
return btf_compat_enum(cand_type, canon_type);
case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
return btf_equal_common(cand_type, canon_type);
case BTF_KIND_CONST:
@@ -4450,15 +4346,18 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
* then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map,
* which is populated during compaction phase.
*/
-static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
+static int btf_dedup_remap_type_id(__u32 *type_id, void *ctx)
{
+ struct btf_dedup *d = ctx;
__u32 resolved_type_id, new_type_id;
- resolved_type_id = resolve_type_id(d, type_id);
+ resolved_type_id = resolve_type_id(d, *type_id);
new_type_id = d->hypot_map[resolved_type_id];
if (new_type_id > BTF_MAX_NR_TYPES)
return -EINVAL;
- return new_type_id;
+
+ *type_id = new_type_id;
+ return 0;
}
/*
@@ -4471,111 +4370,28 @@ static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
* referenced from any BTF type (e.g., struct fields, func proto args, etc) to
* their final deduped type IDs.
*/
-static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
+static int btf_dedup_remap_types(struct btf_dedup *d)
{
- struct btf_type *t = btf_type_by_id(d->btf, type_id);
int i, r;
- switch (btf_kind(t)) {
- case BTF_KIND_INT:
- case BTF_KIND_ENUM:
- break;
-
- case BTF_KIND_FWD:
- case BTF_KIND_CONST:
- case BTF_KIND_VOLATILE:
- case BTF_KIND_RESTRICT:
- case BTF_KIND_PTR:
- case BTF_KIND_TYPEDEF:
- case BTF_KIND_FUNC:
- case BTF_KIND_VAR:
- r = btf_dedup_remap_type_id(d, t->type);
- if (r < 0)
- return r;
- t->type = r;
- break;
-
- case BTF_KIND_ARRAY: {
- struct btf_array *arr_info = btf_array(t);
+ for (i = 0; i < d->btf->nr_types; i++) {
+ struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i);
- r = btf_dedup_remap_type_id(d, arr_info->type);
- if (r < 0)
- return r;
- arr_info->type = r;
- r = btf_dedup_remap_type_id(d, arr_info->index_type);
- if (r < 0)
+ r = btf_type_visit_type_ids(t, btf_dedup_remap_type_id, d);
+ if (r)
return r;
- arr_info->index_type = r;
- break;
- }
-
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION: {
- struct btf_member *member = btf_members(t);
- __u16 vlen = btf_vlen(t);
-
- for (i = 0; i < vlen; i++) {
- r = btf_dedup_remap_type_id(d, member->type);
- if (r < 0)
- return r;
- member->type = r;
- member++;
- }
- break;
}
- case BTF_KIND_FUNC_PROTO: {
- struct btf_param *param = btf_params(t);
- __u16 vlen = btf_vlen(t);
-
- r = btf_dedup_remap_type_id(d, t->type);
- if (r < 0)
- return r;
- t->type = r;
-
- for (i = 0; i < vlen; i++) {
- r = btf_dedup_remap_type_id(d, param->type);
- if (r < 0)
- return r;
- param->type = r;
- param++;
- }
- break;
- }
-
- case BTF_KIND_DATASEC: {
- struct btf_var_secinfo *var = btf_var_secinfos(t);
- __u16 vlen = btf_vlen(t);
-
- for (i = 0; i < vlen; i++) {
- r = btf_dedup_remap_type_id(d, var->type);
- if (r < 0)
- return r;
- var->type = r;
- var++;
- }
- break;
- }
+ if (!d->btf_ext)
+ return 0;
- default:
- return -EINVAL;
- }
+ r = btf_ext_visit_type_ids(d->btf_ext, btf_dedup_remap_type_id, d);
+ if (r)
+ return r;
return 0;
}
-static int btf_dedup_remap_types(struct btf_dedup *d)
-{
- int i, r;
-
- for (i = 0; i < d->btf->nr_types; i++) {
- r = btf_dedup_remap_type(d, d->btf->start_id + i);
- if (r < 0)
- return r;
- }
- return 0;
-}
-
/*
* Probe few well-known locations for vmlinux kernel image and try to load BTF
* data out of it to use for target BTF.
@@ -4626,3 +4442,200 @@ struct btf *libbpf_find_kernel_btf(void)
pr_warn("failed to find valid kernel BTF\n");
return ERR_PTR(-ESRCH);
}
+
+int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
+{
+ int i, n, err;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM:
+ return 0;
+
+ case BTF_KIND_FWD:
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ return visit(&t->type, ctx);
+
+ case BTF_KIND_ARRAY: {
+ struct btf_array *a = btf_array(t);
+
+ err = visit(&a->type, ctx);
+ err = err ?: visit(&a->index_type, ctx);
+ return err;
+ }
+
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ struct btf_member *m = btf_members(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->type, ctx);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *m = btf_params(t);
+
+ err = visit(&t->type, ctx);
+ if (err)
+ return err;
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->type, ctx);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
+ case BTF_KIND_DATASEC: {
+ struct btf_var_secinfo *m = btf_var_secinfos(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->type, ctx);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
+ default:
+ return -EINVAL;
+ }
+}
+
+int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx)
+{
+ int i, n, err;
+
+ err = visit(&t->name_off, ctx);
+ if (err)
+ return err;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ struct btf_member *m = btf_members(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->name_off, ctx);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_ENUM: {
+ struct btf_enum *m = btf_enum(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->name_off, ctx);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *m = btf_params(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->name_off, ctx);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx)
+{
+ const struct btf_ext_info *seg;
+ struct btf_ext_info_sec *sec;
+ int i, err;
+
+ seg = &btf_ext->func_info;
+ for_each_btf_ext_sec(seg, sec) {
+ struct bpf_func_info_min *rec;
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = visit(&rec->type_id, ctx);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ seg = &btf_ext->core_relo_info;
+ for_each_btf_ext_sec(seg, sec) {
+ struct bpf_core_relo *rec;
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = visit(&rec->type_id, ctx);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx)
+{
+ const struct btf_ext_info *seg;
+ struct btf_ext_info_sec *sec;
+ int i, err;
+
+ seg = &btf_ext->func_info;
+ for_each_btf_ext_sec(seg, sec) {
+ err = visit(&sec->sec_name_off, ctx);
+ if (err)
+ return err;
+ }
+
+ seg = &btf_ext->line_info;
+ for_each_btf_ext_sec(seg, sec) {
+ struct bpf_line_info_min *rec;
+
+ err = visit(&sec->sec_name_off, ctx);
+ if (err)
+ return err;
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = visit(&rec->file_name_off, ctx);
+ if (err)
+ return err;
+ err = visit(&rec->line_off, ctx);
+ if (err)
+ return err;
+ }
+ }
+
+ seg = &btf_ext->core_relo_info;
+ for_each_btf_ext_sec(seg, sec) {
+ struct bpf_core_relo *rec;
+
+ err = visit(&sec->sec_name_off, ctx);
+ if (err)
+ return err;
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = visit(&rec->access_str_off, ctx);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 1237bcd1dd17..b54f1c3ebd57 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -93,8 +93,11 @@ LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
+LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf,
+ const struct btf_type *src_type);
LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding);
+LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz);
LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_array(struct btf *btf,
int index_type_id, int elem_type_id, __u32 nr_elems);
@@ -173,6 +176,7 @@ struct btf_dump_emit_type_decl_opts {
int indent_level;
/* strip all the const/volatile/restrict mods */
bool strip_mods;
+ size_t :0;
};
#define btf_dump_emit_type_decl_opts__last_field strip_mods
@@ -294,6 +298,11 @@ static inline bool btf_is_datasec(const struct btf_type *t)
return btf_kind(t) == BTF_KIND_DATASEC;
}
+static inline bool btf_is_float(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_FLOAT;
+}
+
static inline __u8 btf_int_encoding(const struct btf_type *t)
{
return BTF_INT_ENCODING(*(__u32 *)(t + 1));
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 2f9d685bd522..5e2809d685bf 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -166,11 +166,11 @@ static int btf_dump_resize(struct btf_dump *d)
if (last_id <= d->last_id)
return 0;
- if (btf_ensure_mem((void **)&d->type_states, &d->type_states_cap,
- sizeof(*d->type_states), last_id + 1))
+ if (libbpf_ensure_mem((void **)&d->type_states, &d->type_states_cap,
+ sizeof(*d->type_states), last_id + 1))
return -ENOMEM;
- if (btf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap,
- sizeof(*d->cached_names), last_id + 1))
+ if (libbpf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap,
+ sizeof(*d->cached_names), last_id + 1))
return -ENOMEM;
if (d->last_id == 0) {
@@ -279,6 +279,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
case BTF_KIND_INT:
case BTF_KIND_ENUM:
case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
break;
case BTF_KIND_VOLATILE:
@@ -453,6 +454,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
switch (btf_kind(t)) {
case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
tstate->order_state = ORDERED;
return 0;
@@ -462,7 +464,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
return err;
case BTF_KIND_ARRAY:
- return btf_dump_order_type(d, btf_array(t)->type, through_ptr);
+ return btf_dump_order_type(d, btf_array(t)->type, false);
case BTF_KIND_STRUCT:
case BTF_KIND_UNION: {
@@ -1133,6 +1135,7 @@ skip_mod:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FLOAT:
goto done;
default:
pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
@@ -1247,6 +1250,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
switch (kind) {
case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
btf_dump_emit_mods(d, decls);
name = btf_name_of(d, t->name_off);
btf_dump_printf(d, "%s", name);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d43cc3f29dae..7aad78dbb4b4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -55,10 +55,6 @@
#include "libbpf_internal.h"
#include "hashmap.h"
-#ifndef EM_BPF
-#define EM_BPF 247
-#endif
-
#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
#endif
@@ -178,6 +174,8 @@ enum kern_feature_id {
FEAT_PROG_BIND_MAP,
/* Kernel support for module BTFs */
FEAT_MODULE_BTF,
+ /* BTF_KIND_FLOAT support */
+ FEAT_BTF_FLOAT,
__FEAT_CNT,
};
@@ -187,7 +185,9 @@ enum reloc_type {
RELO_LD64,
RELO_CALL,
RELO_DATA,
- RELO_EXTERN,
+ RELO_EXTERN_VAR,
+ RELO_EXTERN_FUNC,
+ RELO_SUBPROG_ADDR,
};
struct reloc_desc {
@@ -574,6 +574,21 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
insn->off == 0;
}
+static bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
+static bool is_call_insn(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL);
+}
+
+static bool insn_is_pseudo_func(struct bpf_insn *insn)
+{
+ return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
const char *name, size_t sec_idx, const char *sec_name,
@@ -1121,11 +1136,6 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
obj->efile.obj_buf_sz = 0;
}
-/* if libelf is old and doesn't support mmap(), fall back to read() */
-#ifndef ELF_C_READ_MMAP
-#define ELF_C_READ_MMAP ELF_C_READ
-#endif
-
static int bpf_object__elf_init(struct bpf_object *obj)
{
int err = 0;
@@ -1181,7 +1191,8 @@ static int bpf_object__elf_init(struct bpf_object *obj)
if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
pr_warn("elf: failed to get section names strings from %s: %s\n",
obj->path, elf_errmsg(-1));
- return -LIBBPF_ERRNO__FORMAT;
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto errout;
}
/* Old LLVM set e_machine to EM_NONE */
@@ -1916,9 +1927,9 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
return btf_is_func_proto(t) ? t : NULL;
}
-static const char *btf_kind_str(const struct btf_type *t)
+static const char *__btf_kind_str(__u16 kind)
{
- switch (btf_kind(t)) {
+ switch (kind) {
case BTF_KIND_UNKN: return "void";
case BTF_KIND_INT: return "int";
case BTF_KIND_PTR: return "ptr";
@@ -1935,10 +1946,21 @@ static const char *btf_kind_str(const struct btf_type *t)
case BTF_KIND_FUNC_PROTO: return "func_proto";
case BTF_KIND_VAR: return "var";
case BTF_KIND_DATASEC: return "datasec";
+ case BTF_KIND_FLOAT: return "float";
default: return "unknown";
}
}
+static const char *btf_kind_str(const struct btf_type *t)
+{
+ return __btf_kind_str(btf_kind(t));
+}
+
+static enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
+{
+ return (enum btf_func_linkage)BTF_INFO_VLEN(t->info);
+}
+
/*
* Fetch integer attribute of BTF map definition. Such attributes are
* represented using a pointer to an array, in which dimensionality of array
@@ -2384,15 +2406,17 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
{
bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+ bool has_float = kernel_supports(FEAT_BTF_FLOAT);
bool has_func = kernel_supports(FEAT_BTF_FUNC);
- return !has_func || !has_datasec || !has_func_global;
+ return !has_func || !has_datasec || !has_func_global || !has_float;
}
static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+ bool has_float = kernel_supports(FEAT_BTF_FLOAT);
bool has_func = kernel_supports(FEAT_BTF_FUNC);
struct btf_type *t;
int i, j, vlen;
@@ -2445,6 +2469,13 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
} else if (!has_func_global && btf_is_func(t)) {
/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
+ } else if (!has_float && btf_is_float(t)) {
+ /* replace FLOAT with an equally-sized empty STRUCT;
+ * since C compilers do not accept e.g. "float" as a
+ * valid struct name, make it anonymous
+ */
+ t->name_off = 0;
+ t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
}
}
}
@@ -2784,7 +2815,7 @@ static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
return true;
/* ignore .llvm_addrsig section as well */
- if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */)
+ if (hdr->sh_type == SHT_LLVM_ADDRSIG)
return true;
/* no subprograms will lead to an empty .text section, ignore it */
@@ -2974,10 +3005,27 @@ static bool sym_is_extern(const GElf_Sym *sym)
GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
}
+static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
+{
+ int bind = GELF_ST_BIND(sym->st_info);
+ int type = GELF_ST_TYPE(sym->st_info);
+
+ /* in .text section */
+ if (sym->st_shndx != text_shndx)
+ return false;
+
+ /* local function */
+ if (bind == STB_LOCAL && type == STT_SECTION)
+ return true;
+
+ /* global function */
+ return bind == STB_GLOBAL && type == STT_FUNC;
+}
+
static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
{
const struct btf_type *t;
- const char *var_name;
+ const char *tname;
int i, n;
if (!btf)
@@ -2987,14 +3035,18 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
for (i = 1; i <= n; i++) {
t = btf__type_by_id(btf, i);
- if (!btf_is_var(t))
+ if (!btf_is_var(t) && !btf_is_func(t))
continue;
- var_name = btf__name_by_offset(btf, t->name_off);
- if (strcmp(var_name, ext_name))
+ tname = btf__name_by_offset(btf, t->name_off);
+ if (strcmp(tname, ext_name))
continue;
- if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+ if (btf_is_var(t) &&
+ btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+ return -EINVAL;
+
+ if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
return -EINVAL;
return i;
@@ -3107,12 +3159,48 @@ static int find_int_btf_id(const struct btf *btf)
return 0;
}
+static int add_dummy_ksym_var(struct btf *btf)
+{
+ int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
+ const struct btf_var_secinfo *vs;
+ const struct btf_type *sec;
+
+ sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
+ BTF_KIND_DATASEC);
+ if (sec_btf_id < 0)
+ return 0;
+
+ sec = btf__type_by_id(btf, sec_btf_id);
+ vs = btf_var_secinfos(sec);
+ for (i = 0; i < btf_vlen(sec); i++, vs++) {
+ const struct btf_type *vt;
+
+ vt = btf__type_by_id(btf, vs->type);
+ if (btf_is_func(vt))
+ break;
+ }
+
+ /* No func in ksyms sec. No need to add dummy var. */
+ if (i == btf_vlen(sec))
+ return 0;
+
+ int_btf_id = find_int_btf_id(btf);
+ dummy_var_btf_id = btf__add_var(btf,
+ "dummy_ksym",
+ BTF_VAR_GLOBAL_ALLOCATED,
+ int_btf_id);
+ if (dummy_var_btf_id < 0)
+ pr_warn("cannot create a dummy_ksym var\n");
+
+ return dummy_var_btf_id;
+}
+
static int bpf_object__collect_externs(struct bpf_object *obj)
{
struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
const struct btf_type *t;
struct extern_desc *ext;
- int i, n, off;
+ int i, n, off, dummy_var_btf_id;
const char *ext_name, *sec_name;
Elf_Scn *scn;
GElf_Shdr sh;
@@ -3124,6 +3212,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
if (elf_sec_hdr(obj, scn, &sh))
return -LIBBPF_ERRNO__FORMAT;
+ dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
+ if (dummy_var_btf_id < 0)
+ return dummy_var_btf_id;
+
n = sh.sh_size / sh.sh_entsize;
pr_debug("looking for externs among %d symbols...\n", n);
@@ -3168,6 +3260,11 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
sec_name = btf__name_by_offset(obj->btf, sec->name_off);
if (strcmp(sec_name, KCONFIG_SEC) == 0) {
+ if (btf_is_func(t)) {
+ pr_warn("extern function %s is unsupported under %s section\n",
+ ext->name, KCONFIG_SEC);
+ return -ENOTSUP;
+ }
kcfg_sec = sec;
ext->type = EXT_KCFG;
ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
@@ -3189,6 +3286,11 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return -ENOTSUP;
}
} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
+ if (btf_is_func(t) && ext->is_weak) {
+ pr_warn("extern weak function %s is unsupported\n",
+ ext->name);
+ return -ENOTSUP;
+ }
ksym_sec = sec;
ext->type = EXT_KSYM;
skip_mods_and_typedefs(obj->btf, t->type,
@@ -3215,7 +3317,14 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
* extern variables in DATASEC
*/
int int_btf_id = find_int_btf_id(obj->btf);
+ /* For extern function, a dummy_var added earlier
+ * will be used to replace the vs->type and
+ * its name string will be used to refill
+ * the missing param's name.
+ */
+ const struct btf_type *dummy_var;
+ dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
if (ext->type != EXT_KSYM)
@@ -3234,12 +3343,32 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
ext_name = btf__name_by_offset(obj->btf, vt->name_off);
ext = find_extern_by_name(obj, ext_name);
if (!ext) {
- pr_warn("failed to find extern definition for BTF var '%s'\n",
- ext_name);
+ pr_warn("failed to find extern definition for BTF %s '%s'\n",
+ btf_kind_str(vt), ext_name);
return -ESRCH;
}
- btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
- vt->type = int_btf_id;
+ if (btf_is_func(vt)) {
+ const struct btf_type *func_proto;
+ struct btf_param *param;
+ int j;
+
+ func_proto = btf__type_by_id(obj->btf,
+ vt->type);
+ param = btf_params(func_proto);
+ /* Reuse the dummy_var string if the
+ * func proto does not have param name.
+ */
+ for (j = 0; j < btf_vlen(func_proto); j++)
+ if (param[j].type && !param[j].name_off)
+ param[j].name_off =
+ dummy_var->name_off;
+ vs->type = dummy_var_btf_id;
+ vt->info &= ~0xffff;
+ vt->info |= BTF_FUNC_GLOBAL;
+ } else {
+ btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ vt->type = int_btf_id;
+ }
vs->offset = off;
vs->size = sizeof(int);
}
@@ -3371,31 +3500,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
reloc_desc->processed = false;
- /* sub-program call relocation */
- if (insn->code == (BPF_JMP | BPF_CALL)) {
- if (insn->src_reg != BPF_PSEUDO_CALL) {
- pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
- return -LIBBPF_ERRNO__RELOC;
- }
- /* text_shndx can be 0, if no default "main" program exists */
- if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
- sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
- pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
- prog->name, sym_name, sym_sec_name);
- return -LIBBPF_ERRNO__RELOC;
- }
- if (sym->st_value % BPF_INSN_SZ) {
- pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
- prog->name, sym_name, (size_t)sym->st_value);
- return -LIBBPF_ERRNO__RELOC;
- }
- reloc_desc->type = RELO_CALL;
- reloc_desc->insn_idx = insn_idx;
- reloc_desc->sym_off = sym->st_value;
- return 0;
- }
-
- if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
+ if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
prog->name, sym_name, insn_idx, insn->code);
return -LIBBPF_ERRNO__RELOC;
@@ -3418,18 +3523,62 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
}
pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
prog->name, i, ext->name, ext->sym_idx, insn_idx);
- reloc_desc->type = RELO_EXTERN;
+ if (insn->code == (BPF_JMP | BPF_CALL))
+ reloc_desc->type = RELO_EXTERN_FUNC;
+ else
+ reloc_desc->type = RELO_EXTERN_VAR;
reloc_desc->insn_idx = insn_idx;
reloc_desc->sym_off = i; /* sym_off stores extern index */
return 0;
}
+ /* sub-program call relocation */
+ if (is_call_insn(insn)) {
+ if (insn->src_reg != BPF_PSEUDO_CALL) {
+ pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ /* text_shndx can be 0, if no default "main" program exists */
+ if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
+ pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
+ prog->name, sym_name, sym_sec_name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ if (sym->st_value % BPF_INSN_SZ) {
+ pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
+ prog->name, sym_name, (size_t)sym->st_value);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ reloc_desc->type = RELO_CALL;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+ }
+
if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
prog->name, sym_name, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
+ /* loading subprog addresses */
+ if (sym_is_subprog(sym, obj->efile.text_shndx)) {
+ /* global_func: sym->st_value = offset in the section, insn->imm = 0.
+ * local_func: sym->st_value = 0, insn->imm = offset in the section.
+ */
+ if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
+ pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
+ prog->name, sym_name, (size_t)sym->st_value, insn->imm);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ reloc_desc->type = RELO_SUBPROG_ADDR;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+ }
+
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
@@ -3882,6 +4031,18 @@ static int probe_kern_btf_datasec(void)
strs, sizeof(strs)));
}
+static int probe_kern_btf_float(void)
+{
+ static const char strs[] = "\0float";
+ __u32 types[] = {
+ /* float */
+ BTF_TYPE_FLOAT_ENC(1, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
+}
+
static int probe_kern_array_mmap(void)
{
struct bpf_create_map_attr attr = {
@@ -4061,6 +4222,9 @@ static struct kern_feature_desc {
[FEAT_MODULE_BTF] = {
"module BTF support", probe_module_btf,
},
+ [FEAT_BTF_FLOAT] = {
+ "BTF_KIND_FLOAT support", probe_kern_btf_float,
+ },
};
static bool kernel_supports(enum kern_feature_id feat_id)
@@ -4795,8 +4959,8 @@ static int load_module_btfs(struct bpf_object *obj)
goto err_out;
}
- err = btf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
- sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
+ err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
+ sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
if (err)
goto err_out;
@@ -5566,11 +5730,6 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
}
-static bool is_ldimm64(struct bpf_insn *insn)
-{
- return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
-}
-
static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
{
switch (BPF_SIZE(insn->code)) {
@@ -5636,7 +5795,7 @@ poison:
/* poison second part of ldimm64 to avoid confusing error from
* verifier about "unknown opcode 00"
*/
- if (is_ldimm64(insn))
+ if (is_ldimm64_insn(insn))
bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
return 0;
@@ -5712,7 +5871,7 @@ poison:
case BPF_LD: {
__u64 imm;
- if (!is_ldimm64(insn) ||
+ if (!is_ldimm64_insn(insn) ||
insn[0].src_reg != 0 || insn[0].off != 0 ||
insn_idx + 1 >= prog->insns_cnt ||
insn[1].code != 0 || insn[1].dst_reg != 0 ||
@@ -6154,7 +6313,7 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
insn[0].imm = obj->maps[relo->map_idx].fd;
relo->processed = true;
break;
- case RELO_EXTERN:
+ case RELO_EXTERN_VAR:
ext = &obj->externs[relo->sym_off];
if (ext->type == EXT_KCFG) {
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
@@ -6172,6 +6331,16 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
}
relo->processed = true;
break;
+ case RELO_EXTERN_FUNC:
+ ext = &obj->externs[relo->sym_off];
+ insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
+ insn[0].imm = ext->ksym.kernel_btf_id;
+ relo->processed = true;
+ break;
+ case RELO_SUBPROG_ADDR:
+ insn[0].src_reg = BPF_PSEUDO_FUNC;
+ /* will be handled as a follow up pass */
+ break;
case RELO_CALL:
/* will be handled as a follow up pass */
break;
@@ -6358,11 +6527,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
- if (!insn_is_subprog_call(insn))
+ if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
continue;
relo = find_prog_insn_relo(prog, insn_idx);
- if (relo && relo->type != RELO_CALL) {
+ if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
prog->name, insn_idx, relo->type);
return -LIBBPF_ERRNO__RELOC;
@@ -6374,8 +6543,22 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* call always has imm = -1, but for static functions
* relocation is against STT_SECTION and insn->imm
* points to a start of a static function
+ *
+ * for subprog addr relocation, the relo->sym_off + insn->imm is
+ * the byte offset in the corresponding section.
*/
- sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+ if (relo->type == RELO_CALL)
+ sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+ else
+ sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
+ } else if (insn_is_pseudo_func(insn)) {
+ /*
+ * RELO_SUBPROG_ADDR relo is always emitted even if both
+ * functions are in the same section, so it shouldn't reach here.
+ */
+ pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
+ prog->name, insn_idx);
+ return -LIBBPF_ERRNO__RELOC;
} else {
/* if subprogram call is to a static function within
* the same ELF section, there won't be any relocation
@@ -7274,6 +7457,7 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
{
char sym_type, sym_name[500];
unsigned long long sym_addr;
+ const struct btf_type *t;
struct extern_desc *ext;
int ret, err = 0;
FILE *f;
@@ -7300,6 +7484,10 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
if (!ext || ext->type != EXT_KSYM)
continue;
+ t = btf__type_by_id(obj->btf, ext->btf_id);
+ if (!btf_is_var(t))
+ continue;
+
if (ext->is_set && ext->ksym.addr != sym_addr) {
pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
sym_name, ext->ksym.addr, sym_addr);
@@ -7318,75 +7506,151 @@ out:
return err;
}
-static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
+ __u16 kind, struct btf **res_btf,
+ int *res_btf_fd)
{
- struct extern_desc *ext;
+ int i, id, btf_fd, err;
struct btf *btf;
- int i, j, id, btf_fd, err;
-
- for (i = 0; i < obj->nr_extern; i++) {
- const struct btf_type *targ_var, *targ_type;
- __u32 targ_type_id, local_type_id;
- const char *targ_var_name;
- int ret;
- ext = &obj->externs[i];
- if (ext->type != EXT_KSYM || !ext->ksym.type_id)
- continue;
+ btf = obj->btf_vmlinux;
+ btf_fd = 0;
+ id = btf__find_by_name_kind(btf, ksym_name, kind);
- btf = obj->btf_vmlinux;
- btf_fd = 0;
- id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
- if (id == -ENOENT) {
- err = load_module_btfs(obj);
- if (err)
- return err;
+ if (id == -ENOENT) {
+ err = load_module_btfs(obj);
+ if (err)
+ return err;
- for (j = 0; j < obj->btf_module_cnt; j++) {
- btf = obj->btf_modules[j].btf;
- /* we assume module BTF FD is always >0 */
- btf_fd = obj->btf_modules[j].fd;
- id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
- if (id != -ENOENT)
- break;
- }
- }
- if (id <= 0) {
- pr_warn("extern (ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
- ext->name);
- return -ESRCH;
+ for (i = 0; i < obj->btf_module_cnt; i++) {
+ btf = obj->btf_modules[i].btf;
+ /* we assume module BTF FD is always >0 */
+ btf_fd = obj->btf_modules[i].fd;
+ id = btf__find_by_name_kind(btf, ksym_name, kind);
+ if (id != -ENOENT)
+ break;
}
+ }
+ if (id <= 0) {
+ pr_warn("extern (%s ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
+ __btf_kind_str(kind), ksym_name);
+ return -ESRCH;
+ }
- /* find local type_id */
- local_type_id = ext->ksym.type_id;
+ *res_btf = btf;
+ *res_btf_fd = btf_fd;
+ return id;
+}
- /* find target type_id */
- targ_var = btf__type_by_id(btf, id);
- targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
- targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
+static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
+ struct extern_desc *ext)
+{
+ const struct btf_type *targ_var, *targ_type;
+ __u32 targ_type_id, local_type_id;
+ const char *targ_var_name;
+ int id, btf_fd = 0, err;
+ struct btf *btf = NULL;
- ret = bpf_core_types_are_compat(obj->btf, local_type_id,
- btf, targ_type_id);
- if (ret <= 0) {
- const struct btf_type *local_type;
- const char *targ_name, *local_name;
+ id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd);
+ if (id < 0)
+ return id;
- local_type = btf__type_by_id(obj->btf, local_type_id);
- local_name = btf__name_by_offset(obj->btf, local_type->name_off);
- targ_name = btf__name_by_offset(btf, targ_type->name_off);
+ /* find local type_id */
+ local_type_id = ext->ksym.type_id;
- pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
- ext->name, local_type_id,
- btf_kind_str(local_type), local_name, targ_type_id,
- btf_kind_str(targ_type), targ_name);
- return -EINVAL;
- }
+ /* find target type_id */
+ targ_var = btf__type_by_id(btf, id);
+ targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
+ targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
+
+ err = bpf_core_types_are_compat(obj->btf, local_type_id,
+ btf, targ_type_id);
+ if (err <= 0) {
+ const struct btf_type *local_type;
+ const char *targ_name, *local_name;
+
+ local_type = btf__type_by_id(obj->btf, local_type_id);
+ local_name = btf__name_by_offset(obj->btf, local_type->name_off);
+ targ_name = btf__name_by_offset(btf, targ_type->name_off);
+
+ pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
+ ext->name, local_type_id,
+ btf_kind_str(local_type), local_name, targ_type_id,
+ btf_kind_str(targ_type), targ_name);
+ return -EINVAL;
+ }
+
+ ext->is_set = true;
+ ext->ksym.kernel_btf_obj_fd = btf_fd;
+ ext->ksym.kernel_btf_id = id;
+ pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
+ ext->name, id, btf_kind_str(targ_var), targ_var_name);
+
+ return 0;
+}
+
+static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
+ struct extern_desc *ext)
+{
+ int local_func_proto_id, kfunc_proto_id, kfunc_id;
+ const struct btf_type *kern_func;
+ struct btf *kern_btf = NULL;
+ int ret, kern_btf_fd = 0;
+
+ local_func_proto_id = ext->ksym.type_id;
+
+ kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC,
+ &kern_btf, &kern_btf_fd);
+ if (kfunc_id < 0) {
+ pr_warn("extern (func ksym) '%s': not found in kernel BTF\n",
+ ext->name);
+ return kfunc_id;
+ }
+
+ if (kern_btf != obj->btf_vmlinux) {
+ pr_warn("extern (func ksym) '%s': function in kernel module is not supported\n",
+ ext->name);
+ return -ENOTSUP;
+ }
+
+ kern_func = btf__type_by_id(kern_btf, kfunc_id);
+ kfunc_proto_id = kern_func->type;
+
+ ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
+ kern_btf, kfunc_proto_id);
+ if (ret <= 0) {
+ pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n",
+ ext->name, local_func_proto_id, kfunc_proto_id);
+ return -EINVAL;
+ }
+
+ ext->is_set = true;
+ ext->ksym.kernel_btf_obj_fd = kern_btf_fd;
+ ext->ksym.kernel_btf_id = kfunc_id;
+ pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n",
+ ext->name, kfunc_id);
+
+ return 0;
+}
+
+static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+{
+ const struct btf_type *t;
+ struct extern_desc *ext;
+ int i, err;
+
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KSYM || !ext->ksym.type_id)
+ continue;
- ext->is_set = true;
- ext->ksym.kernel_btf_obj_fd = btf_fd;
- ext->ksym.kernel_btf_id = id;
- pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
- ext->name, id, btf_kind_str(targ_var), targ_var_name);
+ t = btf__type_by_id(obj->btf, ext->btf_id);
+ if (btf_is_var(t))
+ err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
+ else
+ err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
+ if (err)
+ return err;
}
return 0;
}
@@ -8193,6 +8457,16 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
return obj->btf ? btf__fd(obj->btf) : -1;
}
+int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
+{
+ if (obj->loaded)
+ return -EINVAL;
+
+ obj->kern_version = kern_version;
+
+ return 0;
+}
+
int bpf_object__set_priv(struct bpf_object *obj, void *priv,
bpf_object_clear_priv_t clear_priv)
{
@@ -8381,7 +8655,7 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
return fd;
}
-enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
+enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog)
{
return prog->type;
}
@@ -8426,7 +8700,7 @@ BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
enum bpf_attach_type
-bpf_program__get_expected_attach_type(struct bpf_program *prog)
+bpf_program__get_expected_attach_type(const struct bpf_program *prog)
{
return prog->expected_attach_type;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3c35eb401931..f500621d28e5 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -143,6 +143,7 @@ LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
+LIBBPF_API int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version);
struct btf;
LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
@@ -361,12 +362,12 @@ LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
-LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
+LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
enum bpf_prog_type type);
LIBBPF_API enum bpf_attach_type
-bpf_program__get_expected_attach_type(struct bpf_program *prog);
+bpf_program__get_expected_attach_type(const struct bpf_program *prog);
LIBBPF_API void
bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type);
@@ -507,6 +508,7 @@ struct xdp_link_info {
struct bpf_xdp_set_link_opts {
size_t sz;
int old_fd;
+ size_t :0;
};
#define bpf_xdp_set_link_opts__last_field old_fd
@@ -759,6 +761,19 @@ enum libbpf_tristate {
TRI_MODULE = 2,
};
+struct bpf_linker_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+};
+#define bpf_linker_opts__last_field sz
+
+struct bpf_linker;
+
+LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts);
+LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filename);
+LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker);
+LIBBPF_API void bpf_linker__free(struct bpf_linker *linker);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 1c0fd2dd233a..f5990f7208ce 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -350,3 +350,14 @@ LIBBPF_0.3.0 {
xsk_setup_xdp_prog;
xsk_socket__update_xskmap;
} LIBBPF_0.2.0;
+
+LIBBPF_0.4.0 {
+ global:
+ btf__add_float;
+ btf__add_type;
+ bpf_linker__add_file;
+ bpf_linker__finalize;
+ bpf_linker__free;
+ bpf_linker__new;
+ bpf_object__set_kversion;
+} LIBBPF_0.3.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 969d0ac592ba..6017902c687e 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -20,6 +20,26 @@
#include "libbpf.h"
+#ifndef EM_BPF
+#define EM_BPF 247
+#endif
+
+#ifndef R_BPF_64_64
+#define R_BPF_64_64 1
+#endif
+#ifndef R_BPF_64_32
+#define R_BPF_64_32 10
+#endif
+
+#ifndef SHT_LLVM_ADDRSIG
+#define SHT_LLVM_ADDRSIG 0x6FFF4C03
+#endif
+
+/* if libelf is old and doesn't support mmap(), fall back to read() */
+#ifndef ELF_C_READ_MMAP
+#define ELF_C_READ_MMAP ELF_C_READ
+#endif
+
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
@@ -31,6 +51,8 @@
#define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset)
#define BTF_PARAM_ENC(name, type) (name), (type)
#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
+#define BTF_TYPE_FLOAT_ENC(name, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
@@ -105,9 +127,14 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
return realloc(ptr, total);
}
-void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
- size_t cur_cnt, size_t max_cnt, size_t add_cnt);
-int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
+struct btf;
+struct btf_type;
+
+struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id);
+
+void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+ size_t cur_cnt, size_t max_cnt, size_t add_cnt);
+int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
static inline bool libbpf_validate_opts(const char *opts,
size_t opts_sz, size_t user_sz,
@@ -349,4 +376,11 @@ struct bpf_core_relo {
enum bpf_core_relo_kind kind;
};
+typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx);
+typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx);
+int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx);
+int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx);
+int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx);
+int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx);
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
deleted file mode 100644
index 59c779c5790c..000000000000
--- a/tools/lib/bpf/libbpf_util.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-/* Copyright (c) 2019 Facebook */
-
-#ifndef __LIBBPF_LIBBPF_UTIL_H
-#define __LIBBPF_LIBBPF_UTIL_H
-
-#include <stdbool.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Use these barrier functions instead of smp_[rw]mb() when they are
- * used in a libbpf header file. That way they can be built into the
- * application that uses libbpf.
- */
-#if defined(__i386__) || defined(__x86_64__)
-# define libbpf_smp_rmb() asm volatile("" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("" : : : "memory")
-# define libbpf_smp_mb() \
- asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
-/* Hinders stores to be observed before older loads. */
-# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
-#elif defined(__aarch64__)
-# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#elif defined(__arm__)
-/* These are only valid for armv7 and above */
-# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#else
-/* Architecture missing native barrier functions. */
-# define libbpf_smp_rmb() __sync_synchronize()
-# define libbpf_smp_wmb() __sync_synchronize()
-# define libbpf_smp_mb() __sync_synchronize()
-# define libbpf_smp_rwmb() __sync_synchronize()
-#endif
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
new file mode 100644
index 000000000000..46b16cbdcda3
--- /dev/null
+++ b/tools/lib/bpf/linker.c
@@ -0,0 +1,1963 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * BPF static linker
+ *
+ * Copyright (c) 2021 Facebook
+ */
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include <elf.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include "libbpf.h"
+#include "btf.h"
+#include "libbpf_internal.h"
+#include "strset.h"
+
+struct src_sec {
+ const char *sec_name;
+ /* positional (not necessarily ELF) index in an array of sections */
+ int id;
+ /* positional (not necessarily ELF) index of a matching section in a final object file */
+ int dst_id;
+ /* section data offset in a matching output section */
+ int dst_off;
+ /* whether section is omitted from the final ELF file */
+ bool skipped;
+ /* whether section is an ephemeral section, not mapped to an ELF section */
+ bool ephemeral;
+
+ /* ELF info */
+ size_t sec_idx;
+ Elf_Scn *scn;
+ Elf64_Shdr *shdr;
+ Elf_Data *data;
+
+ /* corresponding BTF DATASEC type ID */
+ int sec_type_id;
+};
+
+struct src_obj {
+ const char *filename;
+ int fd;
+ Elf *elf;
+ /* Section header strings section index */
+ size_t shstrs_sec_idx;
+ /* SYMTAB section index */
+ size_t symtab_sec_idx;
+
+ struct btf *btf;
+ struct btf_ext *btf_ext;
+
+ /* List of sections (including ephemeral). Slot zero is unused. */
+ struct src_sec *secs;
+ int sec_cnt;
+
+ /* mapping of symbol indices from src to dst ELF */
+ int *sym_map;
+ /* mapping from the src BTF type IDs to dst ones */
+ int *btf_type_map;
+};
+
+/* single .BTF.ext data section */
+struct btf_ext_sec_data {
+ size_t rec_cnt;
+ __u32 rec_sz;
+ void *recs;
+};
+
+struct dst_sec {
+ char *sec_name;
+ /* positional (not necessarily ELF) index in an array of sections */
+ int id;
+
+ /* ELF info */
+ size_t sec_idx;
+ Elf_Scn *scn;
+ Elf64_Shdr *shdr;
+ Elf_Data *data;
+
+ /* final output section size */
+ int sec_sz;
+ /* final output contents of the section */
+ void *raw_data;
+
+ /* corresponding STT_SECTION symbol index in SYMTAB */
+ int sec_sym_idx;
+
+ /* section's DATASEC variable info, emitted on BTF finalization */
+ bool has_btf;
+ int sec_var_cnt;
+ struct btf_var_secinfo *sec_vars;
+
+ /* section's .BTF.ext data */
+ struct btf_ext_sec_data func_info;
+ struct btf_ext_sec_data line_info;
+ struct btf_ext_sec_data core_relo_info;
+};
+
+struct bpf_linker {
+ char *filename;
+ int fd;
+ Elf *elf;
+ Elf64_Ehdr *elf_hdr;
+
+ /* Output sections metadata */
+ struct dst_sec *secs;
+ int sec_cnt;
+
+ struct strset *strtab_strs; /* STRTAB unique strings */
+ size_t strtab_sec_idx; /* STRTAB section index */
+ size_t symtab_sec_idx; /* SYMTAB section index */
+
+ struct btf *btf;
+ struct btf_ext *btf_ext;
+};
+
+#define pr_warn_elf(fmt, ...) \
+do { \
+ libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1)); \
+} while (0)
+
+static int init_output_elf(struct bpf_linker *linker, const char *file);
+
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj);
+static int linker_sanity_check_elf(struct src_obj *obj);
+static int linker_sanity_check_btf(struct src_obj *obj);
+static int linker_sanity_check_btf_ext(struct src_obj *obj);
+static int linker_fixup_btf(struct src_obj *obj);
+static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj);
+
+static int finalize_btf(struct bpf_linker *linker);
+static int finalize_btf_ext(struct bpf_linker *linker);
+
+void bpf_linker__free(struct bpf_linker *linker)
+{
+ int i;
+
+ if (!linker)
+ return;
+
+ free(linker->filename);
+
+ if (linker->elf)
+ elf_end(linker->elf);
+
+ if (linker->fd >= 0)
+ close(linker->fd);
+
+ strset__free(linker->strtab_strs);
+
+ btf__free(linker->btf);
+ btf_ext__free(linker->btf_ext);
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ free(sec->sec_name);
+ free(sec->raw_data);
+ free(sec->sec_vars);
+
+ free(sec->func_info.recs);
+ free(sec->line_info.recs);
+ free(sec->core_relo_info.recs);
+ }
+ free(linker->secs);
+
+ free(linker);
+}
+
+struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts)
+{
+ struct bpf_linker *linker;
+ int err;
+
+ if (!OPTS_VALID(opts, bpf_linker_opts))
+ return NULL;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ pr_warn_elf("libelf initialization failed");
+ return NULL;
+ }
+
+ linker = calloc(1, sizeof(*linker));
+ if (!linker)
+ return NULL;
+
+ linker->fd = -1;
+
+ err = init_output_elf(linker, filename);
+ if (err)
+ goto err_out;
+
+ return linker;
+
+err_out:
+ bpf_linker__free(linker);
+ return NULL;
+}
+
+static struct dst_sec *add_dst_sec(struct bpf_linker *linker, const char *sec_name)
+{
+ struct dst_sec *secs = linker->secs, *sec;
+ size_t new_cnt = linker->sec_cnt ? linker->sec_cnt + 1 : 2;
+
+ secs = libbpf_reallocarray(secs, new_cnt, sizeof(*secs));
+ if (!secs)
+ return NULL;
+
+ /* zero out newly allocated memory */
+ memset(secs + linker->sec_cnt, 0, (new_cnt - linker->sec_cnt) * sizeof(*secs));
+
+ linker->secs = secs;
+ linker->sec_cnt = new_cnt;
+
+ sec = &linker->secs[new_cnt - 1];
+ sec->id = new_cnt - 1;
+ sec->sec_name = strdup(sec_name);
+ if (!sec->sec_name)
+ return NULL;
+
+ return sec;
+}
+
+static Elf64_Sym *add_new_sym(struct bpf_linker *linker, size_t *sym_idx)
+{
+ struct dst_sec *symtab = &linker->secs[linker->symtab_sec_idx];
+ Elf64_Sym *syms, *sym;
+ size_t sym_cnt = symtab->sec_sz / sizeof(*sym);
+
+ syms = libbpf_reallocarray(symtab->raw_data, sym_cnt + 1, sizeof(*sym));
+ if (!syms)
+ return NULL;
+
+ sym = &syms[sym_cnt];
+ memset(sym, 0, sizeof(*sym));
+
+ symtab->raw_data = syms;
+ symtab->sec_sz += sizeof(*sym);
+ symtab->shdr->sh_size += sizeof(*sym);
+ symtab->data->d_size += sizeof(*sym);
+
+ if (sym_idx)
+ *sym_idx = sym_cnt;
+
+ return sym;
+}
+
+static int init_output_elf(struct bpf_linker *linker, const char *file)
+{
+ int err, str_off;
+ Elf64_Sym *init_sym;
+ struct dst_sec *sec;
+
+ linker->filename = strdup(file);
+ if (!linker->filename)
+ return -ENOMEM;
+
+ linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ if (linker->fd < 0) {
+ err = -errno;
+ pr_warn("failed to create '%s': %d\n", file, err);
+ return err;
+ }
+
+ linker->elf = elf_begin(linker->fd, ELF_C_WRITE, NULL);
+ if (!linker->elf) {
+ pr_warn_elf("failed to create ELF object");
+ return -EINVAL;
+ }
+
+ /* ELF header */
+ linker->elf_hdr = elf64_newehdr(linker->elf);
+ if (!linker->elf_hdr){
+ pr_warn_elf("failed to create ELF header");
+ return -EINVAL;
+ }
+
+ linker->elf_hdr->e_machine = EM_BPF;
+ linker->elf_hdr->e_type = ET_REL;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB;
+#else
+#error "Unknown __BYTE_ORDER"
+#endif
+
+ /* STRTAB */
+ /* initialize strset with an empty string to conform to ELF */
+ linker->strtab_strs = strset__new(INT_MAX, "", sizeof(""));
+ if (libbpf_get_error(linker->strtab_strs))
+ return libbpf_get_error(linker->strtab_strs);
+
+ sec = add_dst_sec(linker, ".strtab");
+ if (!sec)
+ return -ENOMEM;
+
+ sec->scn = elf_newscn(linker->elf);
+ if (!sec->scn) {
+ pr_warn_elf("failed to create STRTAB section");
+ return -EINVAL;
+ }
+
+ sec->shdr = elf64_getshdr(sec->scn);
+ if (!sec->shdr)
+ return -EINVAL;
+
+ sec->data = elf_newdata(sec->scn);
+ if (!sec->data) {
+ pr_warn_elf("failed to create STRTAB data");
+ return -EINVAL;
+ }
+
+ str_off = strset__add_str(linker->strtab_strs, sec->sec_name);
+ if (str_off < 0)
+ return str_off;
+
+ sec->sec_idx = elf_ndxscn(sec->scn);
+ linker->elf_hdr->e_shstrndx = sec->sec_idx;
+ linker->strtab_sec_idx = sec->sec_idx;
+
+ sec->shdr->sh_name = str_off;
+ sec->shdr->sh_type = SHT_STRTAB;
+ sec->shdr->sh_flags = SHF_STRINGS;
+ sec->shdr->sh_offset = 0;
+ sec->shdr->sh_link = 0;
+ sec->shdr->sh_info = 0;
+ sec->shdr->sh_addralign = 1;
+ sec->shdr->sh_size = sec->sec_sz = 0;
+ sec->shdr->sh_entsize = 0;
+
+ /* SYMTAB */
+ sec = add_dst_sec(linker, ".symtab");
+ if (!sec)
+ return -ENOMEM;
+
+ sec->scn = elf_newscn(linker->elf);
+ if (!sec->scn) {
+ pr_warn_elf("failed to create SYMTAB section");
+ return -EINVAL;
+ }
+
+ sec->shdr = elf64_getshdr(sec->scn);
+ if (!sec->shdr)
+ return -EINVAL;
+
+ sec->data = elf_newdata(sec->scn);
+ if (!sec->data) {
+ pr_warn_elf("failed to create SYMTAB data");
+ return -EINVAL;
+ }
+
+ str_off = strset__add_str(linker->strtab_strs, sec->sec_name);
+ if (str_off < 0)
+ return str_off;
+
+ sec->sec_idx = elf_ndxscn(sec->scn);
+ linker->symtab_sec_idx = sec->sec_idx;
+
+ sec->shdr->sh_name = str_off;
+ sec->shdr->sh_type = SHT_SYMTAB;
+ sec->shdr->sh_flags = 0;
+ sec->shdr->sh_offset = 0;
+ sec->shdr->sh_link = linker->strtab_sec_idx;
+ /* sh_info should be one greater than the index of the last local
+ * symbol (i.e., binding is STB_LOCAL). But why and who cares?
+ */
+ sec->shdr->sh_info = 0;
+ sec->shdr->sh_addralign = 8;
+ sec->shdr->sh_entsize = sizeof(Elf64_Sym);
+
+ /* .BTF */
+ linker->btf = btf__new_empty();
+ err = libbpf_get_error(linker->btf);
+ if (err)
+ return err;
+
+ /* add the special all-zero symbol */
+ init_sym = add_new_sym(linker, NULL);
+ if (!init_sym)
+ return -EINVAL;
+
+ init_sym->st_name = 0;
+ init_sym->st_info = 0;
+ init_sym->st_other = 0;
+ init_sym->st_shndx = SHN_UNDEF;
+ init_sym->st_value = 0;
+ init_sym->st_size = 0;
+
+ return 0;
+}
+
+int bpf_linker__add_file(struct bpf_linker *linker, const char *filename)
+{
+ struct src_obj obj = {};
+ int err = 0;
+
+ if (!linker->elf)
+ return -EINVAL;
+
+ err = err ?: linker_load_obj_file(linker, filename, &obj);
+ err = err ?: linker_append_sec_data(linker, &obj);
+ err = err ?: linker_append_elf_syms(linker, &obj);
+ err = err ?: linker_append_elf_relos(linker, &obj);
+ err = err ?: linker_append_btf(linker, &obj);
+ err = err ?: linker_append_btf_ext(linker, &obj);
+
+ /* free up src_obj resources */
+ free(obj.btf_type_map);
+ btf__free(obj.btf);
+ btf_ext__free(obj.btf_ext);
+ free(obj.secs);
+ free(obj.sym_map);
+ if (obj.elf)
+ elf_end(obj.elf);
+ if (obj.fd >= 0)
+ close(obj.fd);
+
+ return err;
+}
+
+static bool is_dwarf_sec_name(const char *name)
+{
+ /* approximation, but the actual list is too long */
+ return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
+}
+
+static bool is_ignored_sec(struct src_sec *sec)
+{
+ Elf64_Shdr *shdr = sec->shdr;
+ const char *name = sec->sec_name;
+
+ /* no special handling of .strtab */
+ if (shdr->sh_type == SHT_STRTAB)
+ return true;
+
+ /* ignore .llvm_addrsig section as well */
+ if (shdr->sh_type == SHT_LLVM_ADDRSIG)
+ return true;
+
+ /* no subprograms will lead to an empty .text section, ignore it */
+ if (shdr->sh_type == SHT_PROGBITS && shdr->sh_size == 0 &&
+ strcmp(sec->sec_name, ".text") == 0)
+ return true;
+
+ /* DWARF sections */
+ if (is_dwarf_sec_name(sec->sec_name))
+ return true;
+
+ if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
+ name += sizeof(".rel") - 1;
+ /* DWARF section relocations */
+ if (is_dwarf_sec_name(name))
+ return true;
+
+ /* .BTF and .BTF.ext don't need relocations */
+ if (strcmp(name, BTF_ELF_SEC) == 0 ||
+ strcmp(name, BTF_EXT_ELF_SEC) == 0)
+ return true;
+ }
+
+ return false;
+}
+
+static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name)
+{
+ struct src_sec *secs = obj->secs, *sec;
+ size_t new_cnt = obj->sec_cnt ? obj->sec_cnt + 1 : 2;
+
+ secs = libbpf_reallocarray(secs, new_cnt, sizeof(*secs));
+ if (!secs)
+ return NULL;
+
+ /* zero out newly allocated memory */
+ memset(secs + obj->sec_cnt, 0, (new_cnt - obj->sec_cnt) * sizeof(*secs));
+
+ obj->secs = secs;
+ obj->sec_cnt = new_cnt;
+
+ sec = &obj->secs[new_cnt - 1];
+ sec->id = new_cnt - 1;
+ sec->sec_name = sec_name;
+
+ return sec;
+}
+
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ const int host_endianness = ELFDATA2LSB;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ const int host_endianness = ELFDATA2MSB;
+#else
+#error "Unknown __BYTE_ORDER"
+#endif
+ int err = 0;
+ Elf_Scn *scn;
+ Elf_Data *data;
+ Elf64_Ehdr *ehdr;
+ Elf64_Shdr *shdr;
+ struct src_sec *sec;
+
+ pr_debug("linker: adding object file '%s'...\n", filename);
+
+ obj->filename = filename;
+
+ obj->fd = open(filename, O_RDONLY);
+ if (obj->fd < 0) {
+ err = -errno;
+ pr_warn("failed to open file '%s': %d\n", filename, err);
+ return err;
+ }
+ obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL);
+ if (!obj->elf) {
+ err = -errno;
+ pr_warn_elf("failed to parse ELF file '%s'", filename);
+ return err;
+ }
+
+ /* Sanity check ELF file high-level properties */
+ ehdr = elf64_getehdr(obj->elf);
+ if (!ehdr) {
+ err = -errno;
+ pr_warn_elf("failed to get ELF header for %s", filename);
+ return err;
+ }
+ if (ehdr->e_ident[EI_DATA] != host_endianness) {
+ err = -EOPNOTSUPP;
+ pr_warn_elf("unsupported byte order of ELF file %s", filename);
+ return err;
+ }
+ if (ehdr->e_type != ET_REL
+ || ehdr->e_machine != EM_BPF
+ || ehdr->e_ident[EI_CLASS] != ELFCLASS64) {
+ err = -EOPNOTSUPP;
+ pr_warn_elf("unsupported kind of ELF file %s", filename);
+ return err;
+ }
+
+ if (elf_getshdrstrndx(obj->elf, &obj->shstrs_sec_idx)) {
+ err = -errno;
+ pr_warn_elf("failed to get SHSTRTAB section index for %s", filename);
+ return err;
+ }
+
+ scn = NULL;
+ while ((scn = elf_nextscn(obj->elf, scn)) != NULL) {
+ size_t sec_idx = elf_ndxscn(scn);
+ const char *sec_name;
+
+ shdr = elf64_getshdr(scn);
+ if (!shdr) {
+ err = -errno;
+ pr_warn_elf("failed to get section #%zu header for %s",
+ sec_idx, filename);
+ return err;
+ }
+
+ sec_name = elf_strptr(obj->elf, obj->shstrs_sec_idx, shdr->sh_name);
+ if (!sec_name) {
+ err = -errno;
+ pr_warn_elf("failed to get section #%zu name for %s",
+ sec_idx, filename);
+ return err;
+ }
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ err = -errno;
+ pr_warn_elf("failed to get section #%zu (%s) data from %s",
+ sec_idx, sec_name, filename);
+ return err;
+ }
+
+ sec = add_src_sec(obj, sec_name);
+ if (!sec)
+ return -ENOMEM;
+
+ sec->scn = scn;
+ sec->shdr = shdr;
+ sec->data = data;
+ sec->sec_idx = elf_ndxscn(scn);
+
+ if (is_ignored_sec(sec)) {
+ sec->skipped = true;
+ continue;
+ }
+
+ switch (shdr->sh_type) {
+ case SHT_SYMTAB:
+ if (obj->symtab_sec_idx) {
+ err = -EOPNOTSUPP;
+ pr_warn("multiple SYMTAB sections found, not supported\n");
+ return err;
+ }
+ obj->symtab_sec_idx = sec_idx;
+ break;
+ case SHT_STRTAB:
+ /* we'll construct our own string table */
+ break;
+ case SHT_PROGBITS:
+ if (strcmp(sec_name, BTF_ELF_SEC) == 0) {
+ obj->btf = btf__new(data->d_buf, shdr->sh_size);
+ err = libbpf_get_error(obj->btf);
+ if (err) {
+ pr_warn("failed to parse .BTF from %s: %d\n", filename, err);
+ return err;
+ }
+ sec->skipped = true;
+ continue;
+ }
+ if (strcmp(sec_name, BTF_EXT_ELF_SEC) == 0) {
+ obj->btf_ext = btf_ext__new(data->d_buf, shdr->sh_size);
+ err = libbpf_get_error(obj->btf_ext);
+ if (err) {
+ pr_warn("failed to parse .BTF.ext from '%s': %d\n", filename, err);
+ return err;
+ }
+ sec->skipped = true;
+ continue;
+ }
+
+ /* data & code */
+ break;
+ case SHT_NOBITS:
+ /* BSS */
+ break;
+ case SHT_REL:
+ /* relocations */
+ break;
+ default:
+ pr_warn("unrecognized section #%zu (%s) in %s\n",
+ sec_idx, sec_name, filename);
+ err = -EINVAL;
+ return err;
+ }
+ }
+
+ err = err ?: linker_sanity_check_elf(obj);
+ err = err ?: linker_sanity_check_btf(obj);
+ err = err ?: linker_sanity_check_btf_ext(obj);
+ err = err ?: linker_fixup_btf(obj);
+
+ return err;
+}
+
+static bool is_pow_of_2(size_t x)
+{
+ return x && (x & (x - 1)) == 0;
+}
+
+static int linker_sanity_check_elf(struct src_obj *obj)
+{
+ struct src_sec *sec, *link_sec;
+ int i, j, n;
+
+ if (!obj->symtab_sec_idx) {
+ pr_warn("ELF is missing SYMTAB section in %s\n", obj->filename);
+ return -EINVAL;
+ }
+ if (!obj->shstrs_sec_idx) {
+ pr_warn("ELF is missing section headers STRTAB section in %s\n", obj->filename);
+ return -EINVAL;
+ }
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+ sec = &obj->secs[i];
+
+ if (sec->sec_name[0] == '\0') {
+ pr_warn("ELF section #%zu has empty name in %s\n", sec->sec_idx, obj->filename);
+ return -EINVAL;
+ }
+
+ if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign))
+ return -EINVAL;
+ if (sec->shdr->sh_addralign != sec->data->d_align)
+ return -EINVAL;
+
+ if (sec->shdr->sh_size != sec->data->d_size)
+ return -EINVAL;
+
+ switch (sec->shdr->sh_type) {
+ case SHT_SYMTAB: {
+ Elf64_Sym *sym;
+
+ if (sec->shdr->sh_entsize != sizeof(Elf64_Sym))
+ return -EINVAL;
+ if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+ return -EINVAL;
+
+ if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) {
+ pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
+ link_sec = &obj->secs[sec->shdr->sh_link];
+ if (link_sec->shdr->sh_type != SHT_STRTAB) {
+ pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
+
+ n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+ sym = sec->data->d_buf;
+ for (j = 0; j < n; j++, sym++) {
+ if (sym->st_shndx
+ && sym->st_shndx < SHN_LORESERVE
+ && sym->st_shndx >= obj->sec_cnt) {
+ pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n",
+ j, sec->sec_idx, (size_t)sym->st_shndx, obj->filename);
+ return -EINVAL;
+ }
+ if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) {
+ if (sym->st_value != 0)
+ return -EINVAL;
+ }
+ }
+ break;
+ }
+ case SHT_STRTAB:
+ break;
+ case SHT_PROGBITS:
+ if (sec->shdr->sh_flags & SHF_EXECINSTR) {
+ if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0)
+ return -EINVAL;
+ }
+ break;
+ case SHT_NOBITS:
+ break;
+ case SHT_REL: {
+ Elf64_Rel *relo;
+ struct src_sec *sym_sec;
+
+ if (sec->shdr->sh_entsize != sizeof(Elf64_Rel))
+ return -EINVAL;
+ if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+ return -EINVAL;
+
+ /* SHT_REL's sh_link should point to SYMTAB */
+ if (sec->shdr->sh_link != obj->symtab_sec_idx) {
+ pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
+
+ /* SHT_REL's sh_info points to relocated section */
+ if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) {
+ pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+ return -EINVAL;
+ }
+ link_sec = &obj->secs[sec->shdr->sh_info];
+
+ /* .rel<secname> -> <secname> pattern is followed */
+ if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0
+ || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) {
+ pr_warn("ELF relo section #%zu name has invalid name in %s\n",
+ sec->sec_idx, obj->filename);
+ return -EINVAL;
+ }
+
+ /* don't further validate relocations for ignored sections */
+ if (link_sec->skipped)
+ break;
+
+ /* relocatable section is data or instructions */
+ if (link_sec->shdr->sh_type != SHT_PROGBITS
+ && link_sec->shdr->sh_type != SHT_NOBITS) {
+ pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+ return -EINVAL;
+ }
+
+ /* check sanity of each relocation */
+ n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+ relo = sec->data->d_buf;
+ sym_sec = &obj->secs[obj->symtab_sec_idx];
+ for (j = 0; j < n; j++, relo++) {
+ size_t sym_idx = ELF64_R_SYM(relo->r_info);
+ size_t sym_type = ELF64_R_TYPE(relo->r_info);
+
+ if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
+ pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
+ j, sec->sec_idx, sym_type, obj->filename);
+ return -EINVAL;
+ }
+
+ if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) {
+ pr_warn("ELF relo #%d in section #%zu points to invalid symbol #%zu in %s\n",
+ j, sec->sec_idx, sym_idx, obj->filename);
+ return -EINVAL;
+ }
+
+ if (link_sec->shdr->sh_flags & SHF_EXECINSTR) {
+ if (relo->r_offset % sizeof(struct bpf_insn) != 0) {
+ pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n",
+ j, sec->sec_idx, sym_idx, obj->filename);
+ return -EINVAL;
+ }
+ }
+ }
+ break;
+ }
+ case SHT_LLVM_ADDRSIG:
+ break;
+ default:
+ pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n",
+ sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int check_btf_type_id(__u32 *type_id, void *ctx)
+{
+ struct btf *btf = ctx;
+
+ if (*type_id > btf__get_nr_types(btf))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int check_btf_str_off(__u32 *str_off, void *ctx)
+{
+ struct btf *btf = ctx;
+ const char *s;
+
+ s = btf__str_by_offset(btf, *str_off);
+
+ if (!s)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int linker_sanity_check_btf(struct src_obj *obj)
+{
+ struct btf_type *t;
+ int i, n, err = 0;
+
+ if (!obj->btf)
+ return 0;
+
+ n = btf__get_nr_types(obj->btf);
+ for (i = 1; i <= n; i++) {
+ t = btf_type_by_id(obj->btf, i);
+
+ err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf);
+ err = err ?: btf_type_visit_str_offs(t, check_btf_str_off, obj->btf);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int linker_sanity_check_btf_ext(struct src_obj *obj)
+{
+ int err = 0;
+
+ if (!obj->btf_ext)
+ return 0;
+
+ /* can't use .BTF.ext without .BTF */
+ if (!obj->btf)
+ return -EINVAL;
+
+ err = err ?: btf_ext_visit_type_ids(obj->btf_ext, check_btf_type_id, obj->btf);
+ err = err ?: btf_ext_visit_str_offs(obj->btf_ext, check_btf_str_off, obj->btf);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct src_sec *src_sec)
+{
+ Elf_Scn *scn;
+ Elf_Data *data;
+ Elf64_Shdr *shdr;
+ int name_off;
+
+ dst_sec->sec_sz = 0;
+ dst_sec->sec_idx = 0;
+
+ /* ephemeral sections are just thin section shells lacking most parts */
+ if (src_sec->ephemeral)
+ return 0;
+
+ scn = elf_newscn(linker->elf);
+ if (!scn)
+ return -1;
+ data = elf_newdata(scn);
+ if (!data)
+ return -1;
+ shdr = elf64_getshdr(scn);
+ if (!shdr)
+ return -1;
+
+ dst_sec->scn = scn;
+ dst_sec->shdr = shdr;
+ dst_sec->data = data;
+ dst_sec->sec_idx = elf_ndxscn(scn);
+
+ name_off = strset__add_str(linker->strtab_strs, src_sec->sec_name);
+ if (name_off < 0)
+ return name_off;
+
+ shdr->sh_name = name_off;
+ shdr->sh_type = src_sec->shdr->sh_type;
+ shdr->sh_flags = src_sec->shdr->sh_flags;
+ shdr->sh_size = 0;
+ /* sh_link and sh_info have different meaning for different types of
+ * sections, so we leave it up to the caller code to fill them in, if
+ * necessary
+ */
+ shdr->sh_link = 0;
+ shdr->sh_info = 0;
+ shdr->sh_addralign = src_sec->shdr->sh_addralign;
+ shdr->sh_entsize = src_sec->shdr->sh_entsize;
+
+ data->d_type = src_sec->data->d_type;
+ data->d_size = 0;
+ data->d_buf = NULL;
+ data->d_align = src_sec->data->d_align;
+ data->d_off = 0;
+
+ return 0;
+}
+
+static struct dst_sec *find_dst_sec_by_name(struct bpf_linker *linker, const char *sec_name)
+{
+ struct dst_sec *sec;
+ int i;
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ sec = &linker->secs[i];
+
+ if (strcmp(sec->sec_name, sec_name) == 0)
+ return sec;
+ }
+
+ return NULL;
+}
+
+static bool secs_match(struct dst_sec *dst, struct src_sec *src)
+{
+ if (dst->shdr->sh_type != src->shdr->sh_type) {
+ pr_warn("sec %s types mismatch\n", dst->sec_name);
+ return false;
+ }
+ if (dst->shdr->sh_flags != src->shdr->sh_flags) {
+ pr_warn("sec %s flags mismatch\n", dst->sec_name);
+ return false;
+ }
+ if (dst->shdr->sh_entsize != src->shdr->sh_entsize) {
+ pr_warn("sec %s entsize mismatch\n", dst->sec_name);
+ return false;
+ }
+
+ return true;
+}
+
+static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec)
+{
+ if (dst_sec->sec_sz != src_sec->shdr->sh_size)
+ return false;
+ if (memcmp(dst_sec->raw_data, src_sec->data->d_buf, dst_sec->sec_sz) != 0)
+ return false;
+ return true;
+}
+
+static int extend_sec(struct dst_sec *dst, struct src_sec *src)
+{
+ void *tmp;
+ size_t dst_align = dst->shdr->sh_addralign;
+ size_t src_align = src->shdr->sh_addralign;
+ size_t dst_align_sz, dst_final_sz;
+
+ if (dst_align == 0)
+ dst_align = 1;
+ if (dst_align < src_align)
+ dst_align = src_align;
+
+ dst_align_sz = (dst->sec_sz + dst_align - 1) / dst_align * dst_align;
+
+ /* no need to re-align final size */
+ dst_final_sz = dst_align_sz + src->shdr->sh_size;
+
+ if (src->shdr->sh_type != SHT_NOBITS) {
+ tmp = realloc(dst->raw_data, dst_final_sz);
+ if (!tmp)
+ return -ENOMEM;
+ dst->raw_data = tmp;
+
+ /* pad dst section, if it's alignment forced size increase */
+ memset(dst->raw_data + dst->sec_sz, 0, dst_align_sz - dst->sec_sz);
+ /* now copy src data at a properly aligned offset */
+ memcpy(dst->raw_data + dst_align_sz, src->data->d_buf, src->shdr->sh_size);
+ }
+
+ dst->sec_sz = dst_final_sz;
+ dst->shdr->sh_size = dst_final_sz;
+ dst->data->d_size = dst_final_sz;
+
+ dst->shdr->sh_addralign = dst_align;
+ dst->data->d_align = dst_align;
+
+ src->dst_off = dst_align_sz;
+
+ return 0;
+}
+
+static bool is_data_sec(struct src_sec *sec)
+{
+ if (!sec || sec->skipped)
+ return false;
+ /* ephemeral sections are data sections, e.g., .kconfig, .ksyms */
+ if (sec->ephemeral)
+ return true;
+ return sec->shdr->sh_type == SHT_PROGBITS || sec->shdr->sh_type == SHT_NOBITS;
+}
+
+static bool is_relo_sec(struct src_sec *sec)
+{
+ if (!sec || sec->skipped || sec->ephemeral)
+ return false;
+ return sec->shdr->sh_type == SHT_REL;
+}
+
+static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj)
+{
+ int i, err;
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+ struct src_sec *src_sec;
+ struct dst_sec *dst_sec;
+
+ src_sec = &obj->secs[i];
+ if (!is_data_sec(src_sec))
+ continue;
+
+ dst_sec = find_dst_sec_by_name(linker, src_sec->sec_name);
+ if (!dst_sec) {
+ dst_sec = add_dst_sec(linker, src_sec->sec_name);
+ if (!dst_sec)
+ return -ENOMEM;
+ err = init_sec(linker, dst_sec, src_sec);
+ if (err) {
+ pr_warn("failed to init section '%s'\n", src_sec->sec_name);
+ return err;
+ }
+ } else {
+ if (!secs_match(dst_sec, src_sec)) {
+ pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name);
+ return -1;
+ }
+
+ /* "license" and "version" sections are deduped */
+ if (strcmp(src_sec->sec_name, "license") == 0
+ || strcmp(src_sec->sec_name, "version") == 0) {
+ if (!sec_content_is_same(dst_sec, src_sec)) {
+ pr_warn("non-identical contents of section '%s' are not supported\n", src_sec->sec_name);
+ return -EINVAL;
+ }
+ src_sec->skipped = true;
+ src_sec->dst_id = dst_sec->id;
+ continue;
+ }
+ }
+
+ /* record mapped section index */
+ src_sec->dst_id = dst_sec->id;
+
+ if (src_sec->ephemeral)
+ continue;
+
+ err = extend_sec(dst_sec, src_sec);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj)
+{
+ struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx];
+ Elf64_Sym *sym = symtab->data->d_buf, *dst_sym;
+ int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize;
+ int str_sec_idx = symtab->shdr->sh_link;
+
+ obj->sym_map = calloc(n + 1, sizeof(*obj->sym_map));
+ if (!obj->sym_map)
+ return -ENOMEM;
+
+ for (i = 0; i < n; i++, sym++) {
+ struct src_sec *src_sec = NULL;
+ struct dst_sec *dst_sec = NULL;
+ const char *sym_name;
+ size_t dst_sym_idx;
+ int name_off;
+
+ /* we already have all-zero initial symbol */
+ if (sym->st_name == 0 && sym->st_info == 0 &&
+ sym->st_other == 0 && sym->st_shndx == SHN_UNDEF &&
+ sym->st_value == 0 && sym->st_size ==0)
+ continue;
+
+ sym_name = elf_strptr(obj->elf, str_sec_idx, sym->st_name);
+ if (!sym_name) {
+ pr_warn("can't fetch symbol name for symbol #%d in '%s'\n", i, obj->filename);
+ return -1;
+ }
+
+ if (sym->st_shndx && sym->st_shndx < SHN_LORESERVE) {
+ src_sec = &obj->secs[sym->st_shndx];
+ if (src_sec->skipped)
+ continue;
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ /* allow only one STT_SECTION symbol per section */
+ if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && dst_sec->sec_sym_idx) {
+ obj->sym_map[i] = dst_sec->sec_sym_idx;
+ continue;
+ }
+ }
+
+ name_off = strset__add_str(linker->strtab_strs, sym_name);
+ if (name_off < 0)
+ return name_off;
+
+ dst_sym = add_new_sym(linker, &dst_sym_idx);
+ if (!dst_sym)
+ return -ENOMEM;
+
+ dst_sym->st_name = name_off;
+ dst_sym->st_info = sym->st_info;
+ dst_sym->st_other = sym->st_other;
+ dst_sym->st_shndx = src_sec ? dst_sec->sec_idx : sym->st_shndx;
+ dst_sym->st_value = (src_sec ? src_sec->dst_off : 0) + sym->st_value;
+ dst_sym->st_size = sym->st_size;
+
+ obj->sym_map[i] = dst_sym_idx;
+
+ if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && dst_sym) {
+ dst_sec->sec_sym_idx = dst_sym_idx;
+ dst_sym->st_value = 0;
+ }
+
+ }
+
+ return 0;
+}
+
+static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj)
+{
+ struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx];
+ struct dst_sec *dst_symtab = &linker->secs[linker->symtab_sec_idx];
+ int i, err;
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+ struct src_sec *src_sec, *src_linked_sec;
+ struct dst_sec *dst_sec, *dst_linked_sec;
+ Elf64_Rel *src_rel, *dst_rel;
+ int j, n;
+
+ src_sec = &obj->secs[i];
+ if (!is_relo_sec(src_sec))
+ continue;
+
+ /* shdr->sh_info points to relocatable section */
+ src_linked_sec = &obj->secs[src_sec->shdr->sh_info];
+ if (src_linked_sec->skipped)
+ continue;
+
+ dst_sec = find_dst_sec_by_name(linker, src_sec->sec_name);
+ if (!dst_sec) {
+ dst_sec = add_dst_sec(linker, src_sec->sec_name);
+ if (!dst_sec)
+ return -ENOMEM;
+ err = init_sec(linker, dst_sec, src_sec);
+ if (err) {
+ pr_warn("failed to init section '%s'\n", src_sec->sec_name);
+ return err;
+ }
+ } else if (!secs_match(dst_sec, src_sec)) {
+ pr_warn("Secs %s are not compatible\n", src_sec->sec_name);
+ return -1;
+ }
+
+ /* shdr->sh_link points to SYMTAB */
+ dst_sec->shdr->sh_link = linker->symtab_sec_idx;
+
+ /* shdr->sh_info points to relocated section */
+ dst_linked_sec = &linker->secs[src_linked_sec->dst_id];
+ dst_sec->shdr->sh_info = dst_linked_sec->sec_idx;
+
+ src_sec->dst_id = dst_sec->id;
+ err = extend_sec(dst_sec, src_sec);
+ if (err)
+ return err;
+
+ src_rel = src_sec->data->d_buf;
+ dst_rel = dst_sec->raw_data + src_sec->dst_off;
+ n = src_sec->shdr->sh_size / src_sec->shdr->sh_entsize;
+ for (j = 0; j < n; j++, src_rel++, dst_rel++) {
+ size_t src_sym_idx = ELF64_R_SYM(src_rel->r_info);
+ size_t sym_type = ELF64_R_TYPE(src_rel->r_info);
+ Elf64_Sym *src_sym, *dst_sym;
+ size_t dst_sym_idx;
+
+ src_sym_idx = ELF64_R_SYM(src_rel->r_info);
+ src_sym = src_symtab->data->d_buf + sizeof(*src_sym) * src_sym_idx;
+
+ dst_sym_idx = obj->sym_map[src_sym_idx];
+ dst_sym = dst_symtab->raw_data + sizeof(*dst_sym) * dst_sym_idx;
+ dst_rel->r_offset += src_linked_sec->dst_off;
+ sym_type = ELF64_R_TYPE(src_rel->r_info);
+ dst_rel->r_info = ELF64_R_INFO(dst_sym_idx, sym_type);
+
+ if (ELF64_ST_TYPE(src_sym->st_info) == STT_SECTION) {
+ struct src_sec *sec = &obj->secs[src_sym->st_shndx];
+ struct bpf_insn *insn;
+
+ if (src_linked_sec->shdr->sh_flags & SHF_EXECINSTR) {
+ /* calls to the very first static function inside
+ * .text section at offset 0 will
+ * reference section symbol, not the
+ * function symbol. Fix that up,
+ * otherwise it won't be possible to
+ * relocate calls to two different
+ * static functions with the same name
+ * (rom two different object files)
+ */
+ insn = dst_linked_sec->raw_data + dst_rel->r_offset;
+ if (insn->code == (BPF_JMP | BPF_CALL))
+ insn->imm += sec->dst_off / sizeof(struct bpf_insn);
+ else
+ insn->imm += sec->dst_off;
+ } else {
+ pr_warn("relocation against STT_SECTION in non-exec section is not supported!\n");
+ return -EINVAL;
+ }
+ }
+
+ }
+ }
+
+ return 0;
+}
+
+static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name)
+{
+ struct src_sec *sec;
+ int i;
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+ sec = &obj->secs[i];
+
+ if (strcmp(sec->sec_name, sec_name) == 0)
+ return sec;
+ }
+
+ return NULL;
+}
+
+static Elf64_Sym *find_sym_by_name(struct src_obj *obj, size_t sec_idx,
+ int sym_type, const char *sym_name)
+{
+ struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx];
+ Elf64_Sym *sym = symtab->data->d_buf;
+ int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize;
+ int str_sec_idx = symtab->shdr->sh_link;
+ const char *name;
+
+ for (i = 0; i < n; i++, sym++) {
+ if (sym->st_shndx != sec_idx)
+ continue;
+ if (ELF64_ST_TYPE(sym->st_info) != sym_type)
+ continue;
+
+ name = elf_strptr(obj->elf, str_sec_idx, sym->st_name);
+ if (!name)
+ return NULL;
+
+ if (strcmp(sym_name, name) != 0)
+ continue;
+
+ return sym;
+ }
+
+ return NULL;
+}
+
+static int linker_fixup_btf(struct src_obj *obj)
+{
+ const char *sec_name;
+ struct src_sec *sec;
+ int i, j, n, m;
+
+ if (!obj->btf)
+ return 0;
+
+ n = btf__get_nr_types(obj->btf);
+ for (i = 1; i <= n; i++) {
+ struct btf_var_secinfo *vi;
+ struct btf_type *t;
+
+ t = btf_type_by_id(obj->btf, i);
+ if (btf_kind(t) != BTF_KIND_DATASEC)
+ continue;
+
+ sec_name = btf__str_by_offset(obj->btf, t->name_off);
+ sec = find_src_sec_by_name(obj, sec_name);
+ if (sec) {
+ /* record actual section size, unless ephemeral */
+ if (sec->shdr)
+ t->size = sec->shdr->sh_size;
+ } else {
+ /* BTF can have some sections that are not represented
+ * in ELF, e.g., .kconfig and .ksyms, which are used
+ * for special extern variables. Here we'll
+ * pre-create "section shells" for them to be able to
+ * keep track of extra per-section metadata later
+ * (e.g., BTF variables).
+ */
+ sec = add_src_sec(obj, sec_name);
+ if (!sec)
+ return -ENOMEM;
+
+ sec->ephemeral = true;
+ sec->sec_idx = 0; /* will match UNDEF shndx in ELF */
+ }
+
+ /* remember ELF section and its BTF type ID match */
+ sec->sec_type_id = i;
+
+ /* fix up variable offsets */
+ vi = btf_var_secinfos(t);
+ for (j = 0, m = btf_vlen(t); j < m; j++, vi++) {
+ const struct btf_type *vt = btf__type_by_id(obj->btf, vi->type);
+ const char *var_name = btf__str_by_offset(obj->btf, vt->name_off);
+ int var_linkage = btf_var(vt)->linkage;
+ Elf64_Sym *sym;
+
+ /* no need to patch up static or extern vars */
+ if (var_linkage != BTF_VAR_GLOBAL_ALLOCATED)
+ continue;
+
+ sym = find_sym_by_name(obj, sec->sec_idx, STT_OBJECT, var_name);
+ if (!sym) {
+ pr_warn("failed to find symbol for variable '%s' in section '%s'\n", var_name, sec_name);
+ return -ENOENT;
+ }
+
+ vi->offset = sym->st_value;
+ }
+ }
+
+ return 0;
+}
+
+static int remap_type_id(__u32 *type_id, void *ctx)
+{
+ int *id_map = ctx;
+
+ *type_id = id_map[*type_id];
+
+ return 0;
+}
+
+static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
+{
+ const struct btf_type *t;
+ int i, j, n, start_id, id;
+
+ if (!obj->btf)
+ return 0;
+
+ start_id = btf__get_nr_types(linker->btf) + 1;
+ n = btf__get_nr_types(obj->btf);
+
+ obj->btf_type_map = calloc(n + 1, sizeof(int));
+ if (!obj->btf_type_map)
+ return -ENOMEM;
+
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(obj->btf, i);
+
+ /* DATASECs are handled specially below */
+ if (btf_kind(t) == BTF_KIND_DATASEC)
+ continue;
+
+ id = btf__add_type(linker->btf, obj->btf, t);
+ if (id < 0) {
+ pr_warn("failed to append BTF type #%d from file '%s'\n", i, obj->filename);
+ return id;
+ }
+
+ obj->btf_type_map[i] = id;
+ }
+
+ /* remap all the types except DATASECs */
+ n = btf__get_nr_types(linker->btf);
+ for (i = start_id; i <= n; i++) {
+ struct btf_type *dst_t = btf_type_by_id(linker->btf, i);
+
+ if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map))
+ return -EINVAL;
+ }
+
+ /* append DATASEC info */
+ for (i = 1; i < obj->sec_cnt; i++) {
+ struct src_sec *src_sec;
+ struct dst_sec *dst_sec;
+ const struct btf_var_secinfo *src_var;
+ struct btf_var_secinfo *dst_var;
+
+ src_sec = &obj->secs[i];
+ if (!src_sec->sec_type_id || src_sec->skipped)
+ continue;
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ /* Mark section as having BTF regardless of the presence of
+ * variables. In some cases compiler might generate empty BTF
+ * with no variables information. E.g., when promoting local
+ * array/structure variable initial values and BPF object
+ * file otherwise has no read-only static variables in
+ * .rodata. We need to preserve such empty BTF and just set
+ * correct section size.
+ */
+ dst_sec->has_btf = true;
+
+ t = btf__type_by_id(obj->btf, src_sec->sec_type_id);
+ src_var = btf_var_secinfos(t);
+ n = btf_vlen(t);
+ for (j = 0; j < n; j++, src_var++) {
+ void *sec_vars = dst_sec->sec_vars;
+
+ sec_vars = libbpf_reallocarray(sec_vars,
+ dst_sec->sec_var_cnt + 1,
+ sizeof(*dst_sec->sec_vars));
+ if (!sec_vars)
+ return -ENOMEM;
+
+ dst_sec->sec_vars = sec_vars;
+ dst_sec->sec_var_cnt++;
+
+ dst_var = &dst_sec->sec_vars[dst_sec->sec_var_cnt - 1];
+ dst_var->type = obj->btf_type_map[src_var->type];
+ dst_var->size = src_var->size;
+ dst_var->offset = src_sec->dst_off + src_var->offset;
+ }
+ }
+
+ return 0;
+}
+
+static void *add_btf_ext_rec(struct btf_ext_sec_data *ext_data, const void *src_rec)
+{
+ void *tmp;
+
+ tmp = libbpf_reallocarray(ext_data->recs, ext_data->rec_cnt + 1, ext_data->rec_sz);
+ if (!tmp)
+ return NULL;
+ ext_data->recs = tmp;
+
+ tmp += ext_data->rec_cnt * ext_data->rec_sz;
+ memcpy(tmp, src_rec, ext_data->rec_sz);
+
+ ext_data->rec_cnt++;
+
+ return tmp;
+}
+
+static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj)
+{
+ const struct btf_ext_info_sec *ext_sec;
+ const char *sec_name, *s;
+ struct src_sec *src_sec;
+ struct dst_sec *dst_sec;
+ int rec_sz, str_off, i;
+
+ if (!obj->btf_ext)
+ return 0;
+
+ rec_sz = obj->btf_ext->func_info.rec_size;
+ for_each_btf_ext_sec(&obj->btf_ext->func_info, ext_sec) {
+ struct bpf_func_info_min *src_rec, *dst_rec;
+
+ sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off);
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name);
+ return -EINVAL;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ if (dst_sec->func_info.rec_sz == 0)
+ dst_sec->func_info.rec_sz = rec_sz;
+ if (dst_sec->func_info.rec_sz != rec_sz) {
+ pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name);
+ return -EINVAL;
+ }
+
+ for_each_btf_ext_rec(&obj->btf_ext->func_info, ext_sec, i, src_rec) {
+ dst_rec = add_btf_ext_rec(&dst_sec->func_info, src_rec);
+ if (!dst_rec)
+ return -ENOMEM;
+
+ dst_rec->insn_off += src_sec->dst_off;
+ dst_rec->type_id = obj->btf_type_map[dst_rec->type_id];
+ }
+ }
+
+ rec_sz = obj->btf_ext->line_info.rec_size;
+ for_each_btf_ext_sec(&obj->btf_ext->line_info, ext_sec) {
+ struct bpf_line_info_min *src_rec, *dst_rec;
+
+ sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off);
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name);
+ return -EINVAL;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ if (dst_sec->line_info.rec_sz == 0)
+ dst_sec->line_info.rec_sz = rec_sz;
+ if (dst_sec->line_info.rec_sz != rec_sz) {
+ pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name);
+ return -EINVAL;
+ }
+
+ for_each_btf_ext_rec(&obj->btf_ext->line_info, ext_sec, i, src_rec) {
+ dst_rec = add_btf_ext_rec(&dst_sec->line_info, src_rec);
+ if (!dst_rec)
+ return -ENOMEM;
+
+ dst_rec->insn_off += src_sec->dst_off;
+
+ s = btf__str_by_offset(obj->btf, src_rec->file_name_off);
+ str_off = btf__add_str(linker->btf, s);
+ if (str_off < 0)
+ return -ENOMEM;
+ dst_rec->file_name_off = str_off;
+
+ s = btf__str_by_offset(obj->btf, src_rec->line_off);
+ str_off = btf__add_str(linker->btf, s);
+ if (str_off < 0)
+ return -ENOMEM;
+ dst_rec->line_off = str_off;
+
+ /* dst_rec->line_col is fine */
+ }
+ }
+
+ rec_sz = obj->btf_ext->core_relo_info.rec_size;
+ for_each_btf_ext_sec(&obj->btf_ext->core_relo_info, ext_sec) {
+ struct bpf_core_relo *src_rec, *dst_rec;
+
+ sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off);
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name);
+ return -EINVAL;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ if (dst_sec->core_relo_info.rec_sz == 0)
+ dst_sec->core_relo_info.rec_sz = rec_sz;
+ if (dst_sec->core_relo_info.rec_sz != rec_sz) {
+ pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name);
+ return -EINVAL;
+ }
+
+ for_each_btf_ext_rec(&obj->btf_ext->core_relo_info, ext_sec, i, src_rec) {
+ dst_rec = add_btf_ext_rec(&dst_sec->core_relo_info, src_rec);
+ if (!dst_rec)
+ return -ENOMEM;
+
+ dst_rec->insn_off += src_sec->dst_off;
+ dst_rec->type_id = obj->btf_type_map[dst_rec->type_id];
+
+ s = btf__str_by_offset(obj->btf, src_rec->access_str_off);
+ str_off = btf__add_str(linker->btf, s);
+ if (str_off < 0)
+ return -ENOMEM;
+ dst_rec->access_str_off = str_off;
+
+ /* dst_rec->kind is fine */
+ }
+ }
+
+ return 0;
+}
+
+int bpf_linker__finalize(struct bpf_linker *linker)
+{
+ struct dst_sec *sec;
+ size_t strs_sz;
+ const void *strs;
+ int err, i;
+
+ if (!linker->elf)
+ return -EINVAL;
+
+ err = finalize_btf(linker);
+ if (err)
+ return err;
+
+ /* Finalize strings */
+ strs_sz = strset__data_size(linker->strtab_strs);
+ strs = strset__data(linker->strtab_strs);
+
+ sec = &linker->secs[linker->strtab_sec_idx];
+ sec->data->d_align = 1;
+ sec->data->d_off = 0LL;
+ sec->data->d_buf = (void *)strs;
+ sec->data->d_type = ELF_T_BYTE;
+ sec->data->d_size = strs_sz;
+ sec->shdr->sh_size = strs_sz;
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ sec = &linker->secs[i];
+
+ /* STRTAB is handled specially above */
+ if (sec->sec_idx == linker->strtab_sec_idx)
+ continue;
+
+ /* special ephemeral sections (.ksyms, .kconfig, etc) */
+ if (!sec->scn)
+ continue;
+
+ sec->data->d_buf = sec->raw_data;
+ }
+
+ /* Finalize ELF layout */
+ if (elf_update(linker->elf, ELF_C_NULL) < 0) {
+ err = -errno;
+ pr_warn_elf("failed to finalize ELF layout");
+ return err;
+ }
+
+ /* Write out final ELF contents */
+ if (elf_update(linker->elf, ELF_C_WRITE) < 0) {
+ err = -errno;
+ pr_warn_elf("failed to write ELF contents");
+ return err;
+ }
+
+ elf_end(linker->elf);
+ close(linker->fd);
+
+ linker->elf = NULL;
+ linker->fd = -1;
+
+ return 0;
+}
+
+static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name,
+ size_t align, const void *raw_data, size_t raw_sz)
+{
+ Elf_Scn *scn;
+ Elf_Data *data;
+ Elf64_Shdr *shdr;
+ int name_off;
+
+ name_off = strset__add_str(linker->strtab_strs, sec_name);
+ if (name_off < 0)
+ return name_off;
+
+ scn = elf_newscn(linker->elf);
+ if (!scn)
+ return -ENOMEM;
+ data = elf_newdata(scn);
+ if (!data)
+ return -ENOMEM;
+ shdr = elf64_getshdr(scn);
+ if (!shdr)
+ return -EINVAL;
+
+ shdr->sh_name = name_off;
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_flags = 0;
+ shdr->sh_size = raw_sz;
+ shdr->sh_link = 0;
+ shdr->sh_info = 0;
+ shdr->sh_addralign = align;
+ shdr->sh_entsize = 0;
+
+ data->d_type = ELF_T_BYTE;
+ data->d_size = raw_sz;
+ data->d_buf = (void *)raw_data;
+ data->d_align = align;
+ data->d_off = 0;
+
+ return 0;
+}
+
+static int finalize_btf(struct bpf_linker *linker)
+{
+ struct btf *btf = linker->btf;
+ const void *raw_data;
+ int i, j, id, err;
+ __u32 raw_sz;
+
+ /* bail out if no BTF data was produced */
+ if (btf__get_nr_types(linker->btf) == 0)
+ return 0;
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ if (!sec->has_btf)
+ continue;
+
+ id = btf__add_datasec(btf, sec->sec_name, sec->sec_sz);
+ if (id < 0) {
+ pr_warn("failed to add consolidated BTF type for datasec '%s': %d\n",
+ sec->sec_name, id);
+ return id;
+ }
+
+ for (j = 0; j < sec->sec_var_cnt; j++) {
+ struct btf_var_secinfo *vi = &sec->sec_vars[j];
+
+ if (btf__add_datasec_var_info(btf, vi->type, vi->offset, vi->size))
+ return -EINVAL;
+ }
+ }
+
+ err = finalize_btf_ext(linker);
+ if (err) {
+ pr_warn(".BTF.ext generation failed: %d\n", err);
+ return err;
+ }
+
+ err = btf__dedup(linker->btf, linker->btf_ext, NULL);
+ if (err) {
+ pr_warn("BTF dedup failed: %d\n", err);
+ return err;
+ }
+
+ /* Emit .BTF section */
+ raw_data = btf__get_raw_data(linker->btf, &raw_sz);
+ if (!raw_data)
+ return -ENOMEM;
+
+ err = emit_elf_data_sec(linker, BTF_ELF_SEC, 8, raw_data, raw_sz);
+ if (err) {
+ pr_warn("failed to write out .BTF ELF section: %d\n", err);
+ return err;
+ }
+
+ /* Emit .BTF.ext section */
+ if (linker->btf_ext) {
+ raw_data = btf_ext__get_raw_data(linker->btf_ext, &raw_sz);
+ if (!raw_data)
+ return -ENOMEM;
+
+ err = emit_elf_data_sec(linker, BTF_EXT_ELF_SEC, 8, raw_data, raw_sz);
+ if (err) {
+ pr_warn("failed to write out .BTF.ext ELF section: %d\n", err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int emit_btf_ext_data(struct bpf_linker *linker, void *output,
+ const char *sec_name, struct btf_ext_sec_data *sec_data)
+{
+ struct btf_ext_info_sec *sec_info;
+ void *cur = output;
+ int str_off;
+ size_t sz;
+
+ if (!sec_data->rec_cnt)
+ return 0;
+
+ str_off = btf__add_str(linker->btf, sec_name);
+ if (str_off < 0)
+ return -ENOMEM;
+
+ sec_info = cur;
+ sec_info->sec_name_off = str_off;
+ sec_info->num_info = sec_data->rec_cnt;
+ cur += sizeof(struct btf_ext_info_sec);
+
+ sz = sec_data->rec_cnt * sec_data->rec_sz;
+ memcpy(cur, sec_data->recs, sz);
+ cur += sz;
+
+ return cur - output;
+}
+
+static int finalize_btf_ext(struct bpf_linker *linker)
+{
+ size_t funcs_sz = 0, lines_sz = 0, core_relos_sz = 0, total_sz = 0;
+ size_t func_rec_sz = 0, line_rec_sz = 0, core_relo_rec_sz = 0;
+ struct btf_ext_header *hdr;
+ void *data, *cur;
+ int i, err, sz;
+
+ /* validate that all sections have the same .BTF.ext record sizes
+ * and calculate total data size for each type of data (func info,
+ * line info, core relos)
+ */
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ if (sec->func_info.rec_cnt) {
+ if (func_rec_sz == 0)
+ func_rec_sz = sec->func_info.rec_sz;
+ if (func_rec_sz != sec->func_info.rec_sz) {
+ pr_warn("mismatch in func_info record size %zu != %u\n",
+ func_rec_sz, sec->func_info.rec_sz);
+ return -EINVAL;
+ }
+
+ funcs_sz += sizeof(struct btf_ext_info_sec) + func_rec_sz * sec->func_info.rec_cnt;
+ }
+ if (sec->line_info.rec_cnt) {
+ if (line_rec_sz == 0)
+ line_rec_sz = sec->line_info.rec_sz;
+ if (line_rec_sz != sec->line_info.rec_sz) {
+ pr_warn("mismatch in line_info record size %zu != %u\n",
+ line_rec_sz, sec->line_info.rec_sz);
+ return -EINVAL;
+ }
+
+ lines_sz += sizeof(struct btf_ext_info_sec) + line_rec_sz * sec->line_info.rec_cnt;
+ }
+ if (sec->core_relo_info.rec_cnt) {
+ if (core_relo_rec_sz == 0)
+ core_relo_rec_sz = sec->core_relo_info.rec_sz;
+ if (core_relo_rec_sz != sec->core_relo_info.rec_sz) {
+ pr_warn("mismatch in core_relo_info record size %zu != %u\n",
+ core_relo_rec_sz, sec->core_relo_info.rec_sz);
+ return -EINVAL;
+ }
+
+ core_relos_sz += sizeof(struct btf_ext_info_sec) + core_relo_rec_sz * sec->core_relo_info.rec_cnt;
+ }
+ }
+
+ if (!funcs_sz && !lines_sz && !core_relos_sz)
+ return 0;
+
+ total_sz += sizeof(struct btf_ext_header);
+ if (funcs_sz) {
+ funcs_sz += sizeof(__u32); /* record size prefix */
+ total_sz += funcs_sz;
+ }
+ if (lines_sz) {
+ lines_sz += sizeof(__u32); /* record size prefix */
+ total_sz += lines_sz;
+ }
+ if (core_relos_sz) {
+ core_relos_sz += sizeof(__u32); /* record size prefix */
+ total_sz += core_relos_sz;
+ }
+
+ cur = data = calloc(1, total_sz);
+ if (!data)
+ return -ENOMEM;
+
+ hdr = cur;
+ hdr->magic = BTF_MAGIC;
+ hdr->version = BTF_VERSION;
+ hdr->flags = 0;
+ hdr->hdr_len = sizeof(struct btf_ext_header);
+ cur += sizeof(struct btf_ext_header);
+
+ /* All offsets are in bytes relative to the end of this header */
+ hdr->func_info_off = 0;
+ hdr->func_info_len = funcs_sz;
+ hdr->line_info_off = funcs_sz;
+ hdr->line_info_len = lines_sz;
+ hdr->core_relo_off = funcs_sz + lines_sz;;
+ hdr->core_relo_len = core_relos_sz;
+
+ if (funcs_sz) {
+ *(__u32 *)cur = func_rec_sz;
+ cur += sizeof(__u32);
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->func_info);
+ if (sz < 0) {
+ err = sz;
+ goto out;
+ }
+
+ cur += sz;
+ }
+ }
+
+ if (lines_sz) {
+ *(__u32 *)cur = line_rec_sz;
+ cur += sizeof(__u32);
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->line_info);
+ if (sz < 0) {
+ err = sz;
+ goto out;
+ }
+
+ cur += sz;
+ }
+ }
+
+ if (core_relos_sz) {
+ *(__u32 *)cur = core_relo_rec_sz;
+ cur += sizeof(__u32);
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->core_relo_info);
+ if (sz < 0) {
+ err = sz;
+ goto out;
+ }
+
+ cur += sz;
+ }
+ }
+
+ linker->btf_ext = btf_ext__new(data, total_sz);
+ err = libbpf_get_error(linker->btf_ext);
+ if (err) {
+ linker->btf_ext = NULL;
+ pr_warn("failed to parse final .BTF.ext data: %d\n", err);
+ goto out;
+ }
+
+out:
+ free(data);
+ return err;
+}
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 4dd73de00b6f..d2cb28e9ef52 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -40,7 +40,7 @@ static int libbpf_netlink_open(__u32 *nl_pid)
memset(&sa, 0, sizeof(sa));
sa.nl_family = AF_NETLINK;
- sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
if (sock < 0)
return -errno;
diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c
new file mode 100644
index 000000000000..1fb8b49de1d6
--- /dev/null
+++ b/tools/lib/bpf/strset.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+#include "libbpf_internal.h"
+#include "strset.h"
+
+struct strset {
+ void *strs_data;
+ size_t strs_data_len;
+ size_t strs_data_cap;
+ size_t strs_data_max_len;
+
+ /* lookup index for each unique string in strings set */
+ struct hashmap *strs_hash;
+};
+
+static size_t strset_hash_fn(const void *key, void *ctx)
+{
+ const struct strset *s = ctx;
+ const char *str = s->strs_data + (long)key;
+
+ return str_hash(str);
+}
+
+static bool strset_equal_fn(const void *key1, const void *key2, void *ctx)
+{
+ const struct strset *s = ctx;
+ const char *str1 = s->strs_data + (long)key1;
+ const char *str2 = s->strs_data + (long)key2;
+
+ return strcmp(str1, str2) == 0;
+}
+
+struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t init_data_sz)
+{
+ struct strset *set = calloc(1, sizeof(*set));
+ struct hashmap *hash;
+ int err = -ENOMEM;
+
+ if (!set)
+ return ERR_PTR(-ENOMEM);
+
+ hash = hashmap__new(strset_hash_fn, strset_equal_fn, set);
+ if (IS_ERR(hash))
+ goto err_out;
+
+ set->strs_data_max_len = max_data_sz;
+ set->strs_hash = hash;
+
+ if (init_data) {
+ long off;
+
+ set->strs_data = malloc(init_data_sz);
+ if (!set->strs_data)
+ goto err_out;
+
+ memcpy(set->strs_data, init_data, init_data_sz);
+ set->strs_data_len = init_data_sz;
+ set->strs_data_cap = init_data_sz;
+
+ for (off = 0; off < set->strs_data_len; off += strlen(set->strs_data + off) + 1) {
+ /* hashmap__add() returns EEXIST if string with the same
+ * content already is in the hash map
+ */
+ err = hashmap__add(hash, (void *)off, (void *)off);
+ if (err == -EEXIST)
+ continue; /* duplicate */
+ if (err)
+ goto err_out;
+ }
+ }
+
+ return set;
+err_out:
+ strset__free(set);
+ return ERR_PTR(err);
+}
+
+void strset__free(struct strset *set)
+{
+ if (IS_ERR_OR_NULL(set))
+ return;
+
+ hashmap__free(set->strs_hash);
+ free(set->strs_data);
+}
+
+size_t strset__data_size(const struct strset *set)
+{
+ return set->strs_data_len;
+}
+
+const char *strset__data(const struct strset *set)
+{
+ return set->strs_data;
+}
+
+static void *strset_add_str_mem(struct strset *set, size_t add_sz)
+{
+ return libbpf_add_mem(&set->strs_data, &set->strs_data_cap, 1,
+ set->strs_data_len, set->strs_data_max_len, add_sz);
+}
+
+/* Find string offset that corresponds to a given string *s*.
+ * Returns:
+ * - >0 offset into string data, if string is found;
+ * - -ENOENT, if string is not in the string data;
+ * - <0, on any other error.
+ */
+int strset__find_str(struct strset *set, const char *s)
+{
+ long old_off, new_off, len;
+ void *p;
+
+ /* see strset__add_str() for why we do this */
+ len = strlen(s) + 1;
+ p = strset_add_str_mem(set, len);
+ if (!p)
+ return -ENOMEM;
+
+ new_off = set->strs_data_len;
+ memcpy(p, s, len);
+
+ if (hashmap__find(set->strs_hash, (void *)new_off, (void **)&old_off))
+ return old_off;
+
+ return -ENOENT;
+}
+
+/* Add a string s to the string data. If the string already exists, return its
+ * offset within string data.
+ * Returns:
+ * - > 0 offset into string data, on success;
+ * - < 0, on error.
+ */
+int strset__add_str(struct strset *set, const char *s)
+{
+ long old_off, new_off, len;
+ void *p;
+ int err;
+
+ /* Hashmap keys are always offsets within set->strs_data, so to even
+ * look up some string from the "outside", we need to first append it
+ * at the end, so that it can be addressed with an offset. Luckily,
+ * until set->strs_data_len is incremented, that string is just a piece
+ * of garbage for the rest of the code, so no harm, no foul. On the
+ * other hand, if the string is unique, it's already appended and
+ * ready to be used, only a simple set->strs_data_len increment away.
+ */
+ len = strlen(s) + 1;
+ p = strset_add_str_mem(set, len);
+ if (!p)
+ return -ENOMEM;
+
+ new_off = set->strs_data_len;
+ memcpy(p, s, len);
+
+ /* Now attempt to add the string, but only if the string with the same
+ * contents doesn't exist already (HASHMAP_ADD strategy). If such
+ * string exists, we'll get its offset in old_off (that's old_key).
+ */
+ err = hashmap__insert(set->strs_hash, (void *)new_off, (void *)new_off,
+ HASHMAP_ADD, (const void **)&old_off, NULL);
+ if (err == -EEXIST)
+ return old_off; /* duplicated string, return existing offset */
+ if (err)
+ return err;
+
+ set->strs_data_len += len; /* new unique string, adjust data length */
+ return new_off;
+}
diff --git a/tools/lib/bpf/strset.h b/tools/lib/bpf/strset.h
new file mode 100644
index 000000000000..b6ddf77a83c2
--- /dev/null
+++ b/tools/lib/bpf/strset.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/* Copyright (c) 2021 Facebook */
+#ifndef __LIBBPF_STRSET_H
+#define __LIBBPF_STRSET_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+struct strset;
+
+struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t init_data_sz);
+void strset__free(struct strset *set);
+
+const char *strset__data(const struct strset *set);
+size_t strset__data_size(const struct strset *set);
+
+int strset__find_str(struct strset *set, const char *s);
+int strset__add_str(struct strset *set, const char *s);
+
+#endif /* __LIBBPF_STRSET_H */
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index ffbb588724d8..95da0e19f4a5 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -28,6 +28,7 @@
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
+#include <linux/if_link.h>
#include "bpf.h"
#include "libbpf.h"
@@ -70,8 +71,10 @@ struct xsk_ctx {
int ifindex;
struct list_head list;
int prog_fd;
+ int link_fd;
int xsks_map_fd;
char ifname[IFNAMSIZ];
+ bool has_bpf_link;
};
struct xsk_socket {
@@ -409,7 +412,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
static const int log_buf_size = 16 * 1024;
struct xsk_ctx *ctx = xsk->ctx;
char log_buf[log_buf_size];
- int err, prog_fd;
+ int prog_fd;
/* This is the fallback C-program:
* SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
@@ -499,14 +502,41 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
return prog_fd;
}
- err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
- xsk->config.xdp_flags);
+ ctx->prog_fd = prog_fd;
+ return 0;
+}
+
+static int xsk_create_bpf_link(struct xsk_socket *xsk)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ struct xsk_ctx *ctx = xsk->ctx;
+ __u32 prog_id = 0;
+ int link_fd;
+ int err;
+
+ err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
if (err) {
- close(prog_fd);
+ pr_warn("getting XDP prog id failed\n");
return err;
}
- ctx->prog_fd = prog_fd;
+ /* if there's a netlink-based XDP prog loaded on interface, bail out
+ * and ask user to do the removal by himself
+ */
+ if (prog_id) {
+ pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
+ return -EINVAL;
+ }
+
+ opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);
+
+ link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts);
+ if (link_fd < 0) {
+ pr_warn("bpf_link_create failed: %s\n", strerror(errno));
+ return link_fd;
+ }
+
+ ctx->link_fd = link_fd;
return 0;
}
@@ -610,21 +640,21 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
if (fd < 0)
continue;
+ memset(&map_info, 0, map_len);
err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
if (err) {
close(fd);
continue;
}
- if (!strcmp(map_info.name, "xsks_map")) {
+ if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) {
ctx->xsks_map_fd = fd;
- continue;
+ break;
}
close(fd);
}
- err = 0;
if (ctx->xsks_map_fd == -1)
err = -ENOENT;
@@ -641,6 +671,98 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
&xsk->fd, 0);
}
+static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
+{
+ struct bpf_link_info link_info;
+ __u32 link_len;
+ __u32 id = 0;
+ int err;
+ int fd;
+
+ while (true) {
+ err = bpf_link_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT) {
+ err = 0;
+ break;
+ }
+ pr_warn("can't get next link: %s\n", strerror(errno));
+ break;
+ }
+
+ fd = bpf_link_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ pr_warn("can't get link by id (%u): %s\n", id, strerror(errno));
+ err = -errno;
+ break;
+ }
+
+ link_len = sizeof(struct bpf_link_info);
+ memset(&link_info, 0, link_len);
+ err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len);
+ if (err) {
+ pr_warn("can't get link info: %s\n", strerror(errno));
+ close(fd);
+ break;
+ }
+ if (link_info.type == BPF_LINK_TYPE_XDP) {
+ if (link_info.xdp.ifindex == ifindex) {
+ *link_fd = fd;
+ if (prog_id)
+ *prog_id = link_info.prog_id;
+ break;
+ }
+ }
+ close(fd);
+ }
+
+ return err;
+}
+
+static bool xsk_probe_bpf_link(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
+ .flags = XDP_FLAGS_SKB_MODE);
+ struct bpf_load_program_attr prog_attr;
+ struct bpf_insn insns[2] = {
+ BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
+ BPF_EXIT_INSN()
+ };
+ int prog_fd, link_fd = -1;
+ int ifindex_lo = 1;
+ bool ret = false;
+ int err;
+
+ err = xsk_link_lookup(ifindex_lo, NULL, &link_fd);
+ if (err)
+ return ret;
+
+ if (link_fd >= 0)
+ return true;
+
+ memset(&prog_attr, 0, sizeof(prog_attr));
+ prog_attr.prog_type = BPF_PROG_TYPE_XDP;
+ prog_attr.insns = insns;
+ prog_attr.insns_cnt = ARRAY_SIZE(insns);
+ prog_attr.license = "GPL";
+
+ prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0);
+ if (prog_fd < 0)
+ return ret;
+
+ link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts);
+ close(prog_fd);
+
+ if (link_fd >= 0) {
+ ret = true;
+ close(link_fd);
+ }
+
+ return ret;
+}
+
static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
{
char ifname[IFNAMSIZ];
@@ -662,64 +784,108 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
ctx->ifname[IFNAMSIZ - 1] = 0;
xsk->ctx = ctx;
+ xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
return 0;
}
-static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp,
- int *xsks_map_fd)
+static int xsk_init_xdp_res(struct xsk_socket *xsk,
+ int *xsks_map_fd)
{
- struct xsk_socket *xsk = _xdp;
struct xsk_ctx *ctx = xsk->ctx;
- __u32 prog_id = 0;
int err;
- err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
- xsk->config.xdp_flags);
+ err = xsk_create_bpf_maps(xsk);
if (err)
return err;
- if (!prog_id) {
- err = xsk_create_bpf_maps(xsk);
- if (err)
- return err;
+ err = xsk_load_xdp_prog(xsk);
+ if (err)
+ goto err_load_xdp_prog;
- err = xsk_load_xdp_prog(xsk);
- if (err) {
- goto err_load_xdp_prog;
- }
- } else {
- ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
- if (ctx->prog_fd < 0)
- return -errno;
- err = xsk_lookup_bpf_maps(xsk);
- if (err) {
- close(ctx->prog_fd);
- return err;
- }
- }
+ if (ctx->has_bpf_link)
+ err = xsk_create_bpf_link(xsk);
+ else
+ err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd,
+ xsk->config.xdp_flags);
- if (xsk->rx) {
- err = xsk_set_bpf_maps(xsk);
- if (err) {
- if (!prog_id) {
- goto err_set_bpf_maps;
- } else {
- close(ctx->prog_fd);
- return err;
- }
- }
- }
- if (xsks_map_fd)
- *xsks_map_fd = ctx->xsks_map_fd;
+ if (err)
+ goto err_attach_xdp_prog;
- return 0;
+ if (!xsk->rx)
+ return err;
+
+ err = xsk_set_bpf_maps(xsk);
+ if (err)
+ goto err_set_bpf_maps;
+
+ return err;
err_set_bpf_maps:
+ if (ctx->has_bpf_link)
+ close(ctx->link_fd);
+ else
+ bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
+err_attach_xdp_prog:
close(ctx->prog_fd);
- bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
err_load_xdp_prog:
xsk_delete_bpf_maps(xsk);
+ return err;
+}
+
+static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id)
+{
+ struct xsk_ctx *ctx = xsk->ctx;
+ int err;
+
+ ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (ctx->prog_fd < 0) {
+ err = -errno;
+ goto err_prog_fd;
+ }
+ err = xsk_lookup_bpf_maps(xsk);
+ if (err)
+ goto err_lookup_maps;
+
+ if (!xsk->rx)
+ return err;
+
+ err = xsk_set_bpf_maps(xsk);
+ if (err)
+ goto err_set_maps;
+
+ return err;
+
+err_set_maps:
+ close(ctx->xsks_map_fd);
+err_lookup_maps:
+ close(ctx->prog_fd);
+err_prog_fd:
+ if (ctx->has_bpf_link)
+ close(ctx->link_fd);
+ return err;
+}
+
+static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
+{
+ struct xsk_socket *xsk = _xdp;
+ struct xsk_ctx *ctx = xsk->ctx;
+ __u32 prog_id = 0;
+ int err;
+
+ if (ctx->has_bpf_link)
+ err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
+ else
+ err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
+
+ if (err)
+ return err;
+
+ err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) :
+ xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id);
+
+ if (!err && xsks_map_fd)
+ *xsks_map_fd = ctx->xsks_map_fd;
return err;
}
@@ -897,6 +1063,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
}
}
xsk->ctx = ctx;
+ xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
if (rx) {
err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
@@ -1053,6 +1220,8 @@ void xsk_socket__delete(struct xsk_socket *xsk)
if (ctx->prog_fd != -1) {
xsk_delete_bpf_maps(xsk);
close(ctx->prog_fd);
+ if (ctx->has_bpf_link)
+ close(ctx->link_fd);
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index e9f121f5d129..01c12dca9c10 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -3,7 +3,8 @@
/*
* AF_XDP user-space access library.
*
- * Copyright(c) 2018 - 2019 Intel Corporation.
+ * Copyright (c) 2018 - 2019 Intel Corporation.
+ * Copyright (c) 2019 Facebook
*
* Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
*/
@@ -13,15 +14,80 @@
#include <stdio.h>
#include <stdint.h>
+#include <stdbool.h>
#include <linux/if_xdp.h>
#include "libbpf.h"
-#include "libbpf_util.h"
#ifdef __cplusplus
extern "C" {
#endif
+/* Load-Acquire Store-Release barriers used by the XDP socket
+ * library. The following macros should *NOT* be considered part of
+ * the xsk.h API, and is subject to change anytime.
+ *
+ * LIBRARY INTERNAL
+ */
+
+#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x)
+#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
+
+#if defined(__i386__) || defined(__x86_64__)
+# define libbpf_smp_store_release(p, v) \
+ do { \
+ asm volatile("" : : : "memory"); \
+ __XSK_WRITE_ONCE(*p, v); \
+ } while (0)
+# define libbpf_smp_load_acquire(p) \
+ ({ \
+ typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \
+ asm volatile("" : : : "memory"); \
+ ___p1; \
+ })
+#elif defined(__aarch64__)
+# define libbpf_smp_store_release(p, v) \
+ asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
+# define libbpf_smp_load_acquire(p) \
+ ({ \
+ typeof(*p) ___p1; \
+ asm volatile ("ldar %w0, %1" \
+ : "=r" (___p1) : "Q" (*p) : "memory"); \
+ ___p1; \
+ })
+#elif defined(__riscv)
+# define libbpf_smp_store_release(p, v) \
+ do { \
+ asm volatile ("fence rw,w" : : : "memory"); \
+ __XSK_WRITE_ONCE(*p, v); \
+ } while (0)
+# define libbpf_smp_load_acquire(p) \
+ ({ \
+ typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \
+ asm volatile ("fence r,rw" : : : "memory"); \
+ ___p1; \
+ })
+#endif
+
+#ifndef libbpf_smp_store_release
+#define libbpf_smp_store_release(p, v) \
+ do { \
+ __sync_synchronize(); \
+ __XSK_WRITE_ONCE(*p, v); \
+ } while (0)
+#endif
+
+#ifndef libbpf_smp_load_acquire
+#define libbpf_smp_load_acquire(p) \
+ ({ \
+ typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \
+ __sync_synchronize(); \
+ ___p1; \
+ })
+#endif
+
+/* LIBRARY INTERNAL -- END */
+
/* Do not access these members directly. Use the functions below. */
#define DEFINE_XSK_RING(name) \
struct name { \
@@ -96,7 +162,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
* this function. Without this optimization it whould have been
* free_entries = r->cached_prod - r->cached_cons + r->size.
*/
- r->cached_cons = *r->consumer + r->size;
+ r->cached_cons = libbpf_smp_load_acquire(r->consumer);
+ r->cached_cons += r->size;
return r->cached_cons - r->cached_prod;
}
@@ -106,7 +173,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
__u32 entries = r->cached_prod - r->cached_cons;
if (entries == 0) {
- r->cached_prod = *r->producer;
+ r->cached_prod = libbpf_smp_load_acquire(r->producer);
entries = r->cached_prod - r->cached_cons;
}
@@ -129,9 +196,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
/* Make sure everything has been written to the ring before indicating
* this to the kernel by writing the producer pointer.
*/
- libbpf_smp_wmb();
-
- *prod->producer += nb;
+ libbpf_smp_store_release(prod->producer, *prod->producer + nb);
}
static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
@@ -139,11 +204,6 @@ static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __
__u32 entries = xsk_cons_nb_avail(cons, nb);
if (entries > 0) {
- /* Make sure we do not speculatively read the data before
- * we have received the packet buffers from the ring.
- */
- libbpf_smp_rmb();
-
*idx = cons->cached_cons;
cons->cached_cons += entries;
}
@@ -161,9 +221,8 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
/* Make sure data has been read before indicating we are done
* with the entries by updating the consumer pointer.
*/
- libbpf_smp_rwmb();
+ libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
- *cons->consumer += nb;
}
static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 17465d454a0e..a0aaf385cbb5 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -26,13 +26,10 @@
void perf_evlist__init(struct perf_evlist *evlist)
{
- int i;
-
- for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
- INIT_HLIST_HEAD(&evlist->heads[i]);
INIT_LIST_HEAD(&evlist->entries);
evlist->nr_entries = 0;
fdarray__init(&evlist->pollfd, 64);
+ perf_evlist__reset_id_hash(evlist);
}
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
@@ -237,6 +234,14 @@ static void perf_evlist__id_hash(struct perf_evlist *evlist,
hlist_add_head(&sid->node, &evlist->heads[hash]);
}
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist)
+{
+ int i;
+
+ for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+ INIT_HLIST_HEAD(&evlist->heads[i]);
+}
+
void perf_evlist__id_add(struct perf_evlist *evlist,
struct perf_evsel *evsel,
int cpu, int thread, u64 id)
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index 2d0fa02b036f..212c29063ad4 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -124,4 +124,6 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
struct perf_evsel *evsel,
int cpu, int thread, int fd);
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
+
#endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 988c539bedb6..d82054225fcc 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -23,10 +23,20 @@ struct perf_record_mmap2 {
__u64 start;
__u64 len;
__u64 pgoff;
- __u32 maj;
- __u32 min;
- __u64 ino;
- __u64 ino_generation;
+ union {
+ struct {
+ __u32 maj;
+ __u32 min;
+ __u64 ino;
+ __u64 ino_generation;
+ };
+ struct {
+ __u8 build_id_size;
+ __u8 __reserved_1;
+ __u16 __reserved_2;
+ __u8 build_id[20];
+ };
+ };
__u32 prot;
__u32 flags;
char filename[PATH_MAX];
diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
index 45cefda24c7b..14236db3677f 100644
--- a/tools/objtool/.gitignore
+++ b/tools/objtool/.gitignore
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
arch/x86/lib/inat-tables.c
-objtool
+/objtool
fixdep
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 0542e46c7552..30f38fdc0d56 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -315,13 +315,15 @@ they mean, and suggestions for how to fix them.
function tracing inserts additional calls, which is not obvious from the
sources).
-10. file.o: warning: func()+0x5c: alternative modifies stack
-
- This means that an alternative includes instructions that modify the
- stack. The problem is that there is only one ORC unwind table, this means
- that the ORC unwind entries must be valid for each of the alternatives.
- The easiest way to enforce this is to ensure alternatives do not contain
- any ORC entries, which in turn implies the above constraint.
+10. file.o: warning: func()+0x5c: stack layout conflict in alternatives
+
+ This means that in the use of the alternative() or ALTERNATIVE()
+ macro, the code paths have conflicting modifications to the stack.
+ The problem is that there is only one ORC unwind table, which means
+ that the ORC unwind entries must be consistent for all possible
+ instruction boundaries regardless of which code has been patched.
+ This limitation can be overcome by massaging the alternatives with
+ NOPs to shift the stack changes around so they no longer conflict.
11. file.o: warning: unannotated intra-function call
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 5cdb19036d7f..92ce4fce7bc7 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -27,6 +27,7 @@ all: $(OBJTOOL)
INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
-I$(srctree)/tools/arch/$(SRCARCH)/include \
+ -I$(srctree)/tools/objtool/include \
-I$(srctree)/tools/objtool/arch/$(SRCARCH)/include
WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
@@ -46,10 +47,6 @@ ifeq ($(SRCARCH),x86)
SUBCMD_ORC := y
endif
-ifeq ($(SUBCMD_ORC),y)
- CFLAGS += -DINSN_USE_ORC
-endif
-
export SUBCMD_CHECK SUBCMD_ORC
export srctree OUTPUT CFLAGS SRCARCH AWK
include $(srctree)/tools/build/Makefile.include
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index cde9c36e40ae..549813cff8ab 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -11,11 +11,11 @@
#include "../../../arch/x86/lib/inat.c"
#include "../../../arch/x86/lib/insn.c"
-#include "../../check.h"
-#include "../../elf.h"
-#include "../../arch.h"
-#include "../../warn.h"
#include <asm/orc_types.h>
+#include <objtool/check.h>
+#include <objtool/elf.h>
+#include <objtool/arch.h>
+#include <objtool/warn.h>
static unsigned char op_to_cfi_reg[][2] = {
{CFI_AX, CFI_R8},
@@ -222,15 +222,38 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
case 0x89:
- if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
+ if (rex_w && !rex_r && modrm_reg == 4) {
- /* mov %rsp, reg */
- ADD_OP(op) {
- op->src.type = OP_SRC_REG;
- op->src.reg = CFI_SP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ if (modrm_mod == 3) {
+ /* mov %rsp, reg */
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ }
+ break;
+
+ } else {
+ /* skip nontrivial SIB */
+ if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x))
+ break;
+
+ /* skip RIP relative displacement */
+ if (modrm_rm == 5 && modrm_mod == 0)
+ break;
+
+ /* mov %rsp, disp(%reg) */
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG_INDIRECT;
+ op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ op->dest.offset = insn.displacement.value;
+ }
+ break;
}
+
break;
}
@@ -259,8 +282,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
op->dest.reg = CFI_BP;
op->dest.offset = insn.displacement.value;
}
+ break;
+ }
- } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
+ if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
/* mov reg, disp(%rsp) */
ADD_OP(op) {
@@ -270,6 +295,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
op->dest.reg = CFI_SP;
op->dest.offset = insn.displacement.value;
}
+ break;
}
break;
@@ -563,8 +589,8 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state)
state->cfa.offset = 8;
/* initial RA (return address) */
- state->regs[16].base = CFI_CFA;
- state->regs[16].offset = -8;
+ state->regs[CFI_RA].base = CFI_CFA;
+ state->regs[CFI_RA].offset = -8;
}
const char *arch_nop_insn(int len)
diff --git a/tools/objtool/arch/x86/include/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h
index 79bc517efba8..79bc517efba8 100644
--- a/tools/objtool/arch/x86/include/cfi_regs.h
+++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h
diff --git a/tools/objtool/arch/x86/include/arch_elf.h b/tools/objtool/arch/x86/include/arch/elf.h
index 69cc4264b28a..69cc4264b28a 100644
--- a/tools/objtool/arch/x86/include/arch_elf.h
+++ b/tools/objtool/arch/x86/include/arch/elf.h
diff --git a/tools/objtool/arch/x86/include/arch/endianness.h b/tools/objtool/arch/x86/include/arch/endianness.h
new file mode 100644
index 000000000000..7c362527da20
--- /dev/null
+++ b/tools/objtool/arch/x86/include/arch/endianness.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ARCH_ENDIANNESS_H
+#define _ARCH_ENDIANNESS_H
+
+#include <endian.h>
+
+#define __TARGET_BYTE_ORDER __LITTLE_ENDIAN
+
+#endif /* _ARCH_ENDIANNESS_H */
diff --git a/tools/objtool/arch/x86/include/arch_special.h b/tools/objtool/arch/x86/include/arch/special.h
index d818b2bffa02..d818b2bffa02 100644
--- a/tools/objtool/arch/x86/include/arch_special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index fd4af88c0ea5..e707d9bcd161 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <string.h>
-#include "../../special.h"
-#include "../../builtin.h"
+#include <objtool/special.h>
+#include <objtool/builtin.h>
#define X86_FEATURE_POPCNT (4 * 32 + 23)
#define X86_FEATURE_SMAP (9 * 32 + 20)
@@ -48,7 +48,7 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
* replacement group.
*/
return insn->offset == special_alt->new_off &&
- (insn->type == INSN_CALL || is_static_jump(insn));
+ (insn->type == INSN_CALL || is_jump(insn));
}
/*
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index c6d199bfd0ae..c3a85d8f6c5c 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -15,10 +15,10 @@
#include <subcmd/parse-options.h>
#include <string.h>
-#include "builtin.h"
-#include "objtool.h"
+#include <objtool/builtin.h>
+#include <objtool/objtool.h>
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
+bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
static const char * const check_usage[] = {
"objtool check [<options>] file.o",
@@ -34,13 +34,15 @@ const struct option check_options[] = {
OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"),
+ OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"),
OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
+ OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
OPT_END(),
};
int cmd_check(int argc, const char **argv)
{
- const char *objname, *s;
+ const char *objname;
struct objtool_file *file;
int ret;
@@ -51,10 +53,6 @@ int cmd_check(int argc, const char **argv)
objname = argv[0];
- s = strstr(objname, "vmlinux.o");
- if (s && !s[9])
- vmlinux = true;
-
file = objtool_open_read(objname);
if (!file)
return 1;
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 7b31121fa60b..8273bbf7cebb 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -13,8 +13,8 @@
*/
#include <string.h>
-#include "builtin.h"
-#include "objtool.h"
+#include <objtool/builtin.h>
+#include <objtool/objtool.h>
static const char *orc_usage[] = {
"objtool orc generate [<options>] file.o",
@@ -51,11 +51,7 @@ int cmd_orc(int argc, const char **argv)
if (list_empty(&file->insn_list))
return 0;
- ret = create_orc(file);
- if (ret)
- return ret;
-
- ret = create_orc_sections(file);
+ ret = orc_create(file);
if (ret)
return ret;
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f2e5e5ce1a05..5e5388a38e2a 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -6,21 +6,20 @@
#include <string.h>
#include <stdlib.h>
-#include "builtin.h"
-#include "cfi.h"
-#include "arch.h"
-#include "check.h"
-#include "special.h"
-#include "warn.h"
-#include "arch_elf.h"
+#include <arch/elf.h>
+#include <objtool/builtin.h>
+#include <objtool/cfi.h>
+#include <objtool/arch.h>
+#include <objtool/check.h>
+#include <objtool/special.h>
+#include <objtool/warn.h>
+#include <objtool/endianness.h>
#include <linux/objtool.h>
#include <linux/hashtable.h>
#include <linux/kernel.h>
#include <linux/static_call_types.h>
-#define FAKE_JUMP_OFFSET -1
-
struct alternative {
struct list_head list;
struct instruction *insn;
@@ -111,15 +110,20 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
static bool is_sibling_call(struct instruction *insn)
{
+ /*
+ * Assume only ELF functions can make sibling calls. This ensures
+ * sibling call detection consistency between vmlinux.o and individual
+ * objects.
+ */
+ if (!insn->func)
+ return false;
+
/* An indirect jump is either a sibling call or a jump to a table. */
if (insn->type == INSN_JUMP_DYNAMIC)
return list_empty(&insn->alts);
- if (!is_static_jump(insn))
- return false;
-
/* add_jump_destinations() sets insn->call_dest for sibling calls. */
- return !!insn->call_dest;
+ return (is_static_jump(insn) && insn->call_dest);
}
/*
@@ -156,6 +160,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
"machine_real_restart",
"rewind_stack_do_exit",
"kunit_try_catch_throw",
+ "xen_start_kernel",
};
if (!func)
@@ -244,7 +249,7 @@ static void init_insn_state(struct insn_state *state, struct section *sec)
* not correctly determine insn->call_dest->sec (external symbols do
* not have a section).
*/
- if (vmlinux && sec)
+ if (vmlinux && noinstr && sec)
state->noinstr = sec->noinstr;
}
@@ -543,6 +548,78 @@ static int create_static_call_sections(struct objtool_file *file)
return 0;
}
+static int create_mcount_loc_sections(struct objtool_file *file)
+{
+ struct section *sec, *reloc_sec;
+ struct reloc *reloc;
+ unsigned long *loc;
+ struct instruction *insn;
+ int idx;
+
+ sec = find_section_by_name(file->elf, "__mcount_loc");
+ if (sec) {
+ INIT_LIST_HEAD(&file->mcount_loc_list);
+ WARN("file already has __mcount_loc section, skipping");
+ return 0;
+ }
+
+ if (list_empty(&file->mcount_loc_list))
+ return 0;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node)
+ idx++;
+
+ sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx);
+ if (!sec)
+ return -1;
+
+ reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+ if (!reloc_sec)
+ return -1;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) {
+
+ loc = (unsigned long *)sec->data->d_buf + idx;
+ memset(loc, 0, sizeof(unsigned long));
+
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+
+ if (insn->sec->sym) {
+ reloc->sym = insn->sec->sym;
+ reloc->addend = insn->offset;
+ } else {
+ reloc->sym = find_symbol_containing(insn->sec, insn->offset);
+
+ if (!reloc->sym) {
+ WARN("missing symbol for insn at offset 0x%lx\n",
+ insn->offset);
+ return -1;
+ }
+
+ reloc->addend = insn->offset - reloc->sym->offset;
+ }
+
+ reloc->type = R_X86_64_64;
+ reloc->offset = idx * sizeof(unsigned long);
+ reloc->sec = reloc_sec;
+ elf_add_reloc(file->elf, reloc);
+
+ idx++;
+ }
+
+ if (elf_rebuild_reloc_section(file->elf, reloc_sec))
+ return -1;
+
+ return 0;
+}
+
/*
* Warnings shouldn't be reported for ignored functions.
*/
@@ -589,7 +666,7 @@ static void add_ignores(struct objtool_file *file)
static const char *uaccess_safe_builtin[] = {
/* KASAN */
"kasan_report",
- "check_memory_region",
+ "kasan_check_range",
/* KASAN out-of-line */
"__asan_loadN_noabort",
"__asan_load1_noabort",
@@ -787,22 +864,16 @@ static int add_jump_destinations(struct objtool_file *file)
if (!is_static_jump(insn))
continue;
- if (insn->offset == FAKE_JUMP_OFFSET)
- continue;
-
reloc = find_reloc_by_dest_range(file->elf, insn->sec,
- insn->offset, insn->len);
+ insn->offset, insn->len);
if (!reloc) {
dest_sec = insn->sec;
dest_off = arch_jump_destination(insn);
} else if (reloc->sym->type == STT_SECTION) {
dest_sec = reloc->sym->sec;
dest_off = arch_dest_reloc_offset(reloc->addend);
- } else if (reloc->sym->sec->idx) {
- dest_sec = reloc->sym->sec;
- dest_off = reloc->sym->sym.st_value +
- arch_dest_reloc_offset(reloc->addend);
- } else if (strstr(reloc->sym->name, "_indirect_thunk_")) {
+ } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
+ !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
/*
* Retpoline jumps are really dynamic jumps in
* disguise, so convert them accordingly.
@@ -814,14 +885,21 @@ static int add_jump_destinations(struct objtool_file *file)
insn->retpoline_safe = true;
continue;
- } else {
- /* external sibling call */
+ } else if (insn->func) {
+ /* internal or external sibling call (with reloc) */
insn->call_dest = reloc->sym;
if (insn->call_dest->static_call_tramp) {
list_add_tail(&insn->static_call_node,
&file->static_call_list);
}
continue;
+ } else if (reloc->sym->sec->idx) {
+ dest_sec = reloc->sym->sec;
+ dest_off = reloc->sym->sym.st_value +
+ arch_dest_reloc_offset(reloc->addend);
+ } else {
+ /* non-func asm code jumping to another file */
+ continue;
}
insn->jump_dest = find_insn(file, dest_sec, dest_off);
@@ -862,15 +940,15 @@ static int add_jump_destinations(struct objtool_file *file)
* case where the parent function's only reference to a
* subfunction is through a jump table.
*/
- if (!strstr(insn->func->name, ".cold.") &&
- strstr(insn->jump_dest->func->name, ".cold.")) {
+ if (!strstr(insn->func->name, ".cold") &&
+ strstr(insn->jump_dest->func->name, ".cold")) {
insn->func->cfunc = insn->jump_dest->func;
insn->jump_dest->func->pfunc = insn->func;
} else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
insn->jump_dest->offset == insn->jump_dest->func->offset) {
- /* internal sibling call */
+ /* internal sibling call (without reloc) */
insn->call_dest = insn->jump_dest->func;
if (insn->call_dest->static_call_tramp) {
list_add_tail(&insn->static_call_node,
@@ -969,6 +1047,22 @@ static int add_call_destinations(struct objtool_file *file)
insn->type = INSN_NOP;
}
+ if (mcount && !strcmp(insn->call_dest->name, "__fentry__")) {
+ if (reloc) {
+ reloc->type = R_NONE;
+ elf_write_reloc(file->elf, reloc);
+ }
+
+ elf_write_insn(file->elf, insn->sec,
+ insn->offset, insn->len,
+ arch_nop_insn(insn->len));
+
+ insn->type = INSN_NOP;
+
+ list_add_tail(&insn->mcount_loc_node,
+ &file->mcount_loc_list);
+ }
+
/*
* Whatever stack impact regular CALLs have, should be undone
* by the RETURN of the called function.
@@ -983,73 +1077,83 @@ static int add_call_destinations(struct objtool_file *file)
}
/*
- * The .alternatives section requires some extra special care, over and above
- * what other special sections require:
- *
- * 1. Because alternatives are patched in-place, we need to insert a fake jump
- * instruction at the end so that validate_branch() skips all the original
- * replaced instructions when validating the new instruction path.
- *
- * 2. An added wrinkle is that the new instruction length might be zero. In
- * that case the old instructions are replaced with noops. We simulate that
- * by creating a fake jump as the only new instruction.
- *
- * 3. In some cases, the alternative section includes an instruction which
- * conditionally jumps to the _end_ of the entry. We have to modify these
- * jumps' destinations to point back to .text rather than the end of the
- * entry in .altinstr_replacement.
+ * The .alternatives section requires some extra special care over and above
+ * other special sections because alternatives are patched in place.
*/
static int handle_group_alt(struct objtool_file *file,
struct special_alt *special_alt,
struct instruction *orig_insn,
struct instruction **new_insn)
{
- static unsigned int alt_group_next_index = 1;
- struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
- unsigned int alt_group = alt_group_next_index++;
+ struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL;
+ struct alt_group *orig_alt_group, *new_alt_group;
unsigned long dest_off;
+
+ orig_alt_group = malloc(sizeof(*orig_alt_group));
+ if (!orig_alt_group) {
+ WARN("malloc failed");
+ return -1;
+ }
+ orig_alt_group->cfi = calloc(special_alt->orig_len,
+ sizeof(struct cfi_state *));
+ if (!orig_alt_group->cfi) {
+ WARN("calloc failed");
+ return -1;
+ }
+
last_orig_insn = NULL;
insn = orig_insn;
sec_for_each_insn_from(file, insn) {
if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
break;
- insn->alt_group = alt_group;
+ insn->alt_group = orig_alt_group;
last_orig_insn = insn;
}
+ orig_alt_group->orig_group = NULL;
+ orig_alt_group->first_insn = orig_insn;
+ orig_alt_group->last_insn = last_orig_insn;
+
+
+ new_alt_group = malloc(sizeof(*new_alt_group));
+ if (!new_alt_group) {
+ WARN("malloc failed");
+ return -1;
+ }
- if (next_insn_same_sec(file, last_orig_insn)) {
- fake_jump = malloc(sizeof(*fake_jump));
- if (!fake_jump) {
+ if (special_alt->new_len < special_alt->orig_len) {
+ /*
+ * Insert a fake nop at the end to make the replacement
+ * alt_group the same size as the original. This is needed to
+ * allow propagate_alt_cfi() to do its magic. When the last
+ * instruction affects the stack, the instruction after it (the
+ * nop) will propagate the new state to the shared CFI array.
+ */
+ nop = malloc(sizeof(*nop));
+ if (!nop) {
WARN("malloc failed");
return -1;
}
- memset(fake_jump, 0, sizeof(*fake_jump));
- INIT_LIST_HEAD(&fake_jump->alts);
- INIT_LIST_HEAD(&fake_jump->stack_ops);
- init_cfi_state(&fake_jump->cfi);
+ memset(nop, 0, sizeof(*nop));
+ INIT_LIST_HEAD(&nop->alts);
+ INIT_LIST_HEAD(&nop->stack_ops);
+ init_cfi_state(&nop->cfi);
- fake_jump->sec = special_alt->new_sec;
- fake_jump->offset = FAKE_JUMP_OFFSET;
- fake_jump->type = INSN_JUMP_UNCONDITIONAL;
- fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
- fake_jump->func = orig_insn->func;
+ nop->sec = special_alt->new_sec;
+ nop->offset = special_alt->new_off + special_alt->new_len;
+ nop->len = special_alt->orig_len - special_alt->new_len;
+ nop->type = INSN_NOP;
+ nop->func = orig_insn->func;
+ nop->alt_group = new_alt_group;
+ nop->ignore = orig_insn->ignore_alts;
}
if (!special_alt->new_len) {
- if (!fake_jump) {
- WARN("%s: empty alternative at end of section",
- special_alt->orig_sec->name);
- return -1;
- }
-
- *new_insn = fake_jump;
- return 0;
+ *new_insn = nop;
+ goto end;
}
- last_new_insn = NULL;
- alt_group = alt_group_next_index++;
insn = *new_insn;
sec_for_each_insn_from(file, insn) {
struct reloc *alt_reloc;
@@ -1061,7 +1165,7 @@ static int handle_group_alt(struct objtool_file *file,
insn->ignore = orig_insn->ignore_alts;
insn->func = orig_insn->func;
- insn->alt_group = alt_group;
+ insn->alt_group = new_alt_group;
/*
* Since alternative replacement code is copy/pasted by the
@@ -1088,14 +1192,8 @@ static int handle_group_alt(struct objtool_file *file,
continue;
dest_off = arch_jump_destination(insn);
- if (dest_off == special_alt->new_off + special_alt->new_len) {
- if (!fake_jump) {
- WARN("%s: alternative jump to end of section",
- special_alt->orig_sec->name);
- return -1;
- }
- insn->jump_dest = fake_jump;
- }
+ if (dest_off == special_alt->new_off + special_alt->new_len)
+ insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
if (!insn->jump_dest) {
WARN_FUNC("can't find alternative jump destination",
@@ -1110,9 +1208,13 @@ static int handle_group_alt(struct objtool_file *file,
return -1;
}
- if (fake_jump)
- list_add(&fake_jump->list, &last_new_insn->list);
-
+ if (nop)
+ list_add(&nop->list, &last_new_insn->list);
+end:
+ new_alt_group->orig_group = orig_alt_group;
+ new_alt_group->first_insn = *new_insn;
+ new_alt_group->last_insn = nop ? : last_new_insn;
+ new_alt_group->cfi = orig_alt_group->cfi;
return 0;
}
@@ -1404,13 +1506,20 @@ static int add_jump_table_alts(struct objtool_file *file)
return 0;
}
+static void set_func_state(struct cfi_state *state)
+{
+ state->cfa = initial_func_cfi.cfa;
+ memcpy(&state->regs, &initial_func_cfi.regs,
+ CFI_NUM_REGS * sizeof(struct cfi_reg));
+ state->stack_size = initial_func_cfi.cfa.offset;
+}
+
static int read_unwind_hints(struct objtool_file *file)
{
struct section *sec, *relocsec;
struct reloc *reloc;
struct unwind_hint *hint;
struct instruction *insn;
- struct cfi_reg *cfa;
int i;
sec = find_section_by_name(file->elf, ".discard.unwind_hints");
@@ -1445,22 +1554,20 @@ static int read_unwind_hints(struct objtool_file *file)
return -1;
}
- cfa = &insn->cfi.cfa;
+ insn->hint = true;
- if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) {
- insn->ret_offset = hint->sp_offset;
+ if (hint->type == UNWIND_HINT_TYPE_FUNC) {
+ set_func_state(&insn->cfi);
continue;
}
- insn->hint = true;
-
if (arch_decode_hint_reg(insn, hint->sp_reg)) {
WARN_FUNC("unsupported unwind_hint sp base reg %d",
insn->sec, insn->offset, hint->sp_reg);
return -1;
}
- cfa->offset = hint->sp_offset;
+ insn->cfi.cfa.offset = bswap_if_needed(hint->sp_offset);
insn->cfi.type = hint->type;
insn->cfi.end = hint->end;
}
@@ -1716,27 +1823,18 @@ static bool is_fentry_call(struct instruction *insn)
static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state)
{
- u8 ret_offset = insn->ret_offset;
struct cfi_state *cfi = &state->cfi;
int i;
if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap)
return true;
- if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset)
+ if (cfi->cfa.offset != initial_func_cfi.cfa.offset)
return true;
- if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset)
+ if (cfi->stack_size != initial_func_cfi.cfa.offset)
return true;
- /*
- * If there is a ret offset hint then don't check registers
- * because a callee-saved register might have been pushed on
- * the stack.
- */
- if (ret_offset)
- return false;
-
for (i = 0; i < CFI_NUM_REGS; i++) {
if (cfi->regs[i].base != initial_func_cfi.regs[i].base ||
cfi->regs[i].offset != initial_func_cfi.regs[i].offset)
@@ -1746,12 +1844,20 @@ static bool has_modified_stack_frame(struct instruction *insn, struct insn_state
return false;
}
+static bool check_reg_frame_pos(const struct cfi_reg *reg,
+ int expected_offset)
+{
+ return reg->base == CFI_CFA &&
+ reg->offset == expected_offset;
+}
+
static bool has_valid_stack_frame(struct insn_state *state)
{
struct cfi_state *cfi = &state->cfi;
- if (cfi->cfa.base == CFI_BP && cfi->regs[CFI_BP].base == CFI_CFA &&
- cfi->regs[CFI_BP].offset == -16)
+ if (cfi->cfa.base == CFI_BP &&
+ check_reg_frame_pos(&cfi->regs[CFI_BP], -cfi->cfa.offset) &&
+ check_reg_frame_pos(&cfi->regs[CFI_RA], -cfi->cfa.offset + 8))
return true;
if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP)
@@ -1880,8 +1986,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
case OP_SRC_REG:
if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
cfa->base == CFI_SP &&
- regs[CFI_BP].base == CFI_CFA &&
- regs[CFI_BP].offset == -cfa->offset) {
+ check_reg_frame_pos(&regs[CFI_BP], -cfa->offset)) {
/* mov %rsp, %rbp */
cfa->base = op->dest.reg;
@@ -1941,12 +2046,58 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
cfa->offset = -cfi->vals[op->src.reg].offset;
cfi->stack_size = cfa->offset;
+ } else if (cfa->base == CFI_SP &&
+ cfi->vals[op->src.reg].base == CFI_SP_INDIRECT &&
+ cfi->vals[op->src.reg].offset == cfa->offset) {
+
+ /*
+ * Stack swizzle:
+ *
+ * 1: mov %rsp, (%[tos])
+ * 2: mov %[tos], %rsp
+ * ...
+ * 3: pop %rsp
+ *
+ * Where:
+ *
+ * 1 - places a pointer to the previous
+ * stack at the Top-of-Stack of the
+ * new stack.
+ *
+ * 2 - switches to the new stack.
+ *
+ * 3 - pops the Top-of-Stack to restore
+ * the original stack.
+ *
+ * Note: we set base to SP_INDIRECT
+ * here and preserve offset. Therefore
+ * when the unwinder reaches ToS it
+ * will dereference SP and then add the
+ * offset to find the next frame, IOW:
+ * (%rsp) + offset.
+ */
+ cfa->base = CFI_SP_INDIRECT;
+
} else {
cfa->base = CFI_UNDEFINED;
cfa->offset = 0;
}
}
+ else if (op->dest.reg == CFI_SP &&
+ cfi->vals[op->src.reg].base == CFI_SP_INDIRECT &&
+ cfi->vals[op->src.reg].offset == cfa->offset) {
+
+ /*
+ * The same stack swizzle case 2) as above. But
+ * because we can't change cfa->base, case 3)
+ * will become a regular POP. Pretend we're a
+ * PUSH so things don't go unbalanced.
+ */
+ cfi->stack_size += 8;
+ }
+
+
break;
case OP_SRC_ADD:
@@ -1966,6 +2117,17 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
break;
}
+ if (!cfi->drap && op->src.reg == CFI_SP &&
+ op->dest.reg == CFI_BP && cfa->base == CFI_SP &&
+ check_reg_frame_pos(&regs[CFI_BP], -cfa->offset + op->src.offset)) {
+
+ /* lea disp(%rsp), %rbp */
+ cfa->base = CFI_BP;
+ cfa->offset -= op->src.offset;
+ cfi->bp_scratch = false;
+ break;
+ }
+
if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
/* drap: lea disp(%rsp), %drap */
@@ -2032,6 +2194,13 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
case OP_SRC_POP:
case OP_SRC_POPF:
+ if (op->dest.reg == CFI_SP && cfa->base == CFI_SP_INDIRECT) {
+
+ /* pop %rsp; # restore from a stack swizzle */
+ cfa->base = CFI_SP;
+ break;
+ }
+
if (!cfi->drap && op->dest.reg == cfa->base) {
/* pop %rbp */
@@ -2060,6 +2229,14 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
break;
case OP_SRC_REG_INDIRECT:
+ if (!cfi->drap && op->dest.reg == cfa->base &&
+ op->dest.reg == CFI_BP) {
+
+ /* mov disp(%rsp), %rbp */
+ cfa->base = CFI_SP;
+ cfa->offset = cfi->stack_size;
+ }
+
if (cfi->drap && op->src.reg == CFI_BP &&
op->src.offset == cfi->drap_offset) {
@@ -2081,6 +2258,12 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
/* mov disp(%rbp), %reg */
/* mov disp(%rsp), %reg */
restore_reg(cfi, op->dest.reg);
+
+ } else if (op->src.reg == CFI_SP &&
+ op->src.offset == regs[op->dest.reg].offset + cfi->stack_size) {
+
+ /* mov disp(%rsp), %reg */
+ restore_reg(cfi, op->dest.reg);
}
break;
@@ -2158,6 +2341,18 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
/* mov reg, disp(%rsp) */
save_reg(cfi, op->src.reg, CFI_CFA,
op->dest.offset - cfi->cfa.offset);
+
+ } else if (op->dest.reg == CFI_SP) {
+
+ /* mov reg, disp(%rsp) */
+ save_reg(cfi, op->src.reg, CFI_CFA,
+ op->dest.offset - cfi->stack_size);
+
+ } else if (op->src.reg == CFI_SP && op->dest.offset == 0) {
+
+ /* mov %rsp, (%reg); # setup a stack swizzle. */
+ cfi->vals[op->dest.reg].base = CFI_SP_INDIRECT;
+ cfi->vals[op->dest.reg].offset = cfa->offset;
}
break;
@@ -2205,22 +2400,50 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
return 0;
}
+/*
+ * The stack layouts of alternatives instructions can sometimes diverge when
+ * they have stack modifications. That's fine as long as the potential stack
+ * layouts don't conflict at any given potential instruction boundary.
+ *
+ * Flatten the CFIs of the different alternative code streams (both original
+ * and replacement) into a single shared CFI array which can be used to detect
+ * conflicts and nicely feed a linear array of ORC entries to the unwinder.
+ */
+static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn)
+{
+ struct cfi_state **alt_cfi;
+ int group_off;
+
+ if (!insn->alt_group)
+ return 0;
+
+ alt_cfi = insn->alt_group->cfi;
+ group_off = insn->offset - insn->alt_group->first_insn->offset;
+
+ if (!alt_cfi[group_off]) {
+ alt_cfi[group_off] = &insn->cfi;
+ } else {
+ if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) {
+ WARN_FUNC("stack layout conflict in alternatives",
+ insn->sec, insn->offset);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
{
struct stack_op *op;
list_for_each_entry(op, &insn->stack_ops, list) {
- struct cfi_state old_cfi = state->cfi;
- int res;
- res = update_cfi_state(insn, &state->cfi, op);
- if (res)
- return res;
+ if (update_cfi_state(insn, &state->cfi, op))
+ return 1;
- if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) {
- WARN_FUNC("alternative modifies stack", insn->sec, insn->offset);
- return -1;
- }
+ if (!insn->alt_group)
+ continue;
if (op->dest.type == OP_DEST_PUSHF) {
if (!state->uaccess_stack) {
@@ -2410,28 +2633,20 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct
return 0;
}
-/*
- * Alternatives should not contain any ORC entries, this in turn means they
- * should not contain any CFI ops, which implies all instructions should have
- * the same same CFI state.
- *
- * It is possible to constuct alternatives that have unreachable holes that go
- * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED
- * states which then results in ORC entries, which we just said we didn't want.
- *
- * Avoid them by copying the CFI entry of the first instruction into the whole
- * alternative.
- */
-static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
+static struct instruction *next_insn_to_validate(struct objtool_file *file,
+ struct instruction *insn)
{
- struct instruction *first_insn = insn;
- int alt_group = insn->alt_group;
+ struct alt_group *alt_group = insn->alt_group;
- sec_for_each_insn_continue(file, insn) {
- if (insn->alt_group != alt_group)
- break;
- insn->cfi = first_insn->cfi;
- }
+ /*
+ * Simulate the fact that alternatives are patched in-place. When the
+ * end of a replacement alt_group is reached, redirect objtool flow to
+ * the end of the original alt_group.
+ */
+ if (alt_group && insn == alt_group->last_insn && alt_group->orig_group)
+ return next_insn_same_sec(file, alt_group->orig_group->last_insn);
+
+ return next_insn_same_sec(file, insn);
}
/*
@@ -2452,7 +2667,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
sec = insn->sec;
while (1) {
- next_insn = next_insn_same_sec(file, insn);
+ next_insn = next_insn_to_validate(file, insn);
if (file->c_file && func && insn->func && func != insn->func->pfunc) {
WARN("%s() falls through to next function %s()",
@@ -2485,6 +2700,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
insn->visited |= visited;
+ if (propagate_alt_cfi(file, insn))
+ return 1;
+
if (!insn->ignore_alts && !list_empty(&insn->alts)) {
bool skip_orig = false;
@@ -2500,9 +2718,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
}
}
- if (insn->alt_group)
- fill_alternative_cfi(file, insn);
-
if (skip_orig)
return 0;
}
@@ -2540,7 +2755,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
case INSN_JUMP_CONDITIONAL:
case INSN_JUMP_UNCONDITIONAL:
- if (func && is_sibling_call(insn)) {
+ if (is_sibling_call(insn)) {
ret = validate_sibling_call(insn, &state);
if (ret)
return ret;
@@ -2562,7 +2777,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
case INSN_JUMP_DYNAMIC:
case INSN_JUMP_DYNAMIC_CONDITIONAL:
- if (func && is_sibling_call(insn)) {
+ if (is_sibling_call(insn)) {
ret = validate_sibling_call(insn, &state);
if (ret)
return ret;
@@ -2605,15 +2820,19 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
break;
case INSN_STD:
- if (state.df)
+ if (state.df) {
WARN_FUNC("recursive STD", sec, insn->offset);
+ return 1;
+ }
state.df = true;
break;
case INSN_CLD:
- if (!state.df && func)
+ if (!state.df && func) {
WARN_FUNC("redundant CLD", sec, insn->offset);
+ return 1;
+ }
state.df = false;
break;
@@ -2736,9 +2955,6 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
!strcmp(insn->sec->name, ".altinstr_aux"))
return true;
- if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET)
- return true;
-
if (!insn->func)
return false;
@@ -2824,10 +3040,7 @@ static int validate_section(struct objtool_file *file, struct section *sec)
continue;
init_insn_state(&state, sec);
- state.cfi.cfa = initial_func_cfi.cfa;
- memcpy(&state.cfi.regs, &initial_func_cfi.regs,
- CFI_NUM_REGS * sizeof(struct cfi_reg));
- state.cfi.stack_size = initial_func_cfi.cfa.offset;
+ set_func_state(&state.cfi);
warnings += validate_symbol(file, sec, func, &state);
}
@@ -2940,6 +3153,13 @@ int check(struct objtool_file *file)
goto out;
warnings += ret;
+ if (mcount) {
+ ret = create_mcount_loc_sections(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
+
out:
/*
* For now, don't fail the kernel build on fatal warnings. These
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index e85988ce04f1..93fa833a49a5 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -15,10 +15,10 @@
#include <string.h>
#include <unistd.h>
#include <errno.h>
-#include "builtin.h"
+#include <objtool/builtin.h>
-#include "elf.h"
-#include "warn.h"
+#include <objtool/elf.h>
+#include <objtool/warn.h>
#define MAX_NAME_LEN 128
@@ -814,25 +814,27 @@ static int elf_rebuild_rel_reloc_section(struct section *sec, int nr)
{
struct reloc *reloc;
int idx = 0, size;
- GElf_Rel *relocs;
+ void *buf;
/* Allocate a buffer for relocations */
- size = nr * sizeof(*relocs);
- relocs = malloc(size);
- if (!relocs) {
+ size = nr * sizeof(GElf_Rel);
+ buf = malloc(size);
+ if (!buf) {
perror("malloc");
return -1;
}
- sec->data->d_buf = relocs;
+ sec->data->d_buf = buf;
sec->data->d_size = size;
+ sec->data->d_type = ELF_T_REL;
sec->sh.sh_size = size;
idx = 0;
list_for_each_entry(reloc, &sec->reloc_list, list) {
- relocs[idx].r_offset = reloc->offset;
- relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+ reloc->rel.r_offset = reloc->offset;
+ reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+ gelf_update_rel(sec->data, idx, &reloc->rel);
idx++;
}
@@ -843,26 +845,28 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr)
{
struct reloc *reloc;
int idx = 0, size;
- GElf_Rela *relocs;
+ void *buf;
/* Allocate a buffer for relocations with addends */
- size = nr * sizeof(*relocs);
- relocs = malloc(size);
- if (!relocs) {
+ size = nr * sizeof(GElf_Rela);
+ buf = malloc(size);
+ if (!buf) {
perror("malloc");
return -1;
}
- sec->data->d_buf = relocs;
+ sec->data->d_buf = buf;
sec->data->d_size = size;
+ sec->data->d_type = ELF_T_RELA;
sec->sh.sh_size = size;
idx = 0;
list_for_each_entry(reloc, &sec->reloc_list, list) {
- relocs[idx].r_offset = reloc->offset;
- relocs[idx].r_addend = reloc->addend;
- relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+ reloc->rela.r_offset = reloc->offset;
+ reloc->rela.r_addend = reloc->addend;
+ reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+ gelf_update_rela(sec->data, idx, &reloc->rela);
idx++;
}
diff --git a/tools/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 4a84c3081b8e..6ff0685f5cc5 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -8,12 +8,8 @@
#include <stdbool.h>
#include <linux/list.h>
-#include "objtool.h"
-#include "cfi.h"
-
-#ifdef INSN_USE_ORC
-#include <asm/orc_types.h>
-#endif
+#include <objtool/objtool.h>
+#include <objtool/cfi.h>
enum insn_type {
INSN_JUMP_CONDITIONAL,
diff --git a/tools/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 85c979caa367..2502bb27de17 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -8,7 +8,7 @@
#include <subcmd/parse-options.h>
extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
extern int cmd_check(int argc, const char **argv);
extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/cfi.h b/tools/objtool/include/objtool/cfi.h
index c7c59c6a44ee..fd5cb0bed9bf 100644
--- a/tools/objtool/cfi.h
+++ b/tools/objtool/include/objtool/cfi.h
@@ -6,7 +6,7 @@
#ifndef _OBJTOOL_CFI_H
#define _OBJTOOL_CFI_H
-#include "cfi_regs.h"
+#include <arch/cfi_regs.h>
#define CFI_UNDEFINED -1
#define CFI_CFA -2
diff --git a/tools/objtool/check.h b/tools/objtool/include/objtool/check.h
index 5ec00a4b891b..f5be798107bc 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -7,8 +7,8 @@
#define _CHECK_H
#include <stdbool.h>
-#include "cfi.h"
-#include "arch.h"
+#include <objtool/cfi.h>
+#include <objtool/arch.h>
struct insn_state {
struct cfi_state cfi;
@@ -19,10 +19,28 @@ struct insn_state {
s8 instr;
};
+struct alt_group {
+ /*
+ * Pointer from a replacement group to the original group. NULL if it
+ * *is* the original group.
+ */
+ struct alt_group *orig_group;
+
+ /* First and last instructions in the group */
+ struct instruction *first_insn, *last_insn;
+
+ /*
+ * Byte-offset-addressed len-sized array of pointers to CFI structs.
+ * This is shared with the other alt_groups in the same alternative.
+ */
+ struct cfi_state **cfi;
+};
+
struct instruction {
struct list_head list;
struct hlist_node hash;
struct list_head static_call_node;
+ struct list_head mcount_loc_node;
struct section *sec;
unsigned long offset;
unsigned int len;
@@ -33,8 +51,7 @@ struct instruction {
bool retpoline_safe;
s8 instr;
u8 visited;
- u8 ret_offset;
- int alt_group;
+ struct alt_group *alt_group;
struct symbol *call_dest;
struct instruction *jump_dest;
struct instruction *first_jump_src;
@@ -43,9 +60,6 @@ struct instruction {
struct symbol *func;
struct list_head stack_ops;
struct cfi_state cfi;
-#ifdef INSN_USE_ORC
- struct orc_entry orc;
-#endif
};
static inline bool is_static_jump(struct instruction *insn)
@@ -54,6 +68,17 @@ static inline bool is_static_jump(struct instruction *insn)
insn->type == INSN_JUMP_UNCONDITIONAL;
}
+static inline bool is_dynamic_jump(struct instruction *insn)
+{
+ return insn->type == INSN_JUMP_DYNAMIC ||
+ insn->type == INSN_JUMP_DYNAMIC_CONDITIONAL;
+}
+
+static inline bool is_jump(struct instruction *insn)
+{
+ return is_static_jump(insn) || is_dynamic_jump(insn);
+}
+
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset);
diff --git a/tools/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index e6890cc70a25..e6890cc70a25 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
diff --git a/tools/objtool/include/objtool/endianness.h b/tools/objtool/include/objtool/endianness.h
new file mode 100644
index 000000000000..10241341eff3
--- /dev/null
+++ b/tools/objtool/include/objtool/endianness.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _OBJTOOL_ENDIANNESS_H
+#define _OBJTOOL_ENDIANNESS_H
+
+#include <arch/endianness.h>
+#include <linux/kernel.h>
+#include <endian.h>
+
+#ifndef __TARGET_BYTE_ORDER
+#error undefined arch __TARGET_BYTE_ORDER
+#endif
+
+#if __BYTE_ORDER != __TARGET_BYTE_ORDER
+#define __NEED_BSWAP 1
+#else
+#define __NEED_BSWAP 0
+#endif
+
+/*
+ * Does a byte swap if target endianness doesn't match the host, i.e. cross
+ * compilation for little endian on big endian and vice versa.
+ * To be used for multi-byte values conversion, which are read from / about
+ * to be written to a target native endianness ELF file.
+ */
+#define bswap_if_needed(val) \
+({ \
+ __typeof__(val) __ret; \
+ switch (sizeof(val)) { \
+ case 8: __ret = __NEED_BSWAP ? bswap_64(val) : (val); break; \
+ case 4: __ret = __NEED_BSWAP ? bswap_32(val) : (val); break; \
+ case 2: __ret = __NEED_BSWAP ? bswap_16(val) : (val); break; \
+ default: \
+ BUILD_BUG(); break; \
+ } \
+ __ret; \
+})
+
+#endif /* _OBJTOOL_ENDIANNESS_H */
diff --git a/tools/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index 4125d4578b23..e68e37476c15 100644
--- a/tools/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -10,7 +10,7 @@
#include <linux/list.h>
#include <linux/hashtable.h>
-#include "elf.h"
+#include <objtool/elf.h>
#define __weak __attribute__((weak))
@@ -19,6 +19,7 @@ struct objtool_file {
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 20);
struct list_head static_call_list;
+ struct list_head mcount_loc_list;
bool ignore_unreachables, c_file, hints, rodata;
};
@@ -26,7 +27,6 @@ struct objtool_file *objtool_open_read(const char *_objname);
int check(struct objtool_file *file);
int orc_dump(const char *objname);
-int create_orc(struct objtool_file *file);
-int create_orc_sections(struct objtool_file *file);
+int orc_create(struct objtool_file *file);
#endif /* _OBJTOOL_H */
diff --git a/tools/objtool/special.h b/tools/objtool/include/objtool/special.h
index abddf38ef334..8a09f4e9d480 100644
--- a/tools/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -7,8 +7,8 @@
#define _SPECIAL_H
#include <stdbool.h>
-#include "check.h"
-#include "elf.h"
+#include <objtool/check.h>
+#include <objtool/elf.h>
#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
diff --git a/tools/objtool/warn.h b/tools/objtool/include/objtool/warn.h
index 7799f60de80a..d99c4675e4a5 100644
--- a/tools/objtool/warn.h
+++ b/tools/objtool/include/objtool/warn.h
@@ -11,7 +11,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include "elf.h"
+#include <objtool/elf.h>
extern const char *objname;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 9df0cd86d310..7b97ce499405 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -21,9 +21,9 @@
#include <subcmd/pager.h>
#include <linux/kernel.h>
-#include "builtin.h"
-#include "objtool.h"
-#include "warn.h"
+#include <objtool/builtin.h>
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
struct cmd_struct {
const char *name;
@@ -62,6 +62,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
INIT_LIST_HEAD(&file.static_call_list);
+ INIT_LIST_HEAD(&file.mcount_loc_list);
file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
file.ignore_unreachables = no_unreachable;
file.hints = false;
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index 5e6a95368d35..f5a8508c42d6 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -6,8 +6,9 @@
#include <unistd.h>
#include <linux/objtool.h>
#include <asm/orc_types.h>
-#include "objtool.h"
-#include "warn.h"
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
+#include <objtool/endianness.h>
static const char *reg_name(unsigned int reg)
{
@@ -54,7 +55,7 @@ static void print_reg(unsigned int reg, int offset)
if (reg == ORC_REG_BP_INDIRECT)
printf("(bp%+d)", offset);
else if (reg == ORC_REG_SP_INDIRECT)
- printf("(sp%+d)", offset);
+ printf("(sp)%+d", offset);
else if (reg == ORC_REG_UNDEFINED)
printf("(und)");
else
@@ -197,11 +198,11 @@ int orc_dump(const char *_objname)
printf(" sp:");
- print_reg(orc[i].sp_reg, orc[i].sp_offset);
+ print_reg(orc[i].sp_reg, bswap_if_needed(orc[i].sp_offset));
printf(" bp:");
- print_reg(orc[i].bp_reg, orc[i].bp_offset);
+ print_reg(orc[i].bp_reg, bswap_if_needed(orc[i].bp_offset));
printf(" type:%s end:%d\n",
orc_type_name(orc[i].type), orc[i].end);
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 9ce68b385a1b..738aa5021bc4 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -9,93 +9,91 @@
#include <linux/objtool.h>
#include <asm/orc_types.h>
-#include "check.h"
-#include "warn.h"
+#include <objtool/check.h>
+#include <objtool/warn.h>
+#include <objtool/endianness.h>
-int create_orc(struct objtool_file *file)
+static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi)
{
- struct instruction *insn;
+ struct instruction *insn = container_of(cfi, struct instruction, cfi);
+ struct cfi_reg *bp = &cfi->regs[CFI_BP];
- for_each_insn(file, insn) {
- struct orc_entry *orc = &insn->orc;
- struct cfi_reg *cfa = &insn->cfi.cfa;
- struct cfi_reg *bp = &insn->cfi.regs[CFI_BP];
+ memset(orc, 0, sizeof(*orc));
- if (!insn->sec->text)
- continue;
-
- orc->end = insn->cfi.end;
-
- if (cfa->base == CFI_UNDEFINED) {
- orc->sp_reg = ORC_REG_UNDEFINED;
- continue;
- }
+ orc->end = cfi->end;
- switch (cfa->base) {
- case CFI_SP:
- orc->sp_reg = ORC_REG_SP;
- break;
- case CFI_SP_INDIRECT:
- orc->sp_reg = ORC_REG_SP_INDIRECT;
- break;
- case CFI_BP:
- orc->sp_reg = ORC_REG_BP;
- break;
- case CFI_BP_INDIRECT:
- orc->sp_reg = ORC_REG_BP_INDIRECT;
- break;
- case CFI_R10:
- orc->sp_reg = ORC_REG_R10;
- break;
- case CFI_R13:
- orc->sp_reg = ORC_REG_R13;
- break;
- case CFI_DI:
- orc->sp_reg = ORC_REG_DI;
- break;
- case CFI_DX:
- orc->sp_reg = ORC_REG_DX;
- break;
- default:
- WARN_FUNC("unknown CFA base reg %d",
- insn->sec, insn->offset, cfa->base);
- return -1;
- }
+ if (cfi->cfa.base == CFI_UNDEFINED) {
+ orc->sp_reg = ORC_REG_UNDEFINED;
+ return 0;
+ }
- switch(bp->base) {
- case CFI_UNDEFINED:
- orc->bp_reg = ORC_REG_UNDEFINED;
- break;
- case CFI_CFA:
- orc->bp_reg = ORC_REG_PREV_SP;
- break;
- case CFI_BP:
- orc->bp_reg = ORC_REG_BP;
- break;
- default:
- WARN_FUNC("unknown BP base reg %d",
- insn->sec, insn->offset, bp->base);
- return -1;
- }
+ switch (cfi->cfa.base) {
+ case CFI_SP:
+ orc->sp_reg = ORC_REG_SP;
+ break;
+ case CFI_SP_INDIRECT:
+ orc->sp_reg = ORC_REG_SP_INDIRECT;
+ break;
+ case CFI_BP:
+ orc->sp_reg = ORC_REG_BP;
+ break;
+ case CFI_BP_INDIRECT:
+ orc->sp_reg = ORC_REG_BP_INDIRECT;
+ break;
+ case CFI_R10:
+ orc->sp_reg = ORC_REG_R10;
+ break;
+ case CFI_R13:
+ orc->sp_reg = ORC_REG_R13;
+ break;
+ case CFI_DI:
+ orc->sp_reg = ORC_REG_DI;
+ break;
+ case CFI_DX:
+ orc->sp_reg = ORC_REG_DX;
+ break;
+ default:
+ WARN_FUNC("unknown CFA base reg %d",
+ insn->sec, insn->offset, cfi->cfa.base);
+ return -1;
+ }
- orc->sp_offset = cfa->offset;
- orc->bp_offset = bp->offset;
- orc->type = insn->cfi.type;
+ switch (bp->base) {
+ case CFI_UNDEFINED:
+ orc->bp_reg = ORC_REG_UNDEFINED;
+ break;
+ case CFI_CFA:
+ orc->bp_reg = ORC_REG_PREV_SP;
+ break;
+ case CFI_BP:
+ orc->bp_reg = ORC_REG_BP;
+ break;
+ default:
+ WARN_FUNC("unknown BP base reg %d",
+ insn->sec, insn->offset, bp->base);
+ return -1;
}
+ orc->sp_offset = cfi->cfa.offset;
+ orc->bp_offset = bp->offset;
+ orc->type = cfi->type;
+
return 0;
}
-static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec,
- unsigned int idx, struct section *insn_sec,
- unsigned long insn_off, struct orc_entry *o)
+static int write_orc_entry(struct elf *elf, struct section *orc_sec,
+ struct section *ip_rsec, unsigned int idx,
+ struct section *insn_sec, unsigned long insn_off,
+ struct orc_entry *o)
{
struct orc_entry *orc;
struct reloc *reloc;
/* populate ORC data */
- orc = (struct orc_entry *)u_sec->data->d_buf + idx;
+ orc = (struct orc_entry *)orc_sec->data->d_buf + idx;
memcpy(orc, o, sizeof(*orc));
+ orc->sp_offset = bswap_if_needed(orc->sp_offset);
+ orc->bp_offset = bswap_if_needed(orc->bp_offset);
/* populate reloc for ip */
reloc = malloc(sizeof(*reloc));
@@ -114,102 +112,149 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti
reloc->type = R_X86_64_PC32;
reloc->offset = idx * sizeof(int);
- reloc->sec = ip_relocsec;
+ reloc->sec = ip_rsec;
elf_add_reloc(elf, reloc);
return 0;
}
-int create_orc_sections(struct objtool_file *file)
-{
- struct instruction *insn, *prev_insn;
- struct section *sec, *u_sec, *ip_relocsec;
- unsigned int idx;
+struct orc_list_entry {
+ struct list_head list;
+ struct orc_entry orc;
+ struct section *insn_sec;
+ unsigned long insn_off;
+};
- struct orc_entry empty = {
- .sp_reg = ORC_REG_UNDEFINED,
- .bp_reg = ORC_REG_UNDEFINED,
- .type = UNWIND_HINT_TYPE_CALL,
- };
+static int orc_list_add(struct list_head *orc_list, struct orc_entry *orc,
+ struct section *sec, unsigned long offset)
+{
+ struct orc_list_entry *entry = malloc(sizeof(*entry));
- sec = find_section_by_name(file->elf, ".orc_unwind");
- if (sec) {
- WARN("file already has .orc_unwind section, skipping");
+ if (!entry) {
+ WARN("malloc failed");
return -1;
}
- /* count the number of needed orcs */
- idx = 0;
- for_each_sec(file, sec) {
- if (!sec->text)
- continue;
-
- prev_insn = NULL;
- sec_for_each_insn(file, sec, insn) {
- if (!prev_insn ||
- memcmp(&insn->orc, &prev_insn->orc,
- sizeof(struct orc_entry))) {
- idx++;
- }
- prev_insn = insn;
- }
-
- /* section terminator */
- if (prev_insn)
- idx++;
- }
- if (!idx)
- return -1;
+ entry->orc = *orc;
+ entry->insn_sec = sec;
+ entry->insn_off = offset;
+ list_add_tail(&entry->list, orc_list);
+ return 0;
+}
- /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
- sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx);
- if (!sec)
- return -1;
+static unsigned long alt_group_len(struct alt_group *alt_group)
+{
+ return alt_group->last_insn->offset +
+ alt_group->last_insn->len -
+ alt_group->first_insn->offset;
+}
- ip_relocsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
- if (!ip_relocsec)
- return -1;
+int orc_create(struct objtool_file *file)
+{
+ struct section *sec, *ip_rsec, *orc_sec;
+ unsigned int nr = 0, idx = 0;
+ struct orc_list_entry *entry;
+ struct list_head orc_list;
- /* create .orc_unwind section */
- u_sec = elf_create_section(file->elf, ".orc_unwind", 0,
- sizeof(struct orc_entry), idx);
+ struct orc_entry null = {
+ .sp_reg = ORC_REG_UNDEFINED,
+ .bp_reg = ORC_REG_UNDEFINED,
+ .type = UNWIND_HINT_TYPE_CALL,
+ };
- /* populate sections */
- idx = 0;
+ /* Build a deduplicated list of ORC entries: */
+ INIT_LIST_HEAD(&orc_list);
for_each_sec(file, sec) {
+ struct orc_entry orc, prev_orc = {0};
+ struct instruction *insn;
+ bool empty = true;
+
if (!sec->text)
continue;
- prev_insn = NULL;
sec_for_each_insn(file, sec, insn) {
- if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
- sizeof(struct orc_entry))) {
+ struct alt_group *alt_group = insn->alt_group;
+ int i;
- if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
- insn->sec, insn->offset,
- &insn->orc))
+ if (!alt_group) {
+ if (init_orc_entry(&orc, &insn->cfi))
return -1;
+ if (!memcmp(&prev_orc, &orc, sizeof(orc)))
+ continue;
+ if (orc_list_add(&orc_list, &orc, sec,
+ insn->offset))
+ return -1;
+ nr++;
+ prev_orc = orc;
+ empty = false;
+ continue;
+ }
- idx++;
+ /*
+ * Alternatives can have different stack layout
+ * possibilities (but they shouldn't conflict).
+ * Instead of traversing the instructions, use the
+ * alt_group's flattened byte-offset-addressed CFI
+ * array.
+ */
+ for (i = 0; i < alt_group_len(alt_group); i++) {
+ struct cfi_state *cfi = alt_group->cfi[i];
+ if (!cfi)
+ continue;
+ if (init_orc_entry(&orc, cfi))
+ return -1;
+ if (!memcmp(&prev_orc, &orc, sizeof(orc)))
+ continue;
+ if (orc_list_add(&orc_list, &orc, insn->sec,
+ insn->offset + i))
+ return -1;
+ nr++;
+ prev_orc = orc;
+ empty = false;
}
- prev_insn = insn;
- }
- /* section terminator */
- if (prev_insn) {
- if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
- prev_insn->sec,
- prev_insn->offset + prev_insn->len,
- &empty))
- return -1;
+ /* Skip to the end of the alt_group */
+ insn = alt_group->last_insn;
+ }
- idx++;
+ /* Add a section terminator */
+ if (!empty) {
+ orc_list_add(&orc_list, &null, sec, sec->len);
+ nr++;
}
}
+ if (!nr)
+ return 0;
+
+ /* Create .orc_unwind, .orc_unwind_ip and .rela.orc_unwind_ip sections: */
+ sec = find_section_by_name(file->elf, ".orc_unwind");
+ if (sec) {
+ WARN("file already has .orc_unwind section, skipping");
+ return -1;
+ }
+ orc_sec = elf_create_section(file->elf, ".orc_unwind", 0,
+ sizeof(struct orc_entry), nr);
+ if (!orc_sec)
+ return -1;
+
+ sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr);
+ if (!sec)
+ return -1;
+ ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+ if (!ip_rsec)
+ return -1;
+
+ /* Write ORC entries to sections: */
+ list_for_each_entry(entry, &orc_list, list) {
+ if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++,
+ entry->insn_sec, entry->insn_off,
+ &entry->orc))
+ return -1;
+ }
- if (elf_rebuild_reloc_section(file->elf, ip_relocsec))
+ if (elf_rebuild_reloc_section(file->elf, ip_rsec))
return -1;
return 0;
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 1a2420febd08..2c7fbda7b055 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -11,10 +11,11 @@
#include <stdlib.h>
#include <string.h>
-#include "builtin.h"
-#include "special.h"
-#include "warn.h"
-#include "arch_special.h"
+#include <arch/special.h>
+#include <objtool/builtin.h>
+#include <objtool/special.h>
+#include <objtool/warn.h>
+#include <objtool/endianness.h>
struct special_entry {
const char *sec;
@@ -77,8 +78,9 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
if (entry->feature) {
unsigned short feature;
- feature = *(unsigned short *)(sec->data->d_buf + offset +
- entry->feature);
+ feature = bswap_if_needed(*(unsigned short *)(sec->data->d_buf +
+ offset +
+ entry->feature));
arch_handle_alternative(feature, alt);
}
diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c
index 7843e9a7a72f..8314e824db4a 100644
--- a/tools/objtool/weak.c
+++ b/tools/objtool/weak.c
@@ -7,7 +7,7 @@
#include <stdbool.h>
#include <errno.h>
-#include "objtool.h"
+#include <objtool/objtool.h>
#define UNSUPPORTED(name) \
({ \
@@ -25,12 +25,7 @@ int __weak orc_dump(const char *_objname)
UNSUPPORTED("orc");
}
-int __weak create_orc(struct objtool_file *file)
-{
- UNSUPPORTED("orc");
-}
-
-int __weak create_orc_sections(struct objtool_file *file)
+int __weak orc_create(struct objtool_file *file)
{
UNSUPPORTED("orc");
}
diff --git a/tools/perf/Build b/tools/perf/Build
index 5f392dbb88fc..db61dbe2b543 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -24,6 +24,7 @@ perf-y += builtin-mem.o
perf-y += builtin-data.o
perf-y += builtin-version.o
perf-y += builtin-c2c.o
+perf-y += builtin-daemon.o
perf-$(CONFIG_TRACE) += builtin-trace.o
perf-$(CONFIG_LIBELF) += builtin-probe.o
diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt
index a4e392156488..c0d22fbe9201 100644
--- a/tools/perf/Documentation/examples.txt
+++ b/tools/perf/Documentation/examples.txt
@@ -3,7 +3,7 @@
****** perf by examples ******
------------------------------
-[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ]
+[ From an e-mail by Ingo Molnar, https://lore.kernel.org/lkml/20090804195717.GA5998@elte.hu ]
First, discovery/enumeration of available counters can be done via
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index 079cdfabb352..0f1005209a2b 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -4,7 +4,7 @@
r synthesize branches events (returns only)
x synthesize transactions events
w synthesize ptwrite events
- p synthesize power events
+ p synthesize power events (incl. PSB events for Intel PT)
o synthesize other events recorded due to the use
of aux-output (refer to perf record)
e synthesize error events
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index f6de0952ff3c..bb167e32a1d7 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -74,6 +74,12 @@ OPTIONS
used when creating a uprobe for a process that resides in a
different mount namespace from the perf(1) utility.
+--debuginfod=URLs::
+ Specify debuginfod URL to be used when retrieving perf.data binaries,
+ it follows the same syntax as the DEBUGINFOD_URLS variable, like:
+
+ buildid-cache.debuginfod=http://192.168.122.174:8002
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1]
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 5c379adf8304..153bde14bbe0 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -238,6 +238,13 @@ buildid.*::
cache location, or to disable it altogether. If you want to disable it,
set buildid.dir to /dev/null. The default is $HOME/.debug
+buildid-cache.*::
+ buildid-cache.debuginfod=URLs
+ Specify debuginfod URLs to be used when retrieving perf.data binaries,
+ it follows the same syntax as the DEBUGINFOD_URLS variable, like:
+
+ buildid-cache.debuginfod=http://192.168.122.174:8002
+
annotate.*::
These are in control of addresses, jump function, source code
in lines of assembly code from a specific program.
@@ -552,11 +559,12 @@ kmem.*::
record.*::
record.build-id::
- This option can be 'cache', 'no-cache' or 'skip'.
+ This option can be 'cache', 'no-cache', 'skip' or 'mmap'.
'cache' is to post-process data and save/update the binaries into
the build-id cache (in ~/.debug). This is the default.
But if this option is 'no-cache', it will not update the build-id cache.
'skip' skips post-processing and does not update the cache.
+ 'mmap' skips post-processing and reads build-ids from MMAP events.
record.call-graph::
This is identical to 'call-graph.record-mode', except it is
@@ -695,6 +703,20 @@ auxtrace.*::
If the directory does not exist or has the wrong file type,
the current directory is used.
+daemon.*::
+
+ daemon.base::
+ Base path for daemon data. All sessions data are stored under
+ this path.
+
+session-<NAME>.*::
+
+ session-<NAME>.run::
+
+ Defines new record session for daemon. The value is record's
+ command line without the 'record' keyword.
+
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-daemon.txt b/tools/perf/Documentation/perf-daemon.txt
new file mode 100644
index 000000000000..f558f8e4bc9b
--- /dev/null
+++ b/tools/perf/Documentation/perf-daemon.txt
@@ -0,0 +1,208 @@
+perf-daemon(1)
+==============
+
+
+NAME
+----
+perf-daemon - Run record sessions on background
+
+
+SYNOPSIS
+--------
+[verse]
+'perf daemon'
+'perf daemon' [<options>]
+'perf daemon start' [<options>]
+'perf daemon stop' [<options>]
+'perf daemon signal' [<options>]
+'perf daemon ping' [<options>]
+
+
+DESCRIPTION
+-----------
+This command allows to run simple daemon process that starts and
+monitors configured record sessions.
+
+You can imagine 'perf daemon' of background process with several
+'perf record' child tasks, like:
+
+ # ps axjf
+ ...
+ 1 916507 ... perf daemon start
+ 916507 916508 ... \_ perf record --control=fifo:control,ack -m 10M -e cycles --overwrite --switch-output -a
+ 916507 916509 ... \_ perf record --control=fifo:control,ack -m 20M -e sched:* --overwrite --switch-output -a
+
+Not every 'perf record' session is suitable for running under daemon.
+User need perf session that either produces data on query, like the
+flight recorder sessions in above example or session that is configured
+to produce data periodically, like with --switch-output configuration
+for time and size.
+
+Each session is started with control setup (with perf record --control
+options).
+
+Sessions are configured through config file, see CONFIG FILE section
+with EXAMPLES.
+
+
+OPTIONS
+-------
+-v::
+--verbose::
+ Be more verbose.
+
+--config=<PATH>::
+ Config file path. If not provided, perf will check system and default
+ locations (/etc/perfconfig, $HOME/.perfconfig).
+
+--base=<PATH>::
+ Base directory path. Each daemon instance is running on top
+ of base directory. Only one instance of server can run on
+ top of one directory at the time.
+
+All generic options are available also under commands.
+
+
+START COMMAND
+-------------
+The start command creates the daemon process.
+
+-f::
+--foreground::
+ Do not put the process in background.
+
+
+STOP COMMAND
+------------
+The stop command stops all the session and the daemon process.
+
+
+SIGNAL COMMAND
+--------------
+The signal command sends signal to configured sessions.
+
+--session::
+ Send signal to specific session.
+
+
+PING COMMAND
+------------
+The ping command sends control ping to configured sessions.
+
+--session::
+ Send ping to specific session.
+
+
+CONFIG FILE
+-----------
+The daemon is configured within standard perf config file by
+following new variables:
+
+daemon.base:
+ Base path for daemon data. All sessions data are
+ stored under this path.
+
+session-<NAME>.run:
+ Defines new record session. The value is record's command
+ line without the 'record' keyword.
+
+Each perf record session is run in daemon.base/<NAME> directory.
+
+
+EXAMPLES
+--------
+Example with 2 record sessions:
+
+ # cat ~/.perfconfig
+ [daemon]
+ base=/opt/perfdata
+
+ [session-cycles]
+ run = -m 10M -e cycles --overwrite --switch-output -a
+
+ [session-sched]
+ run = -m 20M -e sched:* --overwrite --switch-output -a
+
+
+Starting the daemon:
+
+ # perf daemon start
+
+
+Check sessions:
+
+ # perf daemon
+ [603349:daemon] base: /opt/perfdata
+ [603350:cycles] perf record -m 10M -e cycles --overwrite --switch-output -a
+ [603351:sched] perf record -m 20M -e sched:* --overwrite --switch-output -a
+
+First line is daemon process info with configured daemon base.
+
+
+Check sessions with more info:
+
+ # perf daemon -v
+ [603349:daemon] base: /opt/perfdata
+ output: /opt/perfdata/output
+ lock: /opt/perfdata/lock
+ up: 1 minutes
+ [603350:cycles] perf record -m 10M -e cycles --overwrite --switch-output -a
+ base: /opt/perfdata/session-cycles
+ output: /opt/perfdata/session-cycles/output
+ control: /opt/perfdata/session-cycles/control
+ ack: /opt/perfdata/session-cycles/ack
+ up: 1 minutes
+ [603351:sched] perf record -m 20M -e sched:* --overwrite --switch-output -a
+ base: /opt/perfdata/session-sched
+ output: /opt/perfdata/session-sched/output
+ control: /opt/perfdata/session-sched/control
+ ack: /opt/perfdata/session-sched/ack
+ up: 1 minutes
+
+The 'base' path is daemon/session base.
+The 'lock' file is daemon's lock file guarding that no other
+daemon is running on top of the base.
+The 'output' file is perf record output for specific session.
+The 'control' and 'ack' files are perf control files.
+The 'up' number shows minutes daemon/session is running.
+
+
+Make sure control session is online:
+
+ # perf daemon ping
+ OK cycles
+ OK sched
+
+
+Send USR2 signal to session 'cycles' to generate perf.data file:
+
+ # perf daemon signal --session cycles
+ signal 12 sent to session 'cycles [603452]'
+
+ # tail -2 /opt/perfdata/session-cycles/output
+ [ perf record: dump data: Woken up 1 times ]
+ [ perf record: Dump perf.data.2020123017013149 ]
+
+
+Send USR2 signal to all sessions:
+
+ # perf daemon signal
+ signal 12 sent to session 'cycles [603452]'
+ signal 12 sent to session 'sched [603453]'
+
+ # tail -2 /opt/perfdata/session-cycles/output
+ [ perf record: dump data: Woken up 1 times ]
+ [ perf record: Dump perf.data.2020123017024689 ]
+ # tail -2 /opt/perfdata/session-sched/output
+ [ perf record: dump data: Woken up 1 times ]
+ [ perf record: Dump perf.data.2020123017024713 ]
+
+
+Stop daemon:
+
+ # perf daemon stop
+
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-config[1]
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
index c0a66400a960..9af8b8dfb7b6 100644
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -29,7 +29,7 @@ OPTIONS
Show just the sample frequency used for each event.
-v::
---verbose=::
+--verbose::
Show all fields.
-g::
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 1e91121bac0f..6e82b7cc0bf0 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -28,8 +28,8 @@ OPTIONS
specified: function_graph or function.
-v::
---verbose=::
- Verbosity level.
+--verbose::
+ Increase the verbosity level.
-F::
--funcs::
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index cd362dc2af07..1dcec73c910c 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -858,7 +858,7 @@ The letters are:
b synthesize "branches" events
x synthesize "transactions" events
w synthesize "ptwrite" events
- p synthesize "power" events
+ p synthesize "power" events (incl. PSB events)
c synthesize branches events (calls only)
r synthesize branches events (returns only)
e synthesize tracing error events
@@ -913,6 +913,11 @@ Where:
For more details refer to the Intel 64 and IA-32 Architectures Software
Developer Manuals.
+PSB events show when a PSB+ occurred and also the byte-offset in the trace.
+Emitting a PSB+ can cause a CPU a slight delay. When doing timing analysis
+of code with Intel PT, it is useful to know if a timing bubble was caused
+by Intel PT or not.
+
Error events show where the decoder lost the trace. Error events
are quite important. Users must know if what they are seeing is a complete
picture or not. The "e" option may be followed by flags which affect what errors
@@ -1141,6 +1146,88 @@ XED
include::build-xed.txt[]
+
+Tracing Virtual Machines
+------------------------
+
+Currently, only kernel tracing is supported and only with "timeless" decoding
+i.e. no TSC timestamps
+
+Other limitations and caveats
+
+ VMX controls may suppress packets needed for decoding resulting in decoding errors
+ VMX controls may block the perf NMI to the host potentially resulting in lost trace data
+ Guest kernel self-modifying code (e.g. jump labels or JIT-compiled eBPF) will result in decoding errors
+ Guest thread information is unknown
+ Guest VCPU is unknown but may be able to be inferred from the host thread
+ Callchains are not supported
+
+Example
+
+Start VM
+
+ $ sudo virsh start kubuntu20.04
+ Domain kubuntu20.04 started
+
+Mount the guest file system. Note sshfs needs -o direct_io to enable reading of proc files. root access is needed to read /proc/kcore.
+
+ $ mkdir vm0
+ $ sshfs -o direct_io root@vm0:/ vm0
+
+Copy the guest /proc/kallsyms, /proc/modules and /proc/kcore
+
+ $ perf buildid-cache -v --kcore vm0/proc/kcore
+ kcore added to build-id cache directory /home/user/.debug/[kernel.kcore]/9600f316a53a0f54278885e8d9710538ec5f6a08/2021021807494306
+ $ KALLSYMS=/home/user/.debug/[kernel.kcore]/9600f316a53a0f54278885e8d9710538ec5f6a08/2021021807494306/kallsyms
+
+Find the VM process
+
+ $ ps -eLl | grep 'KVM\|PID'
+ F S UID PID PPID LWP C PRI NI ADDR SZ WCHAN TTY TIME CMD
+ 3 S 64055 1430 1 1440 1 80 0 - 1921718 - ? 00:02:47 CPU 0/KVM
+ 3 S 64055 1430 1 1441 1 80 0 - 1921718 - ? 00:02:41 CPU 1/KVM
+ 3 S 64055 1430 1 1442 1 80 0 - 1921718 - ? 00:02:38 CPU 2/KVM
+ 3 S 64055 1430 1 1443 2 80 0 - 1921718 - ? 00:03:18 CPU 3/KVM
+
+Start an open-ended perf record, tracing the VM process, do something on the VM, and then ctrl-C to stop.
+TSC is not supported and tsc=0 must be specified. That means mtc is useless, so add mtc=0.
+However, IPC can still be determined, hence cyc=1 can be added.
+Only kernel decoding is supported, so 'k' must be specified.
+Intel PT traces both the host and the guest so --guest and --host need to be specified.
+Without timestamps, --per-thread must be specified to distinguish threads.
+
+ $ sudo perf kvm --guest --host --guestkallsyms $KALLSYMS record --kcore -e intel_pt/tsc=0,mtc=0,cyc=1/k -p 1430 --per-thread
+ ^C
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 5.829 MB ]
+
+perf script can be used to provide an instruction trace
+
+ $ perf script --guestkallsyms $KALLSYMS --insn-trace --xed -F+ipc | grep -C10 vmresume | head -21
+ CPU 0/KVM 1440 ffffffff82133cdd __vmx_vcpu_run+0x3d ([kernel.kallsyms]) movq 0x48(%rax), %r9
+ CPU 0/KVM 1440 ffffffff82133ce1 __vmx_vcpu_run+0x41 ([kernel.kallsyms]) movq 0x50(%rax), %r10
+ CPU 0/KVM 1440 ffffffff82133ce5 __vmx_vcpu_run+0x45 ([kernel.kallsyms]) movq 0x58(%rax), %r11
+ CPU 0/KVM 1440 ffffffff82133ce9 __vmx_vcpu_run+0x49 ([kernel.kallsyms]) movq 0x60(%rax), %r12
+ CPU 0/KVM 1440 ffffffff82133ced __vmx_vcpu_run+0x4d ([kernel.kallsyms]) movq 0x68(%rax), %r13
+ CPU 0/KVM 1440 ffffffff82133cf1 __vmx_vcpu_run+0x51 ([kernel.kallsyms]) movq 0x70(%rax), %r14
+ CPU 0/KVM 1440 ffffffff82133cf5 __vmx_vcpu_run+0x55 ([kernel.kallsyms]) movq 0x78(%rax), %r15
+ CPU 0/KVM 1440 ffffffff82133cf9 __vmx_vcpu_run+0x59 ([kernel.kallsyms]) movq (%rax), %rax
+ CPU 0/KVM 1440 ffffffff82133cfc __vmx_vcpu_run+0x5c ([kernel.kallsyms]) callq 0xffffffff82133c40
+ CPU 0/KVM 1440 ffffffff82133c40 vmx_vmenter+0x0 ([kernel.kallsyms]) jz 0xffffffff82133c46
+ CPU 0/KVM 1440 ffffffff82133c42 vmx_vmenter+0x2 ([kernel.kallsyms]) vmresume IPC: 0.11 (50/445)
+ :1440 1440 ffffffffbb678b06 native_write_msr+0x6 ([guest.kernel.kallsyms]) nopl %eax, (%rax,%rax,1)
+ :1440 1440 ffffffffbb678b0b native_write_msr+0xb ([guest.kernel.kallsyms]) retq IPC: 0.04 (2/41)
+ :1440 1440 ffffffffbb666646 lapic_next_deadline+0x26 ([guest.kernel.kallsyms]) data16 nop
+ :1440 1440 ffffffffbb666648 lapic_next_deadline+0x28 ([guest.kernel.kallsyms]) xor %eax, %eax
+ :1440 1440 ffffffffbb66664a lapic_next_deadline+0x2a ([guest.kernel.kallsyms]) popq %rbp
+ :1440 1440 ffffffffbb66664b lapic_next_deadline+0x2b ([guest.kernel.kallsyms]) retq IPC: 0.16 (4/25)
+ :1440 1440 ffffffffbb74607f clockevents_program_event+0x8f ([guest.kernel.kallsyms]) test %eax, %eax
+ :1440 1440 ffffffffbb746081 clockevents_program_event+0x91 ([guest.kernel.kallsyms]) jz 0xffffffffbb74603c IPC: 0.06 (2/30)
+ :1440 1440 ffffffffbb74603c clockevents_program_event+0x4c ([guest.kernel.kallsyms]) popq %rbx
+ :1440 1440 ffffffffbb74603d clockevents_program_event+0x4d ([guest.kernel.kallsyms]) popq %r12
+
+
+
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt
index f3c620951f6e..c97527df8ecd 100644
--- a/tools/perf/Documentation/perf-kallsyms.txt
+++ b/tools/perf/Documentation/perf-kallsyms.txt
@@ -20,5 +20,5 @@ modules).
OPTIONS
-------
-v::
---verbose=::
+--verbose::
Increase verbosity level, showing details about symbol table loading, etc.
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 199ea0f0a6c0..66177511c5c4 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -63,6 +63,9 @@ OPTIONS
--phys-data::
Record/Report sample physical addresses
+--data-page-size::
+ Record/Report sample data address page size
+
RECORD OPTIONS
--------------
-e::
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 34cf651ee237..f3161c9673e9 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -296,6 +296,9 @@ OPTIONS
--data-page-size::
Record the sampled data address data page size.
+--code-page-size::
+ Record the sampled code address (ip) page size
+
-T::
--timestamp::
Record the sample timestamps. Use it with 'perf report -D' to see the
@@ -485,6 +488,9 @@ Specify vmlinux path which has debuginfo.
--buildid-all::
Record build-id of all DSOs regardless whether it's actually hit or not.
+--buildid-mmap::
+Record build ids in mmap2 events, disables build id cache (implies --no-buildid).
+
--aio[=n]::
Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4).
Asynchronous mode is supported only when linking Perf tool with libc library
@@ -640,9 +646,18 @@ ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows.
Listen on ctl-fd descriptor for command to control measurement.
Available commands:
- 'enable' : enable events
- 'disable' : disable events
- 'snapshot': AUX area tracing snapshot).
+ 'enable' : enable events
+ 'disable' : disable events
+ 'enable name' : enable event 'name'
+ 'disable name' : disable event 'name'
+ 'snapshot' : AUX area tracing snapshot).
+ 'stop' : stop perf record
+ 'ping' : ping
+
+ 'evlist [-v|-g|-F] : display all events
+ -F Show just the sample frequency used for each event.
+ -v Show all fields.
+ -g Show event group information.
Measurements can be started with events disabled using --delay=-1 option. Optionally
send control command completion ('ack\n') to ack-fd descriptor to synchronize with the
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 8f7f4e9605d8..f546b5e9db05 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -108,6 +108,10 @@ OPTIONS
- period: Raw number of event count of sample
- time: Separate the samples by time stamp with the resolution specified by
--time-quantum (default 100ms). Specify with overhead and before it.
+ - code_page_size: the code page size of sampled code address (ip)
+ - ins_lat: Instruction latency in core cycles. This is the global instruction
+ latency
+ - local_ins_lat: Local instruction latency version
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
@@ -139,7 +143,7 @@ OPTIONS
If the --mem-mode option is used, the following sort keys are also available
(incompatible with --branch-stack):
- symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+ symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline, blocked.
- symbol_daddr: name of data symbol being executed on at the time of sample
- dso_daddr: name of library or module containing the data being executed
@@ -151,9 +155,11 @@ OPTIONS
- dcacheline: the cacheline the data address is on at the time of the sample
- phys_daddr: physical address of data being executed on at the time of sample
- data_page_size: the data page size of data being executed on at the time of sample
+ - blocked: reason of blocked load access for the data at the time of the sample
And the default sort keys are changed to local_weight, mem, sym, dso,
- symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
+ symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat,
+ see '--mem-mode'.
If the data file has tracepoint event(s), following (dynamic) sort keys
are also available:
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 44d37210fc8f..5b8b61075039 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -118,7 +118,7 @@ OPTIONS
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr,
- metric, misc, srccode, ipc, data_page_size.
+ metric, misc, srccode, ipc, data_page_size, code_page_size.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
@@ -422,9 +422,32 @@ include::itrace.txt[]
Only consider the listed symbols. Symbols are typically a name
but they may also be hexadecimal address.
+ The hexadecimal address may be the start address of a symbol or
+ any other address to filter the trace records
+
For example, to select the symbol noploop or the address 0x4007a0:
perf script --symbols=noploop,0x4007a0
+ Support filtering trace records by symbol name, start address of
+ symbol, any hexadecimal address and address range.
+
+ The comparison order is:
+
+ 1. symbol name comparison
+ 2. symbol start address comparison.
+ 3. any hexadecimal address comparison.
+ 4. address range comparison (see --addr-range).
+
+--addr-range::
+ Use with -S or --symbols to list traced records within address range.
+
+ For example, to list the traced records within the address range
+ [0x4007a0, 0x0x4007a9]:
+ perf script -S 0x4007a0 --addr-range 10
+
+--dsos=::
+ Only consider symbols in these DSOs.
+
--call-trace::
Show call stream for intel_pt traces. The CPUs are interleaved, but
can be filtered with -C.
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 5d4a673d7621..08a1714494f8 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -75,6 +75,24 @@ report::
--tid=<tid>::
stat events on existing thread id (comma separated list)
+-b::
+--bpf-prog::
+ stat events on existing bpf program id (comma separated list),
+ requiring root rights. bpftool-prog could be used to find program
+ id all bpf programs in the system. For example:
+
+ # bpftool prog | head -n 1
+ 17247: tracepoint name sys_enter tag 192d548b9d754067 gpl
+
+ # perf stat -e cycles,instructions --bpf-prog 17247 --timeout 1000
+
+ Performance counter stats for 'BPF program(s) 17247':
+
+ 85,967 cycles
+ 28,982 instructions # 0.34 insn per cycle
+
+ 1.102235068 seconds time elapsed
+
ifdef::HAVE_LIBPFM[]
--pfm-events events::
Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
@@ -358,7 +376,7 @@ See perf list output for the possble metrics and metricgroups.
Do not aggregate counts across all monitored CPUs.
--topdown::
-Print top down level 1 metrics if supported by the CPU. This allows to
+Print complete top-down metrics supported by the CPU. This allows to
determine bottle necks in the CPU pipeline for CPU bound workloads,
by breaking the cycles consumed down into frontend bound, backend bound,
bad speculation and retiring.
@@ -393,6 +411,18 @@ To interpret the results it is usually needed to know on which
CPUs the workload runs on. If needed the CPUs can be forced using
taskset.
+--td-level::
+Print the top-down statistics that equal to or lower than the input level.
+It allows users to print the interested top-down metrics level instead of
+the complete top-down metrics.
+
+The availability of the top-down metrics level depends on the hardware. For
+example, Ice Lake only supports L1 top-down metrics. The Sapphire Rapids
+supports both L1 and L2 top-down metrics.
+
+Default: 0 means the max level that the current hardware support.
+Error out if the input is higher than the supported max level.
+
--no-merge::
Do not merge results from same PMUs.
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index abc9b5d83312..f0da8cf63e9a 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -97,8 +97,8 @@ filter out the startup phase of the program, which is often very different.
Filter out events for these pids and for 'trace' itself (comma separated list).
-v::
---verbose=::
- Verbosity level.
+--verbose::
+ Increase the verbosity level.
--no-inherit::
Child tasks do not inherit counters.
diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt
index 3c39bb3dc5fa..10f07f9455b8 100644
--- a/tools/perf/Documentation/topdown.txt
+++ b/tools/perf/Documentation/topdown.txt
@@ -121,7 +121,7 @@ to read slots and the topdown metrics at different points of the program:
#define RDPMC_METRIC (1 << 29) /* return metric counters */
#define FIXED_COUNTER_SLOTS 3
-#define METRIC_COUNTER_TOPDOWN_L1 0
+#define METRIC_COUNTER_TOPDOWN_L1_L2 0
static inline uint64_t read_slots(void)
{
@@ -130,7 +130,7 @@ static inline uint64_t read_slots(void)
static inline uint64_t read_metrics(void)
{
- return _rdpmc(RDPMC_METRIC | METRIC_COUNTER_TOPDOWN_L1);
+ return _rdpmc(RDPMC_METRIC | METRIC_COUNTER_TOPDOWN_L1_L2);
}
Then the program can be instrumented to read these metrics at different
@@ -152,11 +152,21 @@ The binary ratios in the metric value can be converted to float ratios:
#define GET_METRIC(m, i) (((m) >> (i*8)) & 0xff)
+/* L1 Topdown metric events */
#define TOPDOWN_RETIRING(val) ((float)GET_METRIC(val, 0) / 0xff)
#define TOPDOWN_BAD_SPEC(val) ((float)GET_METRIC(val, 1) / 0xff)
#define TOPDOWN_FE_BOUND(val) ((float)GET_METRIC(val, 2) / 0xff)
#define TOPDOWN_BE_BOUND(val) ((float)GET_METRIC(val, 3) / 0xff)
+/*
+ * L2 Topdown metric events.
+ * Available on Sapphire Rapids and later platforms.
+ */
+#define TOPDOWN_HEAVY_OPS(val) ((float)GET_METRIC(val, 4) / 0xff)
+#define TOPDOWN_BR_MISPREDICT(val) ((float)GET_METRIC(val, 5) / 0xff)
+#define TOPDOWN_FETCH_LAT(val) ((float)GET_METRIC(val, 6) / 0xff)
+#define TOPDOWN_MEM_BOUND(val) ((float)GET_METRIC(val, 7) / 0xff)
+
and then converted to percent for printing.
The ratios in the metric accumulate for the time when the counter
@@ -190,8 +200,8 @@ for that time period.
fe_bound_slots = GET_METRIC(metric_b, 2) * slots_b - fe_bound_slots_a
be_bound_slots = GET_METRIC(metric_b, 3) * slots_b - be_bound_slots_a
-Later the individual ratios for the measurement period can be recreated
-from these counts.
+Later the individual ratios of L1 metric events for the measurement period can
+be recreated from these counts.
slots_delta = slots_b - slots_a
retiring_ratio = (float)retiring_slots / slots_delta
@@ -205,6 +215,48 @@ from these counts.
fe_bound_ratio * 100.,
be_bound_ratio * 100.);
+The individual ratios of L2 metric events for the measurement period can be
+recreated from L1 and L2 metric counters. (Available on Sapphire Rapids and
+later platforms)
+
+ # compute scaled metrics for measurement a
+ heavy_ops_slots_a = GET_METRIC(metric_a, 4) * slots_a
+ br_mispredict_slots_a = GET_METRIC(metric_a, 5) * slots_a
+ fetch_lat_slots_a = GET_METRIC(metric_a, 6) * slots_a
+ mem_bound_slots_a = GET_METRIC(metric_a, 7) * slots_a
+
+ # compute delta scaled metrics between b and a
+ heavy_ops_slots = GET_METRIC(metric_b, 4) * slots_b - heavy_ops_slots_a
+ br_mispredict_slots = GET_METRIC(metric_b, 5) * slots_b - br_mispredict_slots_a
+ fetch_lat_slots = GET_METRIC(metric_b, 6) * slots_b - fetch_lat_slots_a
+ mem_bound_slots = GET_METRIC(metric_b, 7) * slots_b - mem_bound_slots_a
+
+ slots_delta = slots_b - slots_a
+ heavy_ops_ratio = (float)heavy_ops_slots / slots_delta
+ light_ops_ratio = retiring_ratio - heavy_ops_ratio;
+
+ br_mispredict_ratio = (float)br_mispredict_slots / slots_delta
+ machine_clears_ratio = bad_spec_ratio - br_mispredict_ratio;
+
+ fetch_lat_ratio = (float)fetch_lat_slots / slots_delta
+ fetch_bw_ratio = fe_bound_ratio - fetch_lat_ratio;
+
+ mem_bound_ratio = (float)mem_bound_slots / slota_delta
+ core_bound_ratio = be_bound_ratio - mem_bound_ratio;
+
+ printf("Heavy Operations %.2f%% Light Operations %.2f%% "
+ "Branch Mispredict %.2f%% Machine Clears %.2f%% "
+ "Fetch Latency %.2f%% Fetch Bandwidth %.2f%% "
+ "Mem Bound %.2f%% Core Bound %.2f%%\n",
+ heavy_ops_ratio * 100.,
+ light_ops_ratio * 100.,
+ br_mispredict_ratio * 100.,
+ machine_clears_ratio * 100.,
+ fetch_lat_ratio * 100.,
+ fetch_bw_ratio * 100.,
+ mem_bound_ratio * 100.,
+ core_bound_ratio * 100.);
+
Resetting metrics counters
==========================
@@ -248,6 +300,24 @@ a sampling read group. Since the SLOTS event must be the leader of a TopDown
group, the second event of the group is the sampling event.
For example, perf record -e '{slots, $sampling_event, topdown-retiring}:S'
+Extension on Sapphire Rapids Server
+===================================
+The metrics counter is extended to support TMA method level 2 metrics.
+The lower half of the register is the TMA level 1 metrics (legacy).
+The upper half is also divided into four 8-bit fields for the new level 2
+metrics. Four more TopDown metric events are exposed for the end-users,
+topdown-heavy-ops, topdown-br-mispredict, topdown-fetch-lat and
+topdown-mem-bound.
+
+Each of the new level 2 metrics in the upper half is a subset of the
+corresponding level 1 metric in the lower half. Software can deduce the
+other four level 2 metrics by subtracting corresponding metrics as below.
+
+ Light_Operations = Retiring - Heavy_Operations
+ Machine_Clears = Bad_Speculation - Branch_Mispredicts
+ Fetch_Bandwidth = Frontend_Bound - Fetch_Latency
+ Core_Bound = Backend_Bound - Memory_Bound
+
[1] https://software.intel.com/en-us/top-down-microarchitecture-analysis-method-win
[2] https://github.com/andikleen/pmu-tools/wiki/toplev-manual
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 5d7b947320fb..f05c4d48fd7e 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -20,4 +20,4 @@ tools/lib/bitmap.c
tools/lib/str_error_r.c
tools/lib/vsprintf.c
tools/lib/zalloc.c
-scripts/bpf_helpers_doc.py
+scripts/bpf_doc.py
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index ce8516e4de34..d8e59d31399a 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -621,6 +621,15 @@ ifndef NO_LIBBPF
endif
endif
+ifdef BUILD_BPF_SKEL
+ $(call feature_check,clang-bpf-co-re)
+ ifeq ($(feature-clang-bpf-co-re), 0)
+ dummy := $(error Error: clang too old. Please install recent clang)
+ endif
+ $(call detected,CONFIG_PERF_BPF_SKEL)
+ CFLAGS += -DHAVE_BPF_SKEL
+endif
+
dwarf-post-unwind := 1
dwarf-post-unwind-text := BUG
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index f4df7534026d..f6e609673de2 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -126,6 +126,8 @@ include ../scripts/utilities.mak
#
# Define NO_LIBDEBUGINFOD if you do not want support debuginfod
#
+# Define BUILD_BPF_SKEL to enable BPF skeletons
+#
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
@@ -175,6 +177,12 @@ endef
LD += $(EXTRA_LDFLAGS)
+HOSTCC ?= gcc
+HOSTLD ?= ld
+HOSTAR ?= ar
+CLANG ?= clang
+LLVM_STRIP ?= llvm-strip
+
PKG_CONFIG = $(CROSS_COMPILE)pkg-config
RM = rm -f
@@ -599,7 +607,7 @@ arch_errno_hdr_dir := $(srctree)/tools
arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
$(arch_errno_name_array): $(arch_errno_tbl)
- $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@
+ $(Q)$(SHELL) '$(arch_errno_tbl)' '$(patsubst -%,,$(CC))' $(arch_errno_hdr_dir) > $@
sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c
sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh
@@ -730,7 +738,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(x86_arch_prctl_code_array) \
$(rename_flags_array) \
$(arch_errno_name_array) \
- $(sync_file_range_arrays)
+ $(sync_file_range_arrays) \
+ bpf-skel
$(OUTPUT)%.o: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
@@ -992,18 +1001,46 @@ $(INSTALL_DOC_TARGETS):
### Cleaning rules
-#
-# This is here, not in Makefile.config, because Makefile.config does
-# not get included for the clean target:
-#
-config-clean:
- $(call QUIET_CLEAN, config)
- $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
-
python-clean:
$(python-clean)
-clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean config-clean fixdep-clean python-clean
+SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
+SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
+SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
+
+ifdef BUILD_BPF_SKEL
+BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
+LIBBPF_SRC := $(abspath ../lib/bpf)
+BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(BPF_PATH) -I$(LIBBPF_SRC)/..
+
+$(SKEL_TMP_OUT):
+ $(Q)$(MKDIR) -p $@
+
+$(BPFTOOL): | $(SKEL_TMP_OUT)
+ CFLAGS= $(MAKE) -C ../bpf/bpftool \
+ OUTPUT=$(SKEL_TMP_OUT)/ bootstrap
+
+$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT)
+ $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf $(BPF_INCLUDE) \
+ -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@
+
+$(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL)
+ $(QUIET_GENSKEL)$(BPFTOOL) gen skeleton $< > $@
+
+bpf-skel: $(SKELETONS)
+
+.PRECIOUS: $(SKEL_TMP_OUT)/%.bpf.o
+
+else # BUILD_BPF_SKEL
+
+bpf-skel:
+
+endif # BUILD_BPF_SKEL
+
+bpf-skel-clean:
+ $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS)
+
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h
index ed20e0253e25..4085419283d0 100644
--- a/tools/perf/arch/arm/include/perf_regs.h
+++ b/tools/perf/arch/arm/include/perf_regs.h
@@ -15,7 +15,7 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM_PC
#define PERF_REG_SP PERF_REG_ARM_SP
-static inline const char *perf_reg_name(int id)
+static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_ARM_R0:
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index bd446aba64f7..c25c878fd06c 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -156,6 +156,10 @@ out:
return err;
}
+#define ETM_SET_OPT_CTXTID (1 << 0)
+#define ETM_SET_OPT_TS (1 << 1)
+#define ETM_SET_OPT_MASK (ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS)
+
static int cs_etm_set_option(struct auxtrace_record *itr,
struct evsel *evsel, u32 option)
{
@@ -169,17 +173,17 @@ static int cs_etm_set_option(struct auxtrace_record *itr,
!cpu_map__has(online_cpus, i))
continue;
- if (option & ETM_OPT_CTXTID) {
+ if (option & ETM_SET_OPT_CTXTID) {
err = cs_etm_set_context_id(itr, evsel, i);
if (err)
goto out;
}
- if (option & ETM_OPT_TS) {
+ if (option & ETM_SET_OPT_TS) {
err = cs_etm_set_timestamp(itr, evsel, i);
if (err)
goto out;
}
- if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS))
+ if (option & ~(ETM_SET_OPT_MASK))
/* Nothing else is currently supported */
goto out;
}
@@ -406,7 +410,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
evsel__set_sample_bit(cs_etm_evsel, CPU);
err = cs_etm_set_option(itr, cs_etm_evsel,
- ETM_OPT_CTXTID | ETM_OPT_TS);
+ ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS);
if (err)
goto out;
}
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
index baaa5e64a3fb..fa3e07459f76 100644
--- a/tools/perf/arch/arm64/include/perf_regs.h
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -15,7 +15,7 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM64_PC
#define PERF_REG_SP PERF_REG_ARM64_SP
-static inline const char *perf_reg_name(int id)
+static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_ARM64_X0:
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index d41b27e781d3..40c5e0b5bda8 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include "debug.h"
@@ -23,5 +24,5 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
p->end += SYMBOL_LIMIT;
else
p->end = c->start;
- pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
+ pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
}
diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
index 54efa12fdbea..2518cde18b34 100644
--- a/tools/perf/arch/arm64/util/perf_regs.c
+++ b/tools/perf/arch/arm64/util/perf_regs.c
@@ -1,4 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <regex.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+
+#include "../../../util/debug.h"
+#include "../../../util/event.h"
#include "../../../util/perf_regs.h"
const struct sample_reg sample_reg_masks[] = {
@@ -37,3 +45,89 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(pc, PERF_REG_ARM64_PC),
SMPL_REG_END
};
+
+/* %xNUM */
+#define SDT_OP_REGEX1 "^(x[1-2]?[0-9]|3[0-1])$"
+
+/* [sp], [sp, NUM] */
+#define SDT_OP_REGEX2 "^\\[sp(, )?([0-9]+)?\\]$"
+
+static regex_t sdt_op_regex1, sdt_op_regex2;
+
+static int sdt_init_op_regex(void)
+{
+ static int initialized;
+ int ret = 0;
+
+ if (initialized)
+ return 0;
+
+ ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED);
+ if (ret)
+ goto error;
+
+ ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED);
+ if (ret)
+ goto free_regex1;
+
+ initialized = 1;
+ return 0;
+
+free_regex1:
+ regfree(&sdt_op_regex1);
+error:
+ pr_debug4("Regex compilation error.\n");
+ return ret;
+}
+
+/*
+ * SDT marker arguments on Arm64 uses %xREG or [sp, NUM], currently
+ * support these two formats.
+ */
+int arch_sdt_arg_parse_op(char *old_op, char **new_op)
+{
+ int ret, new_len;
+ regmatch_t rm[5];
+
+ ret = sdt_init_op_regex();
+ if (ret < 0)
+ return ret;
+
+ if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) {
+ /* Extract xNUM */
+ new_len = 2; /* % NULL */
+ new_len += (int)(rm[1].rm_eo - rm[1].rm_so);
+
+ *new_op = zalloc(new_len);
+ if (!*new_op)
+ return -ENOMEM;
+
+ scnprintf(*new_op, new_len, "%%%.*s",
+ (int)(rm[1].rm_eo - rm[1].rm_so), old_op + rm[1].rm_so);
+ } else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) {
+ /* [sp], [sp, NUM] or [sp,NUM] */
+ new_len = 7; /* + ( % s p ) NULL */
+
+ /* If the arugment is [sp], need to fill offset '0' */
+ if (rm[2].rm_so == -1)
+ new_len += 1;
+ else
+ new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
+
+ *new_op = zalloc(new_len);
+ if (!*new_op)
+ return -ENOMEM;
+
+ if (rm[2].rm_so == -1)
+ scnprintf(*new_op, new_len, "+0(%%sp)");
+ else
+ scnprintf(*new_op, new_len, "+%.*s(%%sp)",
+ (int)(rm[2].rm_eo - rm[2].rm_so),
+ old_op + rm[2].rm_so);
+ } else {
+ pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+ return SDT_ARG_SKIP;
+ }
+
+ return SDT_ARG_VALID;
+}
diff --git a/tools/perf/arch/csky/include/perf_regs.h b/tools/perf/arch/csky/include/perf_regs.h
index 8f336ea1161a..25ac3bdcb9d1 100644
--- a/tools/perf/arch/csky/include/perf_regs.h
+++ b/tools/perf/arch/csky/include/perf_regs.h
@@ -15,7 +15,7 @@
#define PERF_REG_IP PERF_REG_CSKY_PC
#define PERF_REG_SP PERF_REG_CSKY_SP
-static inline const char *perf_reg_name(int id)
+static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_CSKY_A0:
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index f744eb5cba88..0b2480cf3e47 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -9,9 +9,7 @@
#
0 nospu restart_syscall sys_restart_syscall
1 nospu exit sys_exit
-2 32 fork ppc_fork sys_fork
-2 64 fork sys_fork
-2 spu fork sys_ni_syscall
+2 nospu fork sys_fork
3 common read sys_read
4 common write sys_write
5 common open sys_open compat_sys_open
@@ -160,9 +158,7 @@
119 32 sigreturn sys_sigreturn compat_sys_sigreturn
119 64 sigreturn sys_ni_syscall
119 spu sigreturn sys_ni_syscall
-120 32 clone ppc_clone sys_clone
-120 64 clone sys_clone
-120 spu clone sys_ni_syscall
+120 nospu clone sys_clone
121 common setdomainname sys_setdomainname
122 common uname sys_newuname
123 common modify_ldt sys_ni_syscall
@@ -244,9 +240,7 @@
186 spu sendfile sys_sendfile64
187 common getpmsg sys_ni_syscall
188 common putpmsg sys_ni_syscall
-189 32 vfork ppc_vfork sys_vfork
-189 64 vfork sys_vfork
-189 spu vfork sys_ni_syscall
+189 nospu vfork sys_vfork
190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
191 common readahead sys_readahead compat_sys_readahead
192 32 mmap2 sys_mmap2 compat_sys_mmap2
@@ -322,9 +316,7 @@
248 32 clock_nanosleep sys_clock_nanosleep_time32
248 64 clock_nanosleep sys_clock_nanosleep
248 spu clock_nanosleep sys_clock_nanosleep
-249 32 swapcontext ppc_swapcontext compat_sys_swapcontext
-249 64 swapcontext sys_swapcontext
-249 spu swapcontext sys_ni_syscall
+249 nospu swapcontext sys_swapcontext compat_sys_swapcontext
250 common tgkill sys_tgkill
251 32 utimes sys_utimes_time32
251 64 utimes sys_utimes
@@ -522,12 +514,11 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
-435 32 clone3 ppc_clone3 sys_clone3
-435 64 clone3 sys_clone3
-435 spu clone3 sys_ni_syscall
+435 nospu clone3 sys_clone3
436 common close_range sys_close_range
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index 63f3ac91049f..04e5dc07e93f 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -71,9 +71,15 @@ static const char *reg_names[] = {
[PERF_REG_POWERPC_MMCR3] = "mmcr3",
[PERF_REG_POWERPC_SIER2] = "sier2",
[PERF_REG_POWERPC_SIER3] = "sier3",
+ [PERF_REG_POWERPC_PMC1] = "pmc1",
+ [PERF_REG_POWERPC_PMC2] = "pmc2",
+ [PERF_REG_POWERPC_PMC3] = "pmc3",
+ [PERF_REG_POWERPC_PMC4] = "pmc4",
+ [PERF_REG_POWERPC_PMC5] = "pmc5",
+ [PERF_REG_POWERPC_PMC6] = "pmc6",
};
-static inline const char *perf_reg_name(int id)
+static inline const char *__perf_reg_name(int id)
{
return reg_names[id];
}
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index e86e210bf514..b7945e5a543b 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,4 +1,5 @@
perf-y += header.o
+perf-y += machine.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
perf-y += mem-events.o
diff --git a/tools/perf/arch/powerpc/util/machine.c b/tools/perf/arch/powerpc/util/machine.c
new file mode 100644
index 000000000000..e652a1aa8132
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/machine.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <internal/lib.h> // page_size
+#include "debug.h"
+#include "symbol.h"
+
+/* On powerpc kernel text segment start at memory addresses, 0xc000000000000000
+ * whereas the modules are located at very high memory addresses,
+ * for example 0xc00800000xxxxxxx. The gap between end of kernel text segment
+ * and beginning of first module's text segment is very high.
+ * Therefore do not fill this gap and do not assign it to the kernel dso map.
+ */
+
+void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+{
+ if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
+ /* Limit the range of last kernel symbol */
+ p->end += page_size;
+ else
+ p->end = c->start;
+ pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
+}
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index 2b6d4704e3aa..8116a253f91f 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -68,6 +68,12 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(mmcr3, PERF_REG_POWERPC_MMCR3),
SMPL_REG(sier2, PERF_REG_POWERPC_SIER2),
SMPL_REG(sier3, PERF_REG_POWERPC_SIER3),
+ SMPL_REG(pmc1, PERF_REG_POWERPC_PMC1),
+ SMPL_REG(pmc2, PERF_REG_POWERPC_PMC2),
+ SMPL_REG(pmc3, PERF_REG_POWERPC_PMC3),
+ SMPL_REG(pmc4, PERF_REG_POWERPC_PMC4),
+ SMPL_REG(pmc5, PERF_REG_POWERPC_PMC5),
+ SMPL_REG(pmc6, PERF_REG_POWERPC_PMC6),
SMPL_REG_END
};
diff --git a/tools/perf/arch/riscv/include/perf_regs.h b/tools/perf/arch/riscv/include/perf_regs.h
index 7a8bcde7a2b1..6b02a767c918 100644
--- a/tools/perf/arch/riscv/include/perf_regs.h
+++ b/tools/perf/arch/riscv/include/perf_regs.h
@@ -19,7 +19,7 @@
#define PERF_REG_IP PERF_REG_RISCV_PC
#define PERF_REG_SP PERF_REG_RISCV_SP
-static inline const char *perf_reg_name(int id)
+static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_RISCV_PC:
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index d443423495e5..3abef2144dac 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -444,3 +444,4 @@
439 common faccessat2 sys_faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr sys_mount_setattr
diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h
index bcfbaed78cc2..ce3031526623 100644
--- a/tools/perf/arch/s390/include/perf_regs.h
+++ b/tools/perf/arch/s390/include/perf_regs.h
@@ -14,7 +14,7 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_S390_PC
#define PERF_REG_SP PERF_REG_S390_R15
-static inline const char *perf_reg_name(int id)
+static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_S390_R0:
diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c
index 724efb2d842d..7644a4f6d4a4 100644
--- a/tools/perf/arch/s390/util/machine.c
+++ b/tools/perf/arch/s390/util/machine.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
@@ -48,5 +49,5 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
p->end = roundup(p->end, page_size);
else
p->end = c->start;
- pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
+ pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
}
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 8cc6642fce7a..5a9f9a7bf07d 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -10,10 +10,11 @@ PERF_HAVE_JITDUMP := 1
# Syscall table generation
#
-out := $(OUTPUT)arch/x86/include/generated/asm
-header := $(out)/syscalls_64.c
-sys := $(srctree)/tools/perf/arch/x86/entry/syscalls
-systbl := $(sys)/syscalltbl.sh
+generated := $(OUTPUT)arch/x86/include/generated
+out := $(generated)/asm
+header := $(out)/syscalls_64.c
+sys := $(srctree)/tools/perf/arch/x86/entry/syscalls
+systbl := $(sys)/syscalltbl.sh
# Create output directory if not already present
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
@@ -22,6 +23,6 @@ $(header): $(sys)/syscall_64.tbl $(systbl)
$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
clean::
- $(call QUIET_CLEAN, x86) $(RM) $(header)
+ $(call QUIET_CLEAN, x86) $(RM) -r $(header) $(generated)
archheaders: $(header)
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 78672124d28b..7bf01cbe582f 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -363,6 +363,7 @@
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 6a54b94f1c25..0e20f3dc69f3 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -10,6 +10,7 @@ int test__rdpmc(struct test *test __maybe_unused, int subtest);
int test__insn_x86(struct test *test __maybe_unused, int subtest);
int test__intel_pt_pkt_decoder(struct test *test, int subtest);
int test__bp_modify(struct test *test, int subtest);
+int test__x86_sample_parsing(struct test *test, int subtest);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
index b7321337d100..cddc4cdc0d9b 100644
--- a/tools/perf/arch/x86/include/perf_regs.h
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -23,7 +23,7 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_X86_IP
#define PERF_REG_SP PERF_REG_X86_SP
-static inline const char *perf_reg_name(int id)
+static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_X86_AX:
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 36d4f248b51d..28d793390198 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -3,5 +3,6 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-y += arch-tests.o
perf-y += rdpmc.o
+perf-y += sample-parsing.o
perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
perf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index bc25d727b4e9..71aa67367ad6 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -31,6 +31,10 @@ struct test arch_tests[] = {
},
#endif
{
+ .desc = "x86 Sample parsing",
+ .func = test__x86_sample_parsing,
+ },
+ {
.func = NULL,
},
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index 745f29adb14b..4f75ae990140 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
-#include "../../../../arch/x86/include/asm/insn.h"
#include <string.h>
#include "debug.h"
#include "tests/tests.h"
#include "arch-tests.h"
+#include "../../../../arch/x86/include/asm/insn.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
@@ -48,6 +48,7 @@ static int get_op(const char *op_str)
{"int", INTEL_PT_OP_INT},
{"syscall", INTEL_PT_OP_SYSCALL},
{"sysret", INTEL_PT_OP_SYSRET},
+ {"vmentry", INTEL_PT_OP_VMENTRY},
{NULL, 0},
};
struct val_data *val;
diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
index 901bf1f449c4..c933e3dcd0a8 100644
--- a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
+++ b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
@@ -66,8 +66,8 @@ struct test_data {
{7, {0x9d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 4, 0x60504030201}, 0, 0 },
{9, {0xdd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_FUP, 6, 0x807060504030201}, 0, 0 },
/* Paging Information Packet */
- {8, {0x02, 0x43, 2, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0x60504030201}, 0, 0 },
- {8, {0x02, 0x43, 3, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0x60504030201 | (1ULL << 63)}, 0, 0 },
+ {8, {0x02, 0x43, 2, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0xC0A08060402}, 0, 0 },
+ {8, {0x02, 0x43, 3, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0xC0A08060403}, 0, 0 },
/* Mode Exec Packet */
{2, {0x99, 0x00}, 0, {INTEL_PT_MODE_EXEC, 0, 16}, 0, 0 },
{2, {0x99, 0x01}, 0, {INTEL_PT_MODE_EXEC, 0, 64}, 0, 0 },
diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c
new file mode 100644
index 000000000000..c92db87e4479
--- /dev/null
+++ b/tools/perf/arch/x86/tests/sample-parsing.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdbool.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "event.h"
+#include "evsel.h"
+#include "debug.h"
+#include "util/synthetic-events.h"
+
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define COMP(m) do { \
+ if (s1->m != s2->m) { \
+ pr_debug("Samples differ at '"#m"'\n"); \
+ return false; \
+ } \
+} while (0)
+
+static bool samples_same(const struct perf_sample *s1,
+ const struct perf_sample *s2,
+ u64 type)
+{
+ if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+ COMP(ins_lat);
+
+ return true;
+}
+
+static int do_test(u64 sample_type)
+{
+ struct evsel evsel = {
+ .needs_swap = false,
+ .core = {
+ . attr = {
+ .sample_type = sample_type,
+ .read_format = 0,
+ },
+ },
+ };
+ union perf_event *event;
+ struct perf_sample sample = {
+ .weight = 101,
+ .ins_lat = 102,
+ };
+ struct perf_sample sample_out;
+ size_t i, sz, bufsz;
+ int err, ret = -1;
+
+ sz = perf_event__sample_event_size(&sample, sample_type, 0);
+ bufsz = sz + 4096; /* Add a bit for overrun checking */
+ event = malloc(bufsz);
+ if (!event) {
+ pr_debug("malloc failed\n");
+ return -1;
+ }
+
+ memset(event, 0xff, bufsz);
+ event->header.type = PERF_RECORD_SAMPLE;
+ event->header.misc = 0;
+ event->header.size = sz;
+
+ err = perf_event__synthesize_sample(event, sample_type, 0, &sample);
+ if (err) {
+ pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+ "perf_event__synthesize_sample", sample_type, err);
+ goto out_free;
+ }
+
+ /* The data does not contain 0xff so we use that to check the size */
+ for (i = bufsz; i > 0; i--) {
+ if (*(i - 1 + (u8 *)event) != 0xff)
+ break;
+ }
+ if (i != sz) {
+ pr_debug("Event size mismatch: actual %zu vs expected %zu\n",
+ i, sz);
+ goto out_free;
+ }
+
+ evsel.sample_size = __evsel__sample_size(sample_type);
+
+ err = evsel__parse_sample(&evsel, event, &sample_out);
+ if (err) {
+ pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+ "evsel__parse_sample", sample_type, err);
+ goto out_free;
+ }
+
+ if (!samples_same(&sample, &sample_out, sample_type)) {
+ pr_debug("parsing failed for sample_type %#"PRIx64"\n",
+ sample_type);
+ goto out_free;
+ }
+
+ ret = 0;
+out_free:
+ free(event);
+
+ return ret;
+}
+
+/**
+ * test__x86_sample_parsing - test X86 specific sample parsing
+ *
+ * This function implements a test that synthesizes a sample event, parses it
+ * and then checks that the parsed sample matches the original sample. If the
+ * test passes %0 is returned, otherwise %-1 is returned.
+ *
+ * For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type.
+ * The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type.
+ */
+int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ return do_test(PERF_SAMPLE_WEIGHT_STRUCT);
+}
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 347c39b960eb..0c72d418932e 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -6,6 +6,9 @@ perf-y += perf_regs.o
perf-y += topdown.o
perf-y += machine.o
perf-y += event.o
+perf-y += evlist.o
+perf-y += mem-events.o
+perf-y += evsel.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c
index 3e6791531ca5..34d600c51044 100644
--- a/tools/perf/arch/x86/util/archinsn.c
+++ b/tools/perf/arch/x86/util/archinsn.c
@@ -1,10 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-#include "../../../../arch/x86/include/asm/insn.h"
#include "archinsn.h"
#include "event.h"
#include "machine.h"
#include "thread.h"
#include "symbol.h"
+#include "../../../../arch/x86/include/asm/insn.h"
void arch_fetch_insn(struct perf_sample *sample,
struct thread *thread,
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index 047dc00eafa6..9b31734ee968 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -75,3 +75,28 @@ int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
}
#endif
+
+void arch_perf_parse_sample_weight(struct perf_sample *data,
+ const __u64 *array, u64 type)
+{
+ union perf_sample_weight weight;
+
+ weight.full = *array;
+ if (type & PERF_SAMPLE_WEIGHT)
+ data->weight = weight.full;
+ else {
+ data->weight = weight.var1_dw;
+ data->ins_lat = weight.var2_w;
+ }
+}
+
+void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
+ __u64 *array, u64 type)
+{
+ *array = data->weight;
+
+ if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
+ *array &= 0xffffffff;
+ *array |= ((u64)data->ins_lat << 32);
+ }
+}
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
new file mode 100644
index 000000000000..8c6732cc7794
--- /dev/null
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "util/pmu.h"
+#include "util/evlist.h"
+#include "util/parse-events.h"
+
+#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}"
+
+int arch_evlist__add_default_attrs(struct evlist *evlist)
+{
+ if (!pmu_have_event("cpu", "slots"))
+ return 0;
+
+ return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL);
+}
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
new file mode 100644
index 000000000000..2f733cdc8dbb
--- /dev/null
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "util/evsel.h"
+
+void arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+ evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
+}
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
new file mode 100644
index 000000000000..588110fd8904
--- /dev/null
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/pmu.h"
+#include "map_symbol.h"
+#include "mem-events.h"
+
+static char mem_loads_name[100];
+static bool mem_loads_name__init;
+
+#define MEM_LOADS_AUX 0x8203
+#define MEM_LOADS_AUX_NAME "{cpu/mem-loads-aux/,cpu/mem-loads,ldlat=%u/pp}:S"
+
+bool is_mem_loads_aux_event(struct evsel *leader)
+{
+ if (!pmu_have_event("cpu", "mem-loads-aux"))
+ return false;
+
+ return leader->core.attr.config == MEM_LOADS_AUX;
+}
+
+char *perf_mem_events__name(int i)
+{
+ struct perf_mem_event *e = perf_mem_events__ptr(i);
+
+ if (!e)
+ return NULL;
+
+ if (i == PERF_MEM_EVENTS__LOAD) {
+ if (mem_loads_name__init)
+ return mem_loads_name;
+
+ mem_loads_name__init = true;
+
+ if (pmu_have_event("cpu", "mem-loads-aux")) {
+ scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ MEM_LOADS_AUX_NAME, perf_mem_events__loads_ldlat);
+ } else {
+ scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ e->name, perf_mem_events__loads_ldlat);
+ }
+ return mem_loads_name;
+ }
+
+ return (char *)e->name;
+}
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index ca2d591aad8a..ddaca75c3bc0 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -21,7 +21,6 @@
#include <sys/resource.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
-#include <internal/cpumap.h>
#include <perf/cpumap.h>
#include "../util/stat.h"
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index 75dca9773186..0a0ff1247c83 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -76,7 +76,6 @@
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <sys/types.h>
-#include <internal/cpumap.h>
#include <perf/cpumap.h>
#include "../util/stat.h"
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 915bf3da7ce2..b65373ce5c4f 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -20,7 +20,6 @@
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <sys/time.h>
-#include <internal/cpumap.h>
#include <perf/cpumap.h>
#include "../util/stat.h"
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index bb25d8beb3b8..89c6d160379c 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -14,7 +14,6 @@
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <errno.h>
-#include <internal/cpumap.h>
#include <perf/cpumap.h>
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 7a15c2e61022..5fa23295ee5f 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -20,7 +20,6 @@
#include <linux/kernel.h>
#include <linux/time64.h>
#include <errno.h>
-#include <internal/cpumap.h>
#include <perf/cpumap.h>
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index cd2b81a845ac..6e6f5247e1fe 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -29,7 +29,6 @@ int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe
#include <linux/time64.h>
#include <errno.h>
#include "futex.h"
-#include <internal/cpumap.h>
#include <perf/cpumap.h>
#include <err.h>
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 2dfcef3e371e..6d217868f53c 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -20,7 +20,6 @@
#include <linux/kernel.h>
#include <linux/time64.h>
#include <errno.h>
-#include <internal/cpumap.h>
#include <perf/cpumap.h>
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 11726ec6285f..20b87e29c96f 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -344,18 +344,22 @@ static void mempol_restore(void)
static void bind_to_memnode(int node)
{
- unsigned long nodemask;
+ struct bitmask *node_mask;
int ret;
if (node == NUMA_NO_NODE)
return;
- BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
- nodemask = 1L << node;
+ node_mask = numa_allocate_nodemask();
+ BUG_ON(!node_mask);
- ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
- dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
+ numa_bitmask_clearall(node_mask);
+ numa_bitmask_setbit(node_mask, node);
+ ret = set_mempolicy(MPOL_BIND, node_mask->maskp, node_mask->size + 1);
+ dprintf("binding to node %d, mask: %016lx => %d\n", node, *node_mask->maskp, ret);
+
+ numa_bitmask_free(node_mask);
BUG_ON(ret);
}
@@ -876,8 +880,6 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
prctl(0, bytes_worked);
}
-#define MAX_NR_NODES 64
-
/*
* Count the number of nodes a process's threads
* are spread out on.
@@ -888,10 +890,15 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
*/
static int count_process_nodes(int process_nr)
{
- char node_present[MAX_NR_NODES] = { 0, };
+ char *node_present;
int nodes;
int n, t;
+ node_present = (char *)malloc(g->p.nr_nodes * sizeof(char));
+ BUG_ON(!node_present);
+ for (nodes = 0; nodes < g->p.nr_nodes; nodes++)
+ node_present[nodes] = 0;
+
for (t = 0; t < g->p.nr_threads; t++) {
struct thread_data *td;
int task_nr;
@@ -901,17 +908,20 @@ static int count_process_nodes(int process_nr)
td = g->threads + task_nr;
node = numa_node_of_cpu(td->curr_cpu);
- if (node < 0) /* curr_cpu was likely still -1 */
+ if (node < 0) /* curr_cpu was likely still -1 */ {
+ free(node_present);
return 0;
+ }
node_present[node] = 1;
}
nodes = 0;
- for (n = 0; n < MAX_NR_NODES; n++)
+ for (n = 0; n < g->p.nr_nodes; n++)
nodes += node_present[n];
+ free(node_present);
return nodes;
}
@@ -980,7 +990,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
{
unsigned int loops_done_min, loops_done_max;
int process_groups;
- int nodes[MAX_NR_NODES];
+ int *nodes;
int distance;
int nr_min;
int nr_max;
@@ -994,6 +1004,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
if (!g->p.show_convergence && !g->p.measure_convergence)
return;
+ nodes = (int *)malloc(g->p.nr_nodes * sizeof(int));
+ BUG_ON(!nodes);
for (node = 0; node < g->p.nr_nodes; node++)
nodes[node] = 0;
@@ -1035,8 +1047,10 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
BUG_ON(sum > g->p.nr_tasks);
- if (0 && (sum < g->p.nr_tasks))
+ if (0 && (sum < g->p.nr_tasks)) {
+ free(nodes);
return;
+ }
/*
* Count the number of distinct process groups present
@@ -1088,6 +1102,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
}
tprintf("\n");
}
+
+ free(nodes);
}
static void show_summary(double runtime_ns_max, int l, double *convergence)
@@ -1413,7 +1429,7 @@ static int init(void)
g->p.nr_nodes = numa_max_node() + 1;
/* char array in count_process_nodes(): */
- BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
+ BUG_ON(g->p.nr_nodes < 0);
if (g->p.show_quiet && !g->p.show_details)
g->p.show_details = -1;
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index cecce93ccc63..488f6e6ba1a5 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -309,11 +309,11 @@ int bench_sched_messaging(int argc, const char **argv)
num_groups, num_groups * 2 * num_fds,
thread_mode ? "threads" : "processes");
printf(" %14s: %lu.%03lu [sec]\n", "Total time",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / USEC_PER_MSEC));
break;
case BENCH_FORMAT_SIMPLE:
- printf("%lu.%03lu\n", diff.tv_sec,
+ printf("%lu.%03lu\n", (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / USEC_PER_MSEC));
break;
default:
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 3c88d1f201f1..a960e7a93aec 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -156,7 +156,7 @@ int bench_sched_pipe(int argc, const char **argv)
result_usec += diff.tv_usec;
printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / USEC_PER_MSEC));
printf(" %14lf usecs/op\n",
@@ -168,7 +168,7 @@ int bench_sched_pipe(int argc, const char **argv)
case BENCH_FORMAT_SIMPLE:
printf("%lu.%03lu\n",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / USEC_PER_MSEC));
break;
diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c
index 5fe621cff8e9..9b751016f4b6 100644
--- a/tools/perf/bench/syscall.c
+++ b/tools/perf/bench/syscall.c
@@ -54,7 +54,7 @@ int bench_syscall_basic(int argc, const char **argv)
result_usec += diff.tv_usec;
printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec/1000));
printf(" %14lf usecs/op\n",
@@ -66,7 +66,7 @@ int bench_syscall_basic(int argc, const char **argv)
case BENCH_FORMAT_SIMPLE:
printf("%lu.%03lu\n",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / 1000));
break;
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index a25411926e48..ecd0d3cb6f5c 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -27,6 +27,7 @@
#include "util/time-utils.h"
#include "util/util.h"
#include "util/probe-file.h"
+#include "util/config.h"
#include <linux/string.h>
#include <linux/err.h>
@@ -348,12 +349,21 @@ static int build_id_cache__show_all(void)
return 0;
}
+static int perf_buildid_cache_config(const char *var, const char *value, void *cb)
+{
+ const char **debuginfod = cb;
+
+ if (!strcmp(var, "buildid-cache.debuginfod"))
+ *debuginfod = strdup(value);
+
+ return 0;
+}
+
int cmd_buildid_cache(int argc, const char **argv)
{
struct strlist *list;
struct str_node *pos;
- int ret = 0;
- int ns_id = -1;
+ int ret, ns_id = -1;
bool force = false;
bool list_files = false;
bool opts_flag = false;
@@ -363,7 +373,8 @@ int cmd_buildid_cache(int argc, const char **argv)
*purge_name_list_str = NULL,
*missing_filename = NULL,
*update_name_list_str = NULL,
- *kcore_filename = NULL;
+ *kcore_filename = NULL,
+ *debuginfod = NULL;
char sbuf[STRERR_BUFSIZE];
struct perf_data data = {
@@ -388,6 +399,8 @@ int cmd_buildid_cache(int argc, const char **argv)
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
OPT_STRING('u', "update", &update_name_list_str, "file list",
"file(s) to update"),
+ OPT_STRING(0, "debuginfod", &debuginfod, "debuginfod url",
+ "set debuginfod url"),
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"),
OPT_END()
@@ -397,6 +410,10 @@ int cmd_buildid_cache(int argc, const char **argv)
NULL
};
+ ret = perf_config(perf_buildid_cache_config, &debuginfod);
+ if (ret)
+ return ret;
+
argc = parse_options(argc, argv, buildid_cache_options,
buildid_cache_usage, 0);
@@ -408,6 +425,11 @@ int cmd_buildid_cache(int argc, const char **argv)
if (argc || !(list_files || opts_flag))
usage_with_options(buildid_cache_usage, buildid_cache_options);
+ if (debuginfod) {
+ pr_debug("DEBUGINFOD_URLS=%s\n", debuginfod);
+ setenv("DEBUGINFOD_URLS", debuginfod, 1);
+ }
+
/* -l is exclusive. It can not be used with other options. */
if (list_files && opts_flag) {
usage_with_options_msg(buildid_cache_usage,
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index e3ef75583514..87f5b1a4a7fa 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -77,6 +77,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
perf_header__has_feat(&session->header, HEADER_AUXTRACE))
with_hits = false;
+ if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID))
+ with_hits = true;
+
/*
* in pipe-mode, the only way to get the buildids is to parse
* the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c5babeaa3b38..e3b9d63077ef 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -97,8 +97,8 @@ struct perf_c2c {
bool symbol_full;
bool stitch_lbr;
- /* HITM shared clines stats */
- struct c2c_stats hitm_stats;
+ /* Shared cache line stats */
+ struct c2c_stats shared_clines_stats;
int shared_clines;
int display;
@@ -876,7 +876,7 @@ static struct c2c_stats *total_stats(struct hist_entry *he)
return &hists->stats;
}
-static double percent(int st, int tot)
+static double percent(u32 st, u32 tot)
{
return tot ? 100. * (double) st / (double) tot : 0;
}
@@ -1048,6 +1048,19 @@ empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return 0;
}
+static int display_metrics(struct perf_hpp *hpp, u32 val, u32 sum)
+{
+ int ret;
+
+ if (sum != 0)
+ ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ",
+ percent(val, sum));
+ else
+ ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a");
+
+ return ret;
+}
+
static int
node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
struct hist_entry *he)
@@ -1091,29 +1104,23 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num);
advance_hpp(hpp, ret);
- #define DISPLAY_HITM(__h) \
- if (c2c_he->stats.__h> 0) { \
- ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", \
- percent(stats->__h, c2c_he->stats.__h));\
- } else { \
- ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); \
- }
-
switch (c2c.display) {
case DISPLAY_RMT:
- DISPLAY_HITM(rmt_hitm);
+ ret = display_metrics(hpp, stats->rmt_hitm,
+ c2c_he->stats.rmt_hitm);
break;
case DISPLAY_LCL:
- DISPLAY_HITM(lcl_hitm);
+ ret = display_metrics(hpp, stats->lcl_hitm,
+ c2c_he->stats.lcl_hitm);
break;
case DISPLAY_TOT:
- DISPLAY_HITM(tot_hitm);
+ ret = display_metrics(hpp, stats->tot_hitm,
+ c2c_he->stats.tot_hitm);
+ break;
default:
break;
}
- #undef DISPLAY_HITM
-
advance_hpp(hpp, ret);
if (c2c_he->stats.store > 0) {
@@ -1851,53 +1858,69 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
#define DISPLAY_LINE_LIMIT 0.001
+static u8 filter_display(u32 val, u32 sum)
+{
+ if (sum == 0 || ((double)val / sum) < DISPLAY_LINE_LIMIT)
+ return HIST_FILTER__C2C;
+
+ return 0;
+}
+
static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
{
struct c2c_hist_entry *c2c_he;
- double ld_dist;
if (c2c.show_all)
return true;
c2c_he = container_of(he, struct c2c_hist_entry, he);
-#define FILTER_HITM(__h) \
- if (stats->__h) { \
- ld_dist = ((double)c2c_he->stats.__h / stats->__h); \
- if (ld_dist < DISPLAY_LINE_LIMIT) \
- he->filtered = HIST_FILTER__C2C; \
- } else { \
- he->filtered = HIST_FILTER__C2C; \
- }
-
switch (c2c.display) {
case DISPLAY_LCL:
- FILTER_HITM(lcl_hitm);
+ he->filtered = filter_display(c2c_he->stats.lcl_hitm,
+ stats->lcl_hitm);
break;
case DISPLAY_RMT:
- FILTER_HITM(rmt_hitm);
+ he->filtered = filter_display(c2c_he->stats.rmt_hitm,
+ stats->rmt_hitm);
break;
case DISPLAY_TOT:
- FILTER_HITM(tot_hitm);
+ he->filtered = filter_display(c2c_he->stats.tot_hitm,
+ stats->tot_hitm);
+ break;
default:
break;
}
-#undef FILTER_HITM
-
return he->filtered == 0;
}
-static inline int valid_hitm_or_store(struct hist_entry *he)
+static inline bool is_valid_hist_entry(struct hist_entry *he)
{
struct c2c_hist_entry *c2c_he;
- bool has_hitm;
+ bool has_record = false;
c2c_he = container_of(he, struct c2c_hist_entry, he);
- has_hitm = c2c.display == DISPLAY_TOT ? c2c_he->stats.tot_hitm :
- c2c.display == DISPLAY_LCL ? c2c_he->stats.lcl_hitm :
- c2c_he->stats.rmt_hitm;
- return has_hitm || c2c_he->stats.store;
+
+ /* It's a valid entry if contains stores */
+ if (c2c_he->stats.store)
+ return true;
+
+ switch (c2c.display) {
+ case DISPLAY_LCL:
+ has_record = !!c2c_he->stats.lcl_hitm;
+ break;
+ case DISPLAY_RMT:
+ has_record = !!c2c_he->stats.rmt_hitm;
+ break;
+ case DISPLAY_TOT:
+ has_record = !!c2c_he->stats.tot_hitm;
+ break;
+ default:
+ break;
+ }
+
+ return has_record;
}
static void set_node_width(struct c2c_hist_entry *c2c_he, int len)
@@ -1951,7 +1974,7 @@ static int filter_cb(struct hist_entry *he, void *arg __maybe_unused)
calc_width(c2c_he);
- if (!valid_hitm_or_store(he))
+ if (!is_valid_hist_entry(he))
he->filtered = HIST_FILTER__C2C;
return 0;
@@ -1961,7 +1984,7 @@ static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused)
{
struct c2c_hist_entry *c2c_he;
struct c2c_hists *c2c_hists;
- bool display = he__display(he, &c2c.hitm_stats);
+ bool display = he__display(he, &c2c.shared_clines_stats);
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_hists = c2c_he->hists;
@@ -2048,14 +2071,14 @@ static int setup_nodes(struct perf_session *session)
#define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm)
-static int resort_hitm_cb(struct hist_entry *he, void *arg __maybe_unused)
+static int resort_shared_cl_cb(struct hist_entry *he, void *arg __maybe_unused)
{
struct c2c_hist_entry *c2c_he;
c2c_he = container_of(he, struct c2c_hist_entry, he);
if (HAS_HITMS(c2c_he)) {
c2c.shared_clines++;
- c2c_add_stats(&c2c.hitm_stats, &c2c_he->stats);
+ c2c_add_stats(&c2c.shared_clines_stats, &c2c_he->stats);
}
return 0;
@@ -2111,6 +2134,8 @@ static void print_c2c__display_stats(FILE *out)
fprintf(out, " Load MESI State Exclusive : %10d\n", stats->ld_excl);
fprintf(out, " Load MESI State Shared : %10d\n", stats->ld_shared);
fprintf(out, " Load LLC Misses : %10d\n", llc_misses);
+ fprintf(out, " Load access blocked by data : %10d\n", stats->blk_data);
+ fprintf(out, " Load access blocked by address : %10d\n", stats->blk_addr);
fprintf(out, " LLC Misses to Local DRAM : %10.1f%%\n", ((double)stats->lcl_dram/(double)llc_misses) * 100.);
fprintf(out, " LLC Misses to Remote DRAM : %10.1f%%\n", ((double)stats->rmt_dram/(double)llc_misses) * 100.);
fprintf(out, " LLC Misses to Remote cache (HIT) : %10.1f%%\n", ((double)stats->rmt_hit /(double)llc_misses) * 100.);
@@ -2126,7 +2151,7 @@ static void print_c2c__display_stats(FILE *out)
static void print_shared_cacheline_info(FILE *out)
{
- struct c2c_stats *stats = &c2c.hitm_stats;
+ struct c2c_stats *stats = &c2c.shared_clines_stats;
int hitm_cnt = stats->lcl_hitm + stats->rmt_hitm;
fprintf(out, "=================================================\n");
@@ -2139,6 +2164,7 @@ static void print_shared_cacheline_info(FILE *out)
fprintf(out, " L2D hits on shared lines : %10d\n", stats->ld_l2hit);
fprintf(out, " LLC hits on shared lines : %10d\n", stats->ld_llchit + stats->lcl_hitm);
fprintf(out, " Locked Access on shared lines : %10d\n", stats->locks);
+ fprintf(out, " Blocked Access on shared lines : %10d\n", stats->blk_data + stats->blk_addr);
fprintf(out, " Store HITs on shared lines : %10d\n", stats->store);
fprintf(out, " Store L1D hits on shared lines : %10d\n", stats->st_l1hit);
fprintf(out, " Total Merged records : %10d\n", hitm_cnt + stats->store);
@@ -2176,16 +2202,17 @@ static void print_pareto(FILE *out)
struct perf_hpp_list hpp_list;
struct rb_node *nd;
int ret;
+ const char *cl_output;
+
+ cl_output = "cl_num,"
+ "cl_rmt_hitm,"
+ "cl_lcl_hitm,"
+ "cl_stores_l1hit,"
+ "cl_stores_l1miss,"
+ "dcacheline";
perf_hpp_list__init(&hpp_list);
- ret = hpp_list__parse(&hpp_list,
- "cl_num,"
- "cl_rmt_hitm,"
- "cl_lcl_hitm,"
- "cl_stores_l1hit,"
- "cl_stores_l1miss,"
- "dcacheline",
- NULL);
+ ret = hpp_list__parse(&hpp_list, cl_output, NULL);
if (WARN_ONCE(ret, "failed to setup sort entries\n"))
return;
@@ -2729,6 +2756,7 @@ static int perf_c2c__report(int argc, const char **argv)
OPT_END()
};
int err = 0;
+ const char *output_str, *sort_str = NULL;
argc = parse_options(argc, argv, options, report_c2c_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
@@ -2805,29 +2833,34 @@ static int perf_c2c__report(int argc, const char **argv)
goto out_mem2node;
}
- c2c_hists__reinit(&c2c.hists,
- "cl_idx,"
- "dcacheline,"
- "dcacheline_node,"
- "dcacheline_count,"
- "percent_hitm,"
- "tot_hitm,lcl_hitm,rmt_hitm,"
- "tot_recs,"
- "tot_loads,"
- "tot_stores,"
- "stores_l1hit,stores_l1miss,"
- "ld_fbhit,ld_l1hit,ld_l2hit,"
- "ld_lclhit,lcl_hitm,"
- "ld_rmthit,rmt_hitm,"
- "dram_lcl,dram_rmt",
- c2c.display == DISPLAY_TOT ? "tot_hitm" :
- c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm"
- );
+ output_str = "cl_idx,"
+ "dcacheline,"
+ "dcacheline_node,"
+ "dcacheline_count,"
+ "percent_hitm,"
+ "tot_hitm,lcl_hitm,rmt_hitm,"
+ "tot_recs,"
+ "tot_loads,"
+ "tot_stores,"
+ "stores_l1hit,stores_l1miss,"
+ "ld_fbhit,ld_l1hit,ld_l2hit,"
+ "ld_lclhit,lcl_hitm,"
+ "ld_rmthit,rmt_hitm,"
+ "dram_lcl,dram_rmt";
+
+ if (c2c.display == DISPLAY_TOT)
+ sort_str = "tot_hitm";
+ else if (c2c.display == DISPLAY_RMT)
+ sort_str = "rmt_hitm";
+ else if (c2c.display == DISPLAY_LCL)
+ sort_str = "lcl_hitm";
+
+ c2c_hists__reinit(&c2c.hists, output_str, sort_str);
ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");
hists__collapse_resort(&c2c.hists.hists, NULL);
- hists__output_resort_cb(&c2c.hists.hists, &prog, resort_hitm_cb);
+ hists__output_resort_cb(&c2c.hists.hists, &prog, resort_shared_cl_cb);
hists__iterate_cb(&c2c.hists.hists, resort_cl_cb);
ui_progress__finish();
diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
new file mode 100644
index 000000000000..ace8772a4f03
--- /dev/null
+++ b/tools/perf/builtin-daemon.c
@@ -0,0 +1,1521 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <internal/lib.h>
+#include <subcmd/parse-options.h>
+#include <api/fd/array.h>
+#include <api/fs/fs.h>
+#include <linux/zalloc.h>
+#include <linux/string.h>
+#include <linux/limits.h>
+#include <linux/string.h>
+#include <string.h>
+#include <sys/file.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/inotify.h>
+#include <libgen.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/stat.h>
+#include <sys/signalfd.h>
+#include <sys/wait.h>
+#include <poll.h>
+#include <sys/stat.h>
+#include <time.h>
+#include "builtin.h"
+#include "perf.h"
+#include "debug.h"
+#include "config.h"
+#include "util.h"
+
+#define SESSION_OUTPUT "output"
+#define SESSION_CONTROL "control"
+#define SESSION_ACK "ack"
+
+/*
+ * Session states:
+ *
+ * OK - session is up and running
+ * RECONFIG - session is pending for reconfiguration,
+ * new values are already loaded in session object
+ * KILL - session is pending to be killed
+ *
+ * Session object life and its state is maintained by
+ * following functions:
+ *
+ * setup_server_config
+ * - reads config file and setup session objects
+ * with following states:
+ *
+ * OK - no change needed
+ * RECONFIG - session needs to be changed
+ * (run variable changed)
+ * KILL - session needs to be killed
+ * (session is no longer in config file)
+ *
+ * daemon__reconfig
+ * - scans session objects and does following actions
+ * for states:
+ *
+ * OK - skip
+ * RECONFIG - session is killed and re-run with new config
+ * KILL - session is killed
+ *
+ * - all sessions have OK state on the function exit
+ */
+enum daemon_session_state {
+ OK,
+ RECONFIG,
+ KILL,
+};
+
+struct daemon_session {
+ char *base;
+ char *name;
+ char *run;
+ char *control;
+ int pid;
+ struct list_head list;
+ enum daemon_session_state state;
+ time_t start;
+};
+
+struct daemon {
+ const char *config;
+ char *config_real;
+ char *config_base;
+ const char *csv_sep;
+ const char *base_user;
+ char *base;
+ struct list_head sessions;
+ FILE *out;
+ char perf[PATH_MAX];
+ int signal_fd;
+ time_t start;
+};
+
+static struct daemon __daemon = {
+ .sessions = LIST_HEAD_INIT(__daemon.sessions),
+};
+
+static const char * const daemon_usage[] = {
+ "perf daemon start [<options>]",
+ "perf daemon [<options>]",
+ NULL
+};
+
+static bool done;
+
+static void sig_handler(int sig __maybe_unused)
+{
+ done = true;
+}
+
+static struct daemon_session *daemon__add_session(struct daemon *config, char *name)
+{
+ struct daemon_session *session = zalloc(sizeof(*session));
+
+ if (!session)
+ return NULL;
+
+ session->name = strdup(name);
+ if (!session->name) {
+ free(session);
+ return NULL;
+ }
+
+ session->pid = -1;
+ list_add_tail(&session->list, &config->sessions);
+ return session;
+}
+
+static struct daemon_session *daemon__find_session(struct daemon *daemon, char *name)
+{
+ struct daemon_session *session;
+
+ list_for_each_entry(session, &daemon->sessions, list) {
+ if (!strcmp(session->name, name))
+ return session;
+ }
+
+ return NULL;
+}
+
+static int get_session_name(const char *var, char *session, int len)
+{
+ const char *p = var + sizeof("session-") - 1;
+
+ while (*p != '.' && *p != 0x0 && len--)
+ *session++ = *p++;
+
+ *session = 0;
+ return *p == '.' ? 0 : -EINVAL;
+}
+
+static int session_config(struct daemon *daemon, const char *var, const char *value)
+{
+ struct daemon_session *session;
+ char name[100];
+
+ if (get_session_name(var, name, sizeof(name) - 1))
+ return -EINVAL;
+
+ var = strchr(var, '.');
+ if (!var)
+ return -EINVAL;
+
+ var++;
+
+ session = daemon__find_session(daemon, name);
+
+ if (!session) {
+ /* New session is defined. */
+ session = daemon__add_session(daemon, name);
+ if (!session)
+ return -ENOMEM;
+
+ pr_debug("reconfig: found new session %s\n", name);
+
+ /* Trigger reconfig to start it. */
+ session->state = RECONFIG;
+ } else if (session->state == KILL) {
+ /* Current session is defined, no action needed. */
+ pr_debug("reconfig: found current session %s\n", name);
+ session->state = OK;
+ }
+
+ if (!strcmp(var, "run")) {
+ bool same = false;
+
+ if (session->run)
+ same = !strcmp(session->run, value);
+
+ if (!same) {
+ if (session->run) {
+ free(session->run);
+ pr_debug("reconfig: session %s is changed\n", name);
+ }
+
+ session->run = strdup(value);
+ if (!session->run)
+ return -ENOMEM;
+
+ /*
+ * Either new or changed run value is defined,
+ * trigger reconfig for the session.
+ */
+ session->state = RECONFIG;
+ }
+ }
+
+ return 0;
+}
+
+static int server_config(const char *var, const char *value, void *cb)
+{
+ struct daemon *daemon = cb;
+
+ if (strstarts(var, "session-")) {
+ return session_config(daemon, var, value);
+ } else if (!strcmp(var, "daemon.base") && !daemon->base_user) {
+ if (daemon->base && strcmp(daemon->base, value)) {
+ pr_err("failed: can't redefine base, bailing out\n");
+ return -EINVAL;
+ }
+ daemon->base = strdup(value);
+ if (!daemon->base)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int client_config(const char *var, const char *value, void *cb)
+{
+ struct daemon *daemon = cb;
+
+ if (!strcmp(var, "daemon.base") && !daemon->base_user) {
+ daemon->base = strdup(value);
+ if (!daemon->base)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int check_base(struct daemon *daemon)
+{
+ struct stat st;
+
+ if (!daemon->base) {
+ pr_err("failed: base not defined\n");
+ return -EINVAL;
+ }
+
+ if (stat(daemon->base, &st)) {
+ switch (errno) {
+ case EACCES:
+ pr_err("failed: permission denied for '%s' base\n",
+ daemon->base);
+ return -EACCES;
+ case ENOENT:
+ pr_err("failed: base '%s' does not exists\n",
+ daemon->base);
+ return -EACCES;
+ default:
+ pr_err("failed: can't access base '%s': %s\n",
+ daemon->base, strerror(errno));
+ return -errno;
+ }
+ }
+
+ if ((st.st_mode & S_IFMT) != S_IFDIR) {
+ pr_err("failed: base '%s' is not directory\n",
+ daemon->base);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int setup_client_config(struct daemon *daemon)
+{
+ struct perf_config_set *set = perf_config_set__load_file(daemon->config_real);
+ int err = -ENOMEM;
+
+ if (set) {
+ err = perf_config_set(set, client_config, daemon);
+ perf_config_set__delete(set);
+ }
+
+ return err ?: check_base(daemon);
+}
+
+static int setup_server_config(struct daemon *daemon)
+{
+ struct perf_config_set *set;
+ struct daemon_session *session;
+ int err = -ENOMEM;
+
+ pr_debug("reconfig: started\n");
+
+ /*
+ * Mark all sessions for kill, the server config
+ * will set following states, see explanation at
+ * enum daemon_session_state declaration.
+ */
+ list_for_each_entry(session, &daemon->sessions, list)
+ session->state = KILL;
+
+ set = perf_config_set__load_file(daemon->config_real);
+ if (set) {
+ err = perf_config_set(set, server_config, daemon);
+ perf_config_set__delete(set);
+ }
+
+ return err ?: check_base(daemon);
+}
+
+static int daemon_session__run(struct daemon_session *session,
+ struct daemon *daemon)
+{
+ char buf[PATH_MAX];
+ char **argv;
+ int argc, fd;
+
+ if (asprintf(&session->base, "%s/session-%s",
+ daemon->base, session->name) < 0) {
+ perror("failed: asprintf");
+ return -1;
+ }
+
+ if (mkdir(session->base, 0755) && errno != EEXIST) {
+ perror("failed: mkdir");
+ return -1;
+ }
+
+ session->start = time(NULL);
+
+ session->pid = fork();
+ if (session->pid < 0)
+ return -1;
+ if (session->pid > 0) {
+ pr_info("reconfig: ruining session [%s:%d]: %s\n",
+ session->name, session->pid, session->run);
+ return 0;
+ }
+
+ if (chdir(session->base)) {
+ perror("failed: chdir");
+ return -1;
+ }
+
+ fd = open("/dev/null", O_RDONLY);
+ if (fd < 0) {
+ perror("failed: open /dev/null");
+ return -1;
+ }
+
+ dup2(fd, 0);
+ close(fd);
+
+ fd = open(SESSION_OUTPUT, O_RDWR|O_CREAT|O_TRUNC, 0644);
+ if (fd < 0) {
+ perror("failed: open session output");
+ return -1;
+ }
+
+ dup2(fd, 1);
+ dup2(fd, 2);
+ close(fd);
+
+ if (mkfifo(SESSION_CONTROL, 0600) && errno != EEXIST) {
+ perror("failed: create control fifo");
+ return -1;
+ }
+
+ if (mkfifo(SESSION_ACK, 0600) && errno != EEXIST) {
+ perror("failed: create ack fifo");
+ return -1;
+ }
+
+ scnprintf(buf, sizeof(buf), "%s record --control=fifo:%s,%s %s",
+ daemon->perf, SESSION_CONTROL, SESSION_ACK, session->run);
+
+ argv = argv_split(buf, &argc);
+ if (!argv)
+ exit(-1);
+
+ exit(execve(daemon->perf, argv, NULL));
+ return -1;
+}
+
+static pid_t handle_signalfd(struct daemon *daemon)
+{
+ struct daemon_session *session;
+ struct signalfd_siginfo si;
+ ssize_t err;
+ int status;
+ pid_t pid;
+
+ err = read(daemon->signal_fd, &si, sizeof(struct signalfd_siginfo));
+ if (err != sizeof(struct signalfd_siginfo))
+ return -1;
+
+ list_for_each_entry(session, &daemon->sessions, list) {
+
+ if (session->pid != (int) si.ssi_pid)
+ continue;
+
+ pid = waitpid(session->pid, &status, 0);
+ if (pid == session->pid) {
+ if (WIFEXITED(status)) {
+ pr_info("session '%s' exited, status=%d\n",
+ session->name, WEXITSTATUS(status));
+ } else if (WIFSIGNALED(status)) {
+ pr_info("session '%s' killed (signal %d)\n",
+ session->name, WTERMSIG(status));
+ } else if (WIFSTOPPED(status)) {
+ pr_info("session '%s' stopped (signal %d)\n",
+ session->name, WSTOPSIG(status));
+ } else {
+ pr_info("session '%s' Unexpected status (0x%x)\n",
+ session->name, status);
+ }
+ }
+
+ session->state = KILL;
+ session->pid = -1;
+ return pid;
+ }
+
+ return 0;
+}
+
+static int daemon_session__wait(struct daemon_session *session, struct daemon *daemon,
+ int secs)
+{
+ struct pollfd pollfd = {
+ .fd = daemon->signal_fd,
+ .events = POLLIN,
+ };
+ pid_t wpid = 0, pid = session->pid;
+ time_t start;
+
+ start = time(NULL);
+
+ do {
+ int err = poll(&pollfd, 1, 1000);
+
+ if (err > 0) {
+ wpid = handle_signalfd(daemon);
+ } else if (err < 0) {
+ perror("failed: poll\n");
+ return -1;
+ }
+
+ if (start + secs < time(NULL))
+ return -1;
+ } while (wpid != pid);
+
+ return 0;
+}
+
+static bool daemon__has_alive_session(struct daemon *daemon)
+{
+ struct daemon_session *session;
+
+ list_for_each_entry(session, &daemon->sessions, list) {
+ if (session->pid != -1)
+ return true;
+ }
+
+ return false;
+}
+
+static int daemon__wait(struct daemon *daemon, int secs)
+{
+ struct pollfd pollfd = {
+ .fd = daemon->signal_fd,
+ .events = POLLIN,
+ };
+ time_t start;
+
+ start = time(NULL);
+
+ do {
+ int err = poll(&pollfd, 1, 1000);
+
+ if (err > 0) {
+ handle_signalfd(daemon);
+ } else if (err < 0) {
+ perror("failed: poll\n");
+ return -1;
+ }
+
+ if (start + secs < time(NULL))
+ return -1;
+ } while (daemon__has_alive_session(daemon));
+
+ return 0;
+}
+
+static int daemon_session__control(struct daemon_session *session,
+ const char *msg, bool do_ack)
+{
+ struct pollfd pollfd = { .events = POLLIN, };
+ char control_path[PATH_MAX];
+ char ack_path[PATH_MAX];
+ int control, ack = -1, len;
+ char buf[20];
+ int ret = -1;
+ ssize_t err;
+
+ /* open the control file */
+ scnprintf(control_path, sizeof(control_path), "%s/%s",
+ session->base, SESSION_CONTROL);
+
+ control = open(control_path, O_WRONLY|O_NONBLOCK);
+ if (!control)
+ return -1;
+
+ if (do_ack) {
+ /* open the ack file */
+ scnprintf(ack_path, sizeof(ack_path), "%s/%s",
+ session->base, SESSION_ACK);
+
+ ack = open(ack_path, O_RDONLY, O_NONBLOCK);
+ if (!ack) {
+ close(control);
+ return -1;
+ }
+ }
+
+ /* write the command */
+ len = strlen(msg);
+
+ err = writen(control, msg, len);
+ if (err != len) {
+ pr_err("failed: write to control pipe: %d (%s)\n",
+ errno, control_path);
+ goto out;
+ }
+
+ if (!do_ack)
+ goto out;
+
+ /* wait for an ack */
+ pollfd.fd = ack;
+
+ if (!poll(&pollfd, 1, 2000)) {
+ pr_err("failed: control ack timeout\n");
+ goto out;
+ }
+
+ if (!(pollfd.revents & POLLIN)) {
+ pr_err("failed: did not received an ack\n");
+ goto out;
+ }
+
+ err = read(ack, buf, sizeof(buf));
+ if (err > 0)
+ ret = strcmp(buf, "ack\n");
+ else
+ perror("failed: read ack %d\n");
+
+out:
+ if (ack != -1)
+ close(ack);
+
+ close(control);
+ return ret;
+}
+
+static int setup_server_socket(struct daemon *daemon)
+{
+ struct sockaddr_un addr;
+ char path[PATH_MAX];
+ int fd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+ if (fd < 0) {
+ fprintf(stderr, "socket: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
+ perror("failed: fcntl FD_CLOEXEC");
+ close(fd);
+ return -1;
+ }
+
+ scnprintf(path, sizeof(path), "%s/control", daemon->base);
+
+ if (strlen(path) + 1 >= sizeof(addr.sun_path)) {
+ pr_err("failed: control path too long '%s'\n", path);
+ close(fd);
+ return -1;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+
+ strlcpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+ unlink(path);
+
+ if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+ perror("failed: bind");
+ close(fd);
+ return -1;
+ }
+
+ if (listen(fd, 1) == -1) {
+ perror("failed: listen");
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+enum {
+ CMD_LIST = 0,
+ CMD_SIGNAL = 1,
+ CMD_STOP = 2,
+ CMD_PING = 3,
+ CMD_MAX,
+};
+
+#define SESSION_MAX 64
+
+union cmd {
+ int cmd;
+
+ /* CMD_LIST */
+ struct {
+ int cmd;
+ int verbose;
+ char csv_sep;
+ } list;
+
+ /* CMD_SIGNAL */
+ struct {
+ int cmd;
+ int sig;
+ char name[SESSION_MAX];
+ } signal;
+
+ /* CMD_PING */
+ struct {
+ int cmd;
+ char name[SESSION_MAX];
+ } ping;
+};
+
+enum {
+ PING_OK = 0,
+ PING_FAIL = 1,
+ PING_MAX,
+};
+
+static int daemon_session__ping(struct daemon_session *session)
+{
+ return daemon_session__control(session, "ping", true) ? PING_FAIL : PING_OK;
+}
+
+static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
+{
+ char csv_sep = cmd->list.csv_sep;
+ struct daemon_session *session;
+ time_t curr = time(NULL);
+
+ if (csv_sep) {
+ fprintf(out, "%d%c%s%c%s%c%s/%s",
+ /* pid daemon */
+ getpid(), csv_sep, "daemon",
+ /* base */
+ csv_sep, daemon->base,
+ /* output */
+ csv_sep, daemon->base, SESSION_OUTPUT);
+
+ fprintf(out, "%c%s/%s",
+ /* lock */
+ csv_sep, daemon->base, "lock");
+
+ fprintf(out, "%c%lu",
+ /* session up time */
+ csv_sep, (curr - daemon->start) / 60);
+
+ fprintf(out, "\n");
+ } else {
+ fprintf(out, "[%d:daemon] base: %s\n", getpid(), daemon->base);
+ if (cmd->list.verbose) {
+ fprintf(out, " output: %s/%s\n",
+ daemon->base, SESSION_OUTPUT);
+ fprintf(out, " lock: %s/lock\n",
+ daemon->base);
+ fprintf(out, " up: %lu minutes\n",
+ (curr - daemon->start) / 60);
+ }
+ }
+
+ list_for_each_entry(session, &daemon->sessions, list) {
+ if (csv_sep) {
+ fprintf(out, "%d%c%s%c%s",
+ /* pid */
+ session->pid,
+ /* name */
+ csv_sep, session->name,
+ /* base */
+ csv_sep, session->run);
+
+ fprintf(out, "%c%s%c%s/%s",
+ /* session dir */
+ csv_sep, session->base,
+ /* session output */
+ csv_sep, session->base, SESSION_OUTPUT);
+
+ fprintf(out, "%c%s/%s%c%s/%s",
+ /* session control */
+ csv_sep, session->base, SESSION_CONTROL,
+ /* session ack */
+ csv_sep, session->base, SESSION_ACK);
+
+ fprintf(out, "%c%lu",
+ /* session up time */
+ csv_sep, (curr - session->start) / 60);
+
+ fprintf(out, "\n");
+ } else {
+ fprintf(out, "[%d:%s] perf record %s\n",
+ session->pid, session->name, session->run);
+ if (!cmd->list.verbose)
+ continue;
+ fprintf(out, " base: %s\n",
+ session->base);
+ fprintf(out, " output: %s/%s\n",
+ session->base, SESSION_OUTPUT);
+ fprintf(out, " control: %s/%s\n",
+ session->base, SESSION_CONTROL);
+ fprintf(out, " ack: %s/%s\n",
+ session->base, SESSION_ACK);
+ fprintf(out, " up: %lu minutes\n",
+ (curr - session->start) / 60);
+ }
+ }
+
+ return 0;
+}
+
+static int daemon_session__signal(struct daemon_session *session, int sig)
+{
+ if (session->pid < 0)
+ return -1;
+ return kill(session->pid, sig);
+}
+
+static int cmd_session_kill(struct daemon *daemon, union cmd *cmd, FILE *out)
+{
+ struct daemon_session *session;
+ bool all = false;
+
+ all = !strcmp(cmd->signal.name, "all");
+
+ list_for_each_entry(session, &daemon->sessions, list) {
+ if (all || !strcmp(cmd->signal.name, session->name)) {
+ daemon_session__signal(session, cmd->signal.sig);
+ fprintf(out, "signal %d sent to session '%s [%d]'\n",
+ cmd->signal.sig, session->name, session->pid);
+ }
+ }
+
+ return 0;
+}
+
+static const char *ping_str[PING_MAX] = {
+ [PING_OK] = "OK",
+ [PING_FAIL] = "FAIL",
+};
+
+static int cmd_session_ping(struct daemon *daemon, union cmd *cmd, FILE *out)
+{
+ struct daemon_session *session;
+ bool all = false, found = false;
+
+ all = !strcmp(cmd->ping.name, "all");
+
+ list_for_each_entry(session, &daemon->sessions, list) {
+ if (all || !strcmp(cmd->ping.name, session->name)) {
+ int state = daemon_session__ping(session);
+
+ fprintf(out, "%-4s %s\n", ping_str[state], session->name);
+ found = true;
+ }
+ }
+
+ if (!found && !all) {
+ fprintf(out, "%-4s %s (not found)\n",
+ ping_str[PING_FAIL], cmd->ping.name);
+ }
+ return 0;
+}
+
+static int handle_server_socket(struct daemon *daemon, int sock_fd)
+{
+ int ret = -1, fd;
+ FILE *out = NULL;
+ union cmd cmd;
+
+ fd = accept(sock_fd, NULL, NULL);
+ if (fd < 0) {
+ perror("failed: accept");
+ return -1;
+ }
+
+ if (sizeof(cmd) != readn(fd, &cmd, sizeof(cmd))) {
+ perror("failed: read");
+ goto out;
+ }
+
+ out = fdopen(fd, "w");
+ if (!out) {
+ perror("failed: fdopen");
+ goto out;
+ }
+
+ switch (cmd.cmd) {
+ case CMD_LIST:
+ ret = cmd_session_list(daemon, &cmd, out);
+ break;
+ case CMD_SIGNAL:
+ ret = cmd_session_kill(daemon, &cmd, out);
+ break;
+ case CMD_STOP:
+ done = 1;
+ ret = 0;
+ pr_debug("perf daemon is exciting\n");
+ break;
+ case CMD_PING:
+ ret = cmd_session_ping(daemon, &cmd, out);
+ break;
+ default:
+ break;
+ }
+
+ fclose(out);
+out:
+ /* If out is defined, then fd is closed via fclose. */
+ if (!out)
+ close(fd);
+ return ret;
+}
+
+static int setup_client_socket(struct daemon *daemon)
+{
+ struct sockaddr_un addr;
+ char path[PATH_MAX];
+ int fd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+ if (fd == -1) {
+ perror("failed: socket");
+ return -1;
+ }
+
+ scnprintf(path, sizeof(path), "%s/control", daemon->base);
+
+ if (strlen(path) + 1 >= sizeof(addr.sun_path)) {
+ pr_err("failed: control path too long '%s'\n", path);
+ close(fd);
+ return -1;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ strlcpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+
+ if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1) {
+ perror("failed: connect");
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+static void daemon_session__kill(struct daemon_session *session,
+ struct daemon *daemon)
+{
+ int how = 0;
+
+ do {
+ switch (how) {
+ case 0:
+ daemon_session__control(session, "stop", false);
+ break;
+ case 1:
+ daemon_session__signal(session, SIGTERM);
+ break;
+ case 2:
+ daemon_session__signal(session, SIGKILL);
+ break;
+ default:
+ break;
+ }
+ how++;
+
+ } while (daemon_session__wait(session, daemon, 10));
+}
+
+static void daemon__signal(struct daemon *daemon, int sig)
+{
+ struct daemon_session *session;
+
+ list_for_each_entry(session, &daemon->sessions, list)
+ daemon_session__signal(session, sig);
+}
+
+static void daemon_session__delete(struct daemon_session *session)
+{
+ free(session->base);
+ free(session->name);
+ free(session->run);
+ free(session);
+}
+
+static void daemon_session__remove(struct daemon_session *session)
+{
+ list_del(&session->list);
+ daemon_session__delete(session);
+}
+
+static void daemon__stop(struct daemon *daemon)
+{
+ struct daemon_session *session;
+
+ list_for_each_entry(session, &daemon->sessions, list)
+ daemon_session__control(session, "stop", false);
+}
+
+static void daemon__kill(struct daemon *daemon)
+{
+ int how = 0;
+
+ do {
+ switch (how) {
+ case 0:
+ daemon__stop(daemon);
+ break;
+ case 1:
+ daemon__signal(daemon, SIGTERM);
+ break;
+ case 2:
+ daemon__signal(daemon, SIGKILL);
+ break;
+ default:
+ break;
+ }
+ how++;
+
+ } while (daemon__wait(daemon, 10));
+}
+
+static void daemon__exit(struct daemon *daemon)
+{
+ struct daemon_session *session, *h;
+
+ list_for_each_entry_safe(session, h, &daemon->sessions, list)
+ daemon_session__remove(session);
+
+ free(daemon->config_real);
+ free(daemon->config_base);
+ free(daemon->base);
+}
+
+static int daemon__reconfig(struct daemon *daemon)
+{
+ struct daemon_session *session, *n;
+
+ list_for_each_entry_safe(session, n, &daemon->sessions, list) {
+ /* No change. */
+ if (session->state == OK)
+ continue;
+
+ /* Remove session. */
+ if (session->state == KILL) {
+ if (session->pid > 0) {
+ daemon_session__kill(session, daemon);
+ pr_info("reconfig: session '%s' killed\n", session->name);
+ }
+ daemon_session__remove(session);
+ continue;
+ }
+
+ /* Reconfig session. */
+ if (session->pid > 0) {
+ daemon_session__kill(session, daemon);
+ pr_info("reconfig: session '%s' killed\n", session->name);
+ }
+ if (daemon_session__run(session, daemon))
+ return -1;
+
+ session->state = OK;
+ }
+
+ return 0;
+}
+
+static int setup_config_changes(struct daemon *daemon)
+{
+ char *basen = strdup(daemon->config_real);
+ char *dirn = strdup(daemon->config_real);
+ char *base, *dir;
+ int fd, wd = -1;
+
+ if (!dirn || !basen)
+ goto out;
+
+ fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
+ if (fd < 0) {
+ perror("failed: inotify_init");
+ goto out;
+ }
+
+ dir = dirname(dirn);
+ base = basename(basen);
+ pr_debug("config file: %s, dir: %s\n", base, dir);
+
+ wd = inotify_add_watch(fd, dir, IN_CLOSE_WRITE);
+ if (wd >= 0) {
+ daemon->config_base = strdup(base);
+ if (!daemon->config_base) {
+ close(fd);
+ wd = -1;
+ }
+ } else {
+ perror("failed: inotify_add_watch");
+ }
+
+out:
+ free(basen);
+ free(dirn);
+ return wd < 0 ? -1 : fd;
+}
+
+static bool process_inotify_event(struct daemon *daemon, char *buf, ssize_t len)
+{
+ char *p = buf;
+
+ while (p < (buf + len)) {
+ struct inotify_event *event = (struct inotify_event *) p;
+
+ /*
+ * We monitor config directory, check if our
+ * config file was changes.
+ */
+ if ((event->mask & IN_CLOSE_WRITE) &&
+ !(event->mask & IN_ISDIR)) {
+ if (!strcmp(event->name, daemon->config_base))
+ return true;
+ }
+ p += sizeof(*event) + event->len;
+ }
+ return false;
+}
+
+static int handle_config_changes(struct daemon *daemon, int conf_fd,
+ bool *config_changed)
+{
+ char buf[4096];
+ ssize_t len;
+
+ while (!(*config_changed)) {
+ len = read(conf_fd, buf, sizeof(buf));
+ if (len == -1) {
+ if (errno != EAGAIN) {
+ perror("failed: read");
+ return -1;
+ }
+ return 0;
+ }
+ *config_changed = process_inotify_event(daemon, buf, len);
+ }
+ return 0;
+}
+
+static int setup_config(struct daemon *daemon)
+{
+ if (daemon->base_user) {
+ daemon->base = strdup(daemon->base_user);
+ if (!daemon->base)
+ return -ENOMEM;
+ }
+
+ if (daemon->config) {
+ char *real = realpath(daemon->config, NULL);
+
+ if (!real) {
+ perror("failed: realpath");
+ return -1;
+ }
+ daemon->config_real = real;
+ return 0;
+ }
+
+ if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK))
+ daemon->config_real = strdup(perf_etc_perfconfig());
+ else if (perf_config_global() && perf_home_perfconfig())
+ daemon->config_real = strdup(perf_home_perfconfig());
+
+ return daemon->config_real ? 0 : -1;
+}
+
+#ifndef F_TLOCK
+#define F_TLOCK 2
+
+#include <sys/file.h>
+
+static int lockf(int fd, int cmd, off_t len)
+{
+ if (cmd != F_TLOCK || len != 0)
+ return -1;
+
+ return flock(fd, LOCK_EX | LOCK_NB);
+}
+#endif // F_TLOCK
+
+/*
+ * Each daemon tries to create and lock BASE/lock file,
+ * if it's successful we are sure we're the only daemon
+ * running over the BASE.
+ *
+ * Once daemon is finished, file descriptor to lock file
+ * is closed and lock is released.
+ */
+static int check_lock(struct daemon *daemon)
+{
+ char path[PATH_MAX];
+ char buf[20];
+ int fd, pid;
+ ssize_t len;
+
+ scnprintf(path, sizeof(path), "%s/lock", daemon->base);
+
+ fd = open(path, O_RDWR|O_CREAT|O_CLOEXEC, 0640);
+ if (fd < 0)
+ return -1;
+
+ if (lockf(fd, F_TLOCK, 0) < 0) {
+ filename__read_int(path, &pid);
+ fprintf(stderr, "failed: another perf daemon (pid %d) owns %s\n",
+ pid, daemon->base);
+ close(fd);
+ return -1;
+ }
+
+ scnprintf(buf, sizeof(buf), "%d", getpid());
+ len = strlen(buf);
+
+ if (write(fd, buf, len) != len) {
+ perror("failed: write");
+ close(fd);
+ return -1;
+ }
+
+ if (ftruncate(fd, len)) {
+ perror("failed: ftruncate");
+ close(fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int go_background(struct daemon *daemon)
+{
+ int pid, fd;
+
+ pid = fork();
+ if (pid < 0)
+ return -1;
+
+ if (pid > 0)
+ return 1;
+
+ if (setsid() < 0)
+ return -1;
+
+ if (check_lock(daemon))
+ return -1;
+
+ umask(0);
+
+ if (chdir(daemon->base)) {
+ perror("failed: chdir");
+ return -1;
+ }
+
+ fd = open("output", O_RDWR|O_CREAT|O_TRUNC, 0644);
+ if (fd < 0) {
+ perror("failed: open");
+ return -1;
+ }
+
+ if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
+ perror("failed: fcntl FD_CLOEXEC");
+ close(fd);
+ return -1;
+ }
+
+ close(0);
+ dup2(fd, 1);
+ dup2(fd, 2);
+ close(fd);
+
+ daemon->out = fdopen(1, "w");
+ if (!daemon->out) {
+ close(1);
+ close(2);
+ return -1;
+ }
+
+ setbuf(daemon->out, NULL);
+ return 0;
+}
+
+static int setup_signalfd(struct daemon *daemon)
+{
+ sigset_t mask;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGCHLD);
+
+ if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1)
+ return -1;
+
+ daemon->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
+ return daemon->signal_fd;
+}
+
+static int __cmd_start(struct daemon *daemon, struct option parent_options[],
+ int argc, const char **argv)
+{
+ bool foreground = false;
+ struct option start_options[] = {
+ OPT_BOOLEAN('f', "foreground", &foreground, "stay on console"),
+ OPT_PARENT(parent_options),
+ OPT_END()
+ };
+ int sock_fd = -1, conf_fd = -1, signal_fd = -1;
+ int sock_pos, file_pos, signal_pos;
+ struct fdarray fda;
+ int err = 0;
+
+ argc = parse_options(argc, argv, start_options, daemon_usage, 0);
+ if (argc)
+ usage_with_options(daemon_usage, start_options);
+
+ daemon->start = time(NULL);
+
+ if (setup_config(daemon)) {
+ pr_err("failed: config not found\n");
+ return -1;
+ }
+
+ if (setup_server_config(daemon))
+ return -1;
+
+ if (foreground && check_lock(daemon))
+ return -1;
+
+ if (!foreground) {
+ err = go_background(daemon);
+ if (err) {
+ /* original process, exit normally */
+ if (err == 1)
+ err = 0;
+ daemon__exit(daemon);
+ return err;
+ }
+ }
+
+ debug_set_file(daemon->out);
+ debug_set_display_time(true);
+
+ pr_info("daemon started (pid %d)\n", getpid());
+
+ fdarray__init(&fda, 3);
+
+ sock_fd = setup_server_socket(daemon);
+ if (sock_fd < 0)
+ goto out;
+
+ conf_fd = setup_config_changes(daemon);
+ if (conf_fd < 0)
+ goto out;
+
+ signal_fd = setup_signalfd(daemon);
+ if (signal_fd < 0)
+ goto out;
+
+ sock_pos = fdarray__add(&fda, sock_fd, POLLIN|POLLERR|POLLHUP, 0);
+ if (sock_pos < 0)
+ goto out;
+
+ file_pos = fdarray__add(&fda, conf_fd, POLLIN|POLLERR|POLLHUP, 0);
+ if (file_pos < 0)
+ goto out;
+
+ signal_pos = fdarray__add(&fda, signal_fd, POLLIN|POLLERR|POLLHUP, 0);
+ if (signal_pos < 0)
+ goto out;
+
+ signal(SIGINT, sig_handler);
+ signal(SIGTERM, sig_handler);
+ signal(SIGPIPE, SIG_IGN);
+
+ while (!done && !err) {
+ err = daemon__reconfig(daemon);
+
+ if (!err && fdarray__poll(&fda, -1)) {
+ bool reconfig = false;
+
+ if (fda.entries[sock_pos].revents & POLLIN)
+ err = handle_server_socket(daemon, sock_fd);
+ if (fda.entries[file_pos].revents & POLLIN)
+ err = handle_config_changes(daemon, conf_fd, &reconfig);
+ if (fda.entries[signal_pos].revents & POLLIN)
+ err = handle_signalfd(daemon) < 0;
+
+ if (reconfig)
+ err = setup_server_config(daemon);
+ }
+ }
+
+out:
+ fdarray__exit(&fda);
+
+ daemon__kill(daemon);
+ daemon__exit(daemon);
+
+ if (sock_fd != -1)
+ close(sock_fd);
+ if (conf_fd != -1)
+ close(conf_fd);
+ if (conf_fd != -1)
+ close(signal_fd);
+
+ pr_info("daemon exited\n");
+ fclose(daemon->out);
+ return err;
+}
+
+static int send_cmd(struct daemon *daemon, union cmd *cmd)
+{
+ int ret = -1, fd;
+ char *line = NULL;
+ size_t len = 0;
+ ssize_t nread;
+ FILE *in = NULL;
+
+ if (setup_client_config(daemon))
+ return -1;
+
+ fd = setup_client_socket(daemon);
+ if (fd < 0)
+ return -1;
+
+ if (sizeof(*cmd) != writen(fd, cmd, sizeof(*cmd))) {
+ perror("failed: write");
+ goto out;
+ }
+
+ in = fdopen(fd, "r");
+ if (!in) {
+ perror("failed: fdopen");
+ goto out;
+ }
+
+ while ((nread = getline(&line, &len, in)) != -1) {
+ if (fwrite(line, nread, 1, stdout) != 1)
+ goto out_fclose;
+ fflush(stdout);
+ }
+
+ ret = 0;
+out_fclose:
+ fclose(in);
+ free(line);
+out:
+ /* If in is defined, then fd is closed via fclose. */
+ if (!in)
+ close(fd);
+ return ret;
+}
+
+static int send_cmd_list(struct daemon *daemon)
+{
+ union cmd cmd = { .cmd = CMD_LIST, };
+
+ cmd.list.verbose = verbose;
+ cmd.list.csv_sep = daemon->csv_sep ? *daemon->csv_sep : 0;
+
+ return send_cmd(daemon, &cmd);
+}
+
+static int __cmd_signal(struct daemon *daemon, struct option parent_options[],
+ int argc, const char **argv)
+{
+ const char *name = "all";
+ struct option start_options[] = {
+ OPT_STRING(0, "session", &name, "session",
+ "Sent signal to specific session"),
+ OPT_PARENT(parent_options),
+ OPT_END()
+ };
+ union cmd cmd;
+
+ argc = parse_options(argc, argv, start_options, daemon_usage, 0);
+ if (argc)
+ usage_with_options(daemon_usage, start_options);
+
+ if (setup_config(daemon)) {
+ pr_err("failed: config not found\n");
+ return -1;
+ }
+
+ cmd.signal.cmd = CMD_SIGNAL,
+ cmd.signal.sig = SIGUSR2;
+ strncpy(cmd.signal.name, name, sizeof(cmd.signal.name) - 1);
+
+ return send_cmd(daemon, &cmd);
+}
+
+static int __cmd_stop(struct daemon *daemon, struct option parent_options[],
+ int argc, const char **argv)
+{
+ struct option start_options[] = {
+ OPT_PARENT(parent_options),
+ OPT_END()
+ };
+ union cmd cmd = { .cmd = CMD_STOP, };
+
+ argc = parse_options(argc, argv, start_options, daemon_usage, 0);
+ if (argc)
+ usage_with_options(daemon_usage, start_options);
+
+ if (setup_config(daemon)) {
+ pr_err("failed: config not found\n");
+ return -1;
+ }
+
+ return send_cmd(daemon, &cmd);
+}
+
+static int __cmd_ping(struct daemon *daemon, struct option parent_options[],
+ int argc, const char **argv)
+{
+ const char *name = "all";
+ struct option ping_options[] = {
+ OPT_STRING(0, "session", &name, "session",
+ "Ping to specific session"),
+ OPT_PARENT(parent_options),
+ OPT_END()
+ };
+ union cmd cmd = { .cmd = CMD_PING, };
+
+ argc = parse_options(argc, argv, ping_options, daemon_usage, 0);
+ if (argc)
+ usage_with_options(daemon_usage, ping_options);
+
+ if (setup_config(daemon)) {
+ pr_err("failed: config not found\n");
+ return -1;
+ }
+
+ scnprintf(cmd.ping.name, sizeof(cmd.ping.name), "%s", name);
+ return send_cmd(daemon, &cmd);
+}
+
+int cmd_daemon(int argc, const char **argv)
+{
+ struct option daemon_options[] = {
+ OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+ OPT_STRING(0, "config", &__daemon.config,
+ "config file", "config file path"),
+ OPT_STRING(0, "base", &__daemon.base_user,
+ "directory", "base directory"),
+ OPT_STRING_OPTARG('x', "field-separator", &__daemon.csv_sep,
+ "field separator", "print counts with custom separator", ","),
+ OPT_END()
+ };
+
+ perf_exe(__daemon.perf, sizeof(__daemon.perf));
+ __daemon.out = stdout;
+
+ argc = parse_options(argc, argv, daemon_options, daemon_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (argc) {
+ if (!strcmp(argv[0], "start"))
+ return __cmd_start(&__daemon, daemon_options, argc, argv);
+ if (!strcmp(argv[0], "signal"))
+ return __cmd_signal(&__daemon, daemon_options, argc, argv);
+ else if (!strcmp(argv[0], "stop"))
+ return __cmd_stop(&__daemon, daemon_options, argc, argv);
+ else if (!strcmp(argv[0], "ping"))
+ return __cmd_ping(&__daemon, daemon_options, argc, argv);
+
+ pr_err("failed: unknown command '%s'\n", argv[0]);
+ return -1;
+ }
+
+ if (setup_config(&__daemon)) {
+ pr_err("failed: config not found\n");
+ return -1;
+ }
+
+ return send_cmd_list(&__daemon);
+}
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 8f6c784ce629..878e04b1fab7 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1236,7 +1236,8 @@ static int __cmd_diff(void)
out_delete:
data__for_each_file(i, d) {
- perf_session__delete(d->session);
+ if (!IS_ERR(d->session))
+ perf_session__delete(d->session);
data__free(d);
}
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 43937f4b399a..6fe44d97fde5 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -313,7 +313,7 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
* if jit marker, then inject jit mmaps and generate ELF images
*/
ret = jit_process(inject->session, &inject->output, machine,
- event->mmap.filename, event->mmap.pid, &n);
+ event->mmap.filename, event->mmap.pid, event->mmap.tid, &n);
if (ret < 0)
return ret;
if (ret) {
@@ -413,7 +413,7 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
* if jit marker, then inject jit mmaps and generate ELF images
*/
ret = jit_process(inject->session, &inject->output, machine,
- event->mmap2.filename, event->mmap2.pid, &n);
+ event->mmap2.filename, event->mmap2.pid, event->mmap2.tid, &n);
if (ret < 0)
return ret;
if (ret) {
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 823742036ddb..cdd2b9f643f6 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -30,6 +30,7 @@ struct perf_mem {
bool dump_raw;
bool force;
bool phys_addr;
+ bool data_page_size;
int operation;
const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -124,6 +125,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
if (mem->phys_addr)
rec_argv[i++] = "--phys-data";
+ if (mem->data_page_size)
+ rec_argv[i++] = "--data-page-size";
+
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
e = perf_mem_events__ptr(j);
if (!e->record)
@@ -172,7 +176,8 @@ dump_raw_samples(struct perf_tool *tool,
{
struct perf_mem *mem = container_of(tool, struct perf_mem, tool);
struct addr_location al;
- const char *fmt;
+ const char *fmt, *field_sep;
+ char str[PAGE_SIZE_NAME_LEN];
if (machine__resolve(machine, &al, sample) < 0) {
fprintf(stderr, "problem processing %d event, skipping it.\n",
@@ -186,60 +191,47 @@ dump_raw_samples(struct perf_tool *tool,
if (al.map != NULL)
al.map->dso->hit = 1;
- if (mem->phys_addr) {
- if (symbol_conf.field_sep) {
- fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64
- "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n";
- } else {
- fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
- "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64
- "%s%s:%s\n";
- symbol_conf.field_sep = " ";
- }
+ field_sep = symbol_conf.field_sep;
+ if (field_sep) {
+ fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s";
+ } else {
+ fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64"%s";
+ symbol_conf.field_sep = " ";
+ }
+ printf(fmt,
+ sample->pid,
+ symbol_conf.field_sep,
+ sample->tid,
+ symbol_conf.field_sep,
+ sample->ip,
+ symbol_conf.field_sep,
+ sample->addr,
+ symbol_conf.field_sep);
- printf(fmt,
- sample->pid,
- symbol_conf.field_sep,
- sample->tid,
- symbol_conf.field_sep,
- sample->ip,
- symbol_conf.field_sep,
- sample->addr,
- symbol_conf.field_sep,
+ if (mem->phys_addr) {
+ printf("0x%016"PRIx64"%s",
sample->phys_addr,
- symbol_conf.field_sep,
- sample->weight,
- symbol_conf.field_sep,
- sample->data_src,
- symbol_conf.field_sep,
- al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
- al.sym ? al.sym->name : "???");
- } else {
- if (symbol_conf.field_sep) {
- fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
- "%s0x%"PRIx64"%s%s:%s\n";
- } else {
- fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
- "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
- symbol_conf.field_sep = " ";
- }
+ symbol_conf.field_sep);
+ }
- printf(fmt,
- sample->pid,
- symbol_conf.field_sep,
- sample->tid,
- symbol_conf.field_sep,
- sample->ip,
- symbol_conf.field_sep,
- sample->addr,
- symbol_conf.field_sep,
- sample->weight,
- symbol_conf.field_sep,
- sample->data_src,
- symbol_conf.field_sep,
- al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
- al.sym ? al.sym->name : "???");
+ if (mem->data_page_size) {
+ printf("%s%s",
+ get_page_size_name(sample->data_page_size, str),
+ symbol_conf.field_sep);
}
+
+ if (field_sep)
+ fmt = "%"PRIu64"%s0x%"PRIx64"%s%s:%s\n";
+ else
+ fmt = "%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
+
+ printf(fmt,
+ sample->weight,
+ symbol_conf.field_sep,
+ sample->data_src,
+ symbol_conf.field_sep,
+ al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+ al.sym ? al.sym->name : "???");
out_put:
addr_location__put(&al);
return 0;
@@ -287,10 +279,15 @@ static int report_raw_events(struct perf_mem *mem)
if (ret < 0)
goto out_delete;
+ printf("# PID, TID, IP, ADDR, ");
+
if (mem->phys_addr)
- printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
- else
- printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+ printf("PHYS ADDR, ");
+
+ if (mem->data_page_size)
+ printf("DATA PAGE SIZE, ");
+
+ printf("LOCAL WEIGHT, DSRC, SYMBOL\n");
ret = perf_session__process_events(session);
@@ -300,7 +297,7 @@ out_delete:
}
static char *get_sort_order(struct perf_mem *mem)
{
- bool has_extra_options = mem->phys_addr ? true : false;
+ bool has_extra_options = (mem->phys_addr | mem->data_page_size) ? true : false;
char sort[128];
/*
@@ -312,13 +309,16 @@ static char *get_sort_order(struct perf_mem *mem)
"dso_daddr,tlb,locked");
} else if (has_extra_options) {
strcpy(sort, "--sort=local_weight,mem,sym,dso,symbol_daddr,"
- "dso_daddr,snoop,tlb,locked");
+ "dso_daddr,snoop,tlb,locked,blocked");
} else
return NULL;
if (mem->phys_addr)
strcat(sort, ",phys_daddr");
+ if (mem->data_page_size)
+ strcat(sort, ",data_page_size");
+
return strdup(sort);
}
@@ -464,6 +464,7 @@ int cmd_mem(int argc, const char **argv)
" between columns '.' is reserved."),
OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"),
+ OPT_BOOLEAN(0, "data-page-size", &mem.data_page_size, "Record/Report sample data address page size"),
OPT_END()
};
const char *const mem_subcommands[] = { "record", "report", NULL };
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index fd3911650612..35465d1db6dd 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -102,6 +102,7 @@ struct record {
bool no_buildid_cache;
bool no_buildid_cache_set;
bool buildid_all;
+ bool buildid_mmap;
bool timestamp_filename;
bool timestamp_boundary;
struct switch_output switch_output;
@@ -730,6 +731,8 @@ static int record__auxtrace_init(struct record *rec)
if (err)
return err;
+ auxtrace_regroup_aux_output(rec->evlist);
+
return auxtrace_parse_filters(rec->evlist);
}
@@ -1663,7 +1666,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
status = -1;
goto out_delete_session;
}
- err = evlist__add_pollfd(rec->evlist, done_fd);
+ err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
if (err < 0) {
pr_err("Failed to add wakeup eventfd to poll list\n");
status = err;
@@ -1937,18 +1940,19 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
switch (cmd) {
- case EVLIST_CTL_CMD_ENABLE:
- pr_info(EVLIST_ENABLED_MSG);
- break;
- case EVLIST_CTL_CMD_DISABLE:
- pr_info(EVLIST_DISABLED_MSG);
- break;
case EVLIST_CTL_CMD_SNAPSHOT:
hit_auxtrace_snapshot_trigger(rec);
evlist__ctlfd_ack(rec->evlist);
break;
+ case EVLIST_CTL_CMD_STOP:
+ done = 1;
+ break;
case EVLIST_CTL_CMD_ACK:
case EVLIST_CTL_CMD_UNSUPPORTED:
+ case EVLIST_CTL_CMD_ENABLE:
+ case EVLIST_CTL_CMD_DISABLE:
+ case EVLIST_CTL_CMD_EVLIST:
+ case EVLIST_CTL_CMD_PING:
default:
break;
}
@@ -2135,6 +2139,8 @@ static int perf_record_config(const char *var, const char *value, void *cb)
rec->no_buildid_cache = true;
else if (!strcmp(value, "skip"))
rec->no_buildid = true;
+ else if (!strcmp(value, "mmap"))
+ rec->buildid_mmap = true;
else
return -1;
return 0;
@@ -2474,6 +2480,8 @@ static struct option __record_options[] = {
"Record the sample physical addresses"),
OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
"Record the sampled data address data page size"),
+ OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
+ "Record the sampled code address (ip) page size"),
OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
&record.opts.sample_time_set,
@@ -2552,6 +2560,8 @@ static struct option __record_options[] = {
"file", "vmlinux pathname"),
OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
"Record build-id of all DSOs regardless of hits"),
+ OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
+ "Record build-id in map events"),
OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
"append timestamp to output filename"),
OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
@@ -2655,6 +2665,21 @@ int cmd_record(int argc, const char **argv)
}
+ if (rec->buildid_mmap) {
+ if (!perf_can_record_build_id()) {
+ pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
+ err = -EINVAL;
+ goto out_opts;
+ }
+ pr_debug("Enabling build id in mmap2 events.\n");
+ /* Enable mmap build id synthesizing. */
+ symbol_conf.buildid_mmap2 = true;
+ /* Enable perf_event_attr::build_id bit. */
+ rec->opts.build_id = true;
+ /* Disable build id cache. */
+ rec->no_buildid = true;
+ }
+
if (rec->opts.kcore)
rec->data.is_dir = true;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 42dad4a0f8cf..5915f19cee55 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -117,6 +117,7 @@ enum perf_output_field {
PERF_OUTPUT_IPC = 1ULL << 31,
PERF_OUTPUT_TOD = 1ULL << 32,
PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33,
+ PERF_OUTPUT_CODE_PAGE_SIZE = 1ULL << 34,
};
struct perf_script {
@@ -182,6 +183,7 @@ struct output_option {
{.str = "ipc", .field = PERF_OUTPUT_IPC},
{.str = "tod", .field = PERF_OUTPUT_TOD},
{.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE},
+ {.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE},
};
enum {
@@ -256,7 +258,7 @@ static struct {
PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC |
PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR |
- PERF_OUTPUT_DATA_PAGE_SIZE,
+ PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE,
.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
},
@@ -523,6 +525,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
evsel__check_stype(evsel, PERF_SAMPLE_DATA_PAGE_SIZE, "DATA_PAGE_SIZE", PERF_OUTPUT_DATA_PAGE_SIZE))
return -EINVAL;
+ if (PRINT_FIELD(CODE_PAGE_SIZE) &&
+ evsel__check_stype(evsel, PERF_SAMPLE_CODE_PAGE_SIZE, "CODE_PAGE_SIZE", PERF_OUTPUT_CODE_PAGE_SIZE))
+ return -EINVAL;
+
return 0;
}
@@ -1531,6 +1537,8 @@ static struct {
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"},
{0, NULL}
};
@@ -1760,6 +1768,18 @@ static int perf_sample__fprintf_synth_cbr(struct perf_sample *sample, FILE *fp)
return len + perf_sample__fprintf_pt_spacing(len, fp);
}
+static int perf_sample__fprintf_synth_psb(struct perf_sample *sample, FILE *fp)
+{
+ struct perf_synth_intel_psb *data = perf_sample__synth_ptr(sample);
+ int len;
+
+ if (perf_sample__bad_synth_size(sample, *data))
+ return 0;
+
+ len = fprintf(fp, " psb offs: %#" PRIx64, data->offset);
+ return len + perf_sample__fprintf_pt_spacing(len, fp);
+}
+
static int perf_sample__fprintf_synth(struct perf_sample *sample,
struct evsel *evsel, FILE *fp)
{
@@ -1776,6 +1796,8 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
return perf_sample__fprintf_synth_pwrx(sample, fp);
case PERF_SYNTH_INTEL_CBR:
return perf_sample__fprintf_synth_cbr(sample, fp);
+ case PERF_SYNTH_INTEL_PSB:
+ return perf_sample__fprintf_synth_psb(sample, fp);
default:
break;
}
@@ -2036,6 +2058,9 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(DATA_PAGE_SIZE))
fprintf(fp, " %s", get_page_size_name(sample->data_page_size, str));
+ if (PRINT_FIELD(CODE_PAGE_SIZE))
+ fprintf(fp, " %s", get_page_size_name(sample->code_page_size, str));
+
perf_sample__fprintf_ipc(sample, attr, fp);
fprintf(fp, "\n");
@@ -2786,7 +2811,7 @@ parse:
break;
}
if (i == imax && strcmp(tok, "flags") == 0) {
- print_flags = change == REMOVE ? false : true;
+ print_flags = change != REMOVE;
continue;
}
if (i == imax) {
@@ -3234,7 +3259,7 @@ static char *get_script_path(const char *script_root, const char *suffix)
static bool is_top_script(const char *script_path)
{
- return ends_with(script_path, "top") == NULL ? false : true;
+ return ends_with(script_path, "top") != NULL;
}
static int has_required_arg(char *script_path)
@@ -3535,12 +3560,16 @@ int cmd_script(int argc, const char **argv)
"addr,symoff,srcline,period,iregs,uregs,brstack,"
"brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
"callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod,"
- "data_page_size",
+ "data_page_size,code_page_size",
parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
+ OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
+ "only consider symbols in these DSOs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
"only consider these symbols"),
+ OPT_INTEGER(0, "addr-range", &symbol_conf.addr_range,
+ "Use with -S to list traced records within address range"),
OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL,
"Decode instructions from itrace", parse_insn_trace),
OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 8cc24967bc27..2e2e4a8345ea 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -67,6 +67,7 @@
#include "util/top.h"
#include "util/affinity.h"
#include "util/pfm.h"
+#include "util/bpf_counter.h"
#include "asm/bug.h"
#include <linux/time64.h>
@@ -137,6 +138,19 @@ static const char *topdown_metric_attrs[] = {
NULL,
};
+static const char *topdown_metric_L2_attrs[] = {
+ "slots",
+ "topdown-retiring",
+ "topdown-bad-spec",
+ "topdown-fe-bound",
+ "topdown-be-bound",
+ "topdown-heavy-ops",
+ "topdown-br-mispredict",
+ "topdown-fetch-lat",
+ "topdown-mem-bound",
+ NULL,
+};
+
static const char *smi_cost_attrs = {
"{"
"msr/aperf/,"
@@ -409,12 +423,32 @@ static int read_affinity_counters(struct timespec *rs)
return 0;
}
+static int read_bpf_map_counters(void)
+{
+ struct evsel *counter;
+ int err;
+
+ evlist__for_each_entry(evsel_list, counter) {
+ err = bpf_counter__read(counter);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static void read_counters(struct timespec *rs)
{
struct evsel *counter;
+ int err;
- if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0))
- return;
+ if (!stat_config.stop_read_counter) {
+ if (target__has_bpf(&target))
+ err = read_bpf_map_counters();
+ else
+ err = read_affinity_counters(rs);
+ if (err < 0)
+ return;
+ }
evlist__for_each_entry(evsel_list, counter) {
if (counter->err)
@@ -496,11 +530,22 @@ static bool handle_interval(unsigned int interval, int *times)
return false;
}
-static void enable_counters(void)
+static int enable_counters(void)
{
+ struct evsel *evsel;
+ int err;
+
+ if (target__has_bpf(&target)) {
+ evlist__for_each_entry(evsel_list, evsel) {
+ err = bpf_counter__enable(evsel);
+ if (err)
+ return err;
+ }
+ }
+
if (stat_config.initial_delay < 0) {
pr_info(EVLIST_DISABLED_MSG);
- return;
+ return 0;
}
if (stat_config.initial_delay > 0) {
@@ -518,6 +563,7 @@ static void enable_counters(void)
if (stat_config.initial_delay > 0)
pr_info(EVLIST_ENABLED_MSG);
}
+ return 0;
}
static void disable_counters(void)
@@ -578,18 +624,19 @@ static void process_evlist(struct evlist *evlist, unsigned int interval)
if (evlist__ctlfd_process(evlist, &cmd) > 0) {
switch (cmd) {
case EVLIST_CTL_CMD_ENABLE:
- pr_info(EVLIST_ENABLED_MSG);
if (interval)
process_interval();
break;
case EVLIST_CTL_CMD_DISABLE:
if (interval)
process_interval();
- pr_info(EVLIST_DISABLED_MSG);
break;
case EVLIST_CTL_CMD_SNAPSHOT:
case EVLIST_CTL_CMD_ACK:
case EVLIST_CTL_CMD_UNSUPPORTED:
+ case EVLIST_CTL_CMD_EVLIST:
+ case EVLIST_CTL_CMD_STOP:
+ case EVLIST_CTL_CMD_PING:
default:
break;
}
@@ -720,7 +767,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
struct affinity affinity;
- int i, cpu;
+ int i, cpu, err;
bool second_pass = false;
if (forks) {
@@ -737,6 +784,13 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (affinity__setup(&affinity) < 0)
return -1;
+ if (target__has_bpf(&target)) {
+ evlist__for_each_entry(evsel_list, counter) {
+ if (bpf_counter__load(counter, &target))
+ return -1;
+ }
+ }
+
evlist__for_each_cpu (evsel_list, i, cpu) {
affinity__set(&affinity, cpu);
@@ -850,7 +904,7 @@ try_again_reset:
}
if (STAT_RECORD) {
- int err, fd = perf_data__fd(&perf_stat.data);
+ int fd = perf_data__fd(&perf_stat.data);
if (is_pipe) {
err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
@@ -876,7 +930,9 @@ try_again_reset:
if (forks) {
evlist__start_workload(evsel_list);
- enable_counters();
+ err = enable_counters();
+ if (err)
+ return -1;
if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
status = dispatch_events(forks, timeout, interval, &times);
@@ -895,7 +951,9 @@ try_again_reset:
if (WIFSIGNALED(status))
psignal(WTERMSIG(status), argv[0]);
} else {
- enable_counters();
+ err = enable_counters();
+ if (err)
+ return -1;
status = dispatch_events(forks, timeout, interval, &times);
}
@@ -1085,6 +1143,10 @@ static struct option stat_options[] = {
"stat events on existing process id"),
OPT_STRING('t', "tid", &target.tid, "tid",
"stat events on existing thread id"),
+#ifdef HAVE_BPF_SKEL
+ OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id",
+ "stat events on existing bpf program id"),
+#endif
OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
"system-wide collection from all CPUs"),
OPT_BOOLEAN('g', "group", &group,
@@ -1153,7 +1215,9 @@ static struct option stat_options[] = {
OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
"don't try to share events between metrics in a group"),
OPT_BOOLEAN(0, "topdown", &topdown_run,
- "measure topdown level 1 statistics"),
+ "measure top-down statistics"),
+ OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
+ "Set the metrics level for the top-down statistics (0: max level)"),
OPT_BOOLEAN(0, "smi-cost", &smi_cost,
"measure SMI cost"),
OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
@@ -1706,17 +1770,30 @@ static int add_default_attributes(void)
}
if (topdown_run) {
+ const char **metric_attrs = topdown_metric_attrs;
+ unsigned int max_level = 1;
char *str = NULL;
bool warn = false;
if (!force_metric_only)
stat_config.metric_only = true;
- if (topdown_filter_events(topdown_metric_attrs, &str, 1) < 0) {
+ if (pmu_have_event("cpu", topdown_metric_L2_attrs[5])) {
+ metric_attrs = topdown_metric_L2_attrs;
+ max_level = 2;
+ }
+
+ if (stat_config.topdown_level > max_level) {
+ pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level);
+ return -1;
+ } else if (!stat_config.topdown_level)
+ stat_config.topdown_level = max_level;
+
+ if (topdown_filter_events(metric_attrs, &str, 1) < 0) {
pr_err("Out of memory\n");
return -1;
}
- if (topdown_metric_attrs[0] && str) {
+ if (metric_attrs[0] && str) {
if (!stat_config.interval && !stat_config.metric_only) {
fprintf(stat_config.output,
"Topdown accuracy may decrease when measuring long periods.\n"
@@ -1779,6 +1856,9 @@ setup_metrics:
}
if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
return -1;
+
+ if (arch_evlist__add_default_attrs(evsel_list) < 0)
+ return -1;
}
/* Detailed events get appended to the event list: */
@@ -2064,11 +2144,12 @@ int cmd_stat(int argc, const char **argv)
"perf stat [<options>] [<command>]",
NULL
};
- int status = -EINVAL, run_idx;
+ int status = -EINVAL, run_idx, err;
const char *mode;
FILE *output = stderr;
unsigned int interval, timeout;
const char * const stat_subcommands[] = { "record", "report" };
+ char errbuf[BUFSIZ];
setlocale(LC_ALL, "");
@@ -2179,6 +2260,12 @@ int cmd_stat(int argc, const char **argv)
} else if (big_num_opt == 0) /* User passed --no-big-num */
stat_config.big_num = false;
+ err = target__validate(&target);
+ if (err) {
+ target__strerror(&target, err, errbuf, BUFSIZ);
+ pr_warning("%s\n", errbuf);
+ }
+
setup_system_wide(argc);
/*
@@ -2252,8 +2339,6 @@ int cmd_stat(int argc, const char **argv)
}
}
- target__validate(&target);
-
if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
target.per_thread = true;
@@ -2384,9 +2469,10 @@ int cmd_stat(int argc, const char **argv)
* tools remain -acme
*/
int fd = perf_data__fd(&perf_stat.data);
- int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
- process_synthesized_event,
- &perf_stat.session->machines.host);
+
+ err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
+ process_synthesized_event,
+ &perf_stat.session->machines.host);
if (err) {
pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
"older tools may produce warnings about this file\n.");
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 85b6a46e85b6..7ec18ff57fc4 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3964,9 +3964,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
evlist__config(evlist, &trace->opts, &callchain_param);
- signal(SIGCHLD, sig_handler);
- signal(SIGINT, sig_handler);
-
if (forks) {
err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL);
if (err < 0) {
@@ -4827,6 +4824,8 @@ int cmd_trace(int argc, const char **argv)
signal(SIGSEGV, sighandler_dump_stack);
signal(SIGFPE, sighandler_dump_stack);
+ signal(SIGCHLD, sig_handler);
+ signal(SIGINT, sig_handler);
trace.evlist = evlist__new();
trace.sctbl = syscalltbl__new();
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 14a2db622a7b..7303e80a639c 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -37,6 +37,7 @@ int cmd_inject(int argc, const char **argv);
int cmd_mem(int argc, const char **argv);
int cmd_data(int argc, const char **argv);
int cmd_ftrace(int argc, const char **argv);
+int cmd_daemon(int argc, const char **argv);
int find_scripts(char **scripts_array, char **scripts_path_array, int num,
int pathlen);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index bc6c585f74fc..825a12e8d694 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -31,3 +31,4 @@ perf-timechart mainporcelain common
perf-top mainporcelain common
perf-trace mainporcelain audit
perf-version mainporcelain common
+perf-daemon mainporcelain common
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index 0cfb3e2cefef..133f0eddbcc4 100644
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -20,9 +20,8 @@ else
fi
BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
-NOBUILDID=0000000000000000000000000000000000000000
-perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS
+perf buildid-list -i $PERF_DATA --with-hits | grep -v "^ " > $BUILDIDS
if [ ! -s $BUILDIDS ] ; then
echo "perf archive: no build-ids found"
rm $BUILDIDS || true
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 27f94b0bb874..20cb91ef06ff 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -88,6 +88,7 @@ static struct cmd_struct commands[] = {
{ "mem", cmd_mem, 0 },
{ "data", cmd_data, 0 },
{ "ftrace", cmd_ftrace, 0 },
+ { "daemon", cmd_daemon, 0 },
};
struct pager_config {
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json
index 2d15b11e5383..5c69c1e82ef8 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json
@@ -9,15 +9,11 @@
"ArchStdEvent": "BR_INDIRECT_SPEC"
},
{
- "PublicDescription": "Mispredicted or not predicted branch speculatively executed",
- "EventCode": "0x10",
- "EventName": "BR_MIS_PRED",
+ "ArchStdEvent": "BR_MIS_PRED",
"BriefDescription": "Branch mispredicted"
},
{
- "PublicDescription": "Predictable branch speculatively executed",
- "EventCode": "0x12",
- "EventName": "BR_PRED",
+ "ArchStdEvent": "BR_PRED",
"BriefDescription": "Predictable branch"
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json
index 5c1a9a922ca4..9bea1ba1c4d2 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json
@@ -18,9 +18,6 @@
"ArchStdEvent": "BUS_ACCESS_PERIPH"
},
{
- "PublicDescription": "Bus access",
- "EventCode": "0x19",
- "EventName": "BUS_ACCESS",
- "BriefDescription": "Bus access"
+ "ArchStdEvent": "BUS_ACCESS",
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
index 40010a8724b3..1e25f2ae4ae0 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
@@ -39,70 +39,40 @@
"ArchStdEvent": "L2D_CACHE_INVAL"
},
{
- "PublicDescription": "Level 1 instruction cache refill",
- "EventCode": "0x01",
- "EventName": "L1I_CACHE_REFILL",
- "BriefDescription": "L1I cache refill"
+ "ArchStdEvent": "L1I_CACHE_REFILL",
},
{
- "PublicDescription": "Level 1 instruction TLB refill",
- "EventCode": "0x02",
- "EventName": "L1I_TLB_REFILL",
- "BriefDescription": "L1I TLB refill"
+ "ArchStdEvent": "L1I_TLB_REFILL",
},
{
- "PublicDescription": "Level 1 data cache refill",
- "EventCode": "0x03",
- "EventName": "L1D_CACHE_REFILL",
- "BriefDescription": "L1D cache refill"
+ "ArchStdEvent": "L1D_CACHE_REFILL",
},
{
- "PublicDescription": "Level 1 data cache access",
- "EventCode": "0x04",
- "EventName": "L1D_CACHE_ACCESS",
- "BriefDescription": "L1D cache access"
+ "ArchStdEvent": "L1D_CACHE",
},
{
- "PublicDescription": "Level 1 data TLB refill",
- "EventCode": "0x05",
- "EventName": "L1D_TLB_REFILL",
- "BriefDescription": "L1D TLB refill"
+ "ArchStdEvent": "L1D_TLB_REFILL",
},
{
- "PublicDescription": "Level 1 instruction cache access",
- "EventCode": "0x14",
- "EventName": "L1I_CACHE_ACCESS",
- "BriefDescription": "L1I cache access"
+ "ArchStdEvent": "L1I_CACHE",
},
{
- "PublicDescription": "Level 2 data cache access",
- "EventCode": "0x16",
- "EventName": "L2D_CACHE_ACCESS",
- "BriefDescription": "L2D cache access"
+ "ArchStdEvent": "L2D_CACHE",
},
{
- "PublicDescription": "Level 2 data refill",
- "EventCode": "0x17",
- "EventName": "L2D_CACHE_REFILL",
- "BriefDescription": "L2D cache refill"
+ "ArchStdEvent": "L2D_CACHE_REFILL",
},
{
- "PublicDescription": "Level 2 data cache, Write-Back",
- "EventCode": "0x18",
- "EventName": "L2D_CACHE_WB",
- "BriefDescription": "L2D cache Write-Back"
+ "ArchStdEvent": "L2D_CACHE_WB",
},
{
- "PublicDescription": "Level 1 data TLB access. This event counts any load or store operation which accesses the data L1 TLB",
- "EventCode": "0x25",
- "EventName": "L1D_TLB_ACCESS",
+ "PublicDescription": "This event counts any load or store operation which accesses the data L1 TLB",
+ "ArchStdEvent": "L1D_TLB",
"BriefDescription": "L1D TLB access"
},
{
- "PublicDescription": "Level 1 instruction TLB access. This event counts any instruction fetch which accesses the instruction L1 TLB",
- "EventCode": "0x26",
- "EventName": "L1I_TLB_ACCESS",
- "BriefDescription": "L1I TLB access"
+ "PublicDescription": "This event counts any instruction fetch which accesses the instruction L1 TLB",
+ "ArchStdEvent": "L1I_TLB",
},
{
"PublicDescription": "Level 2 access to data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count",
@@ -114,7 +84,7 @@
"PublicDescription": "Level 2 access to instruciton TLB that caused a page table walk. This event counts on any instruciton access which causes L2I_TLB_REFILL to count",
"EventCode": "0x35",
"EventName": "L2I_TLB_ACCESS",
- "BriefDescription": "L2D TLB access"
+ "BriefDescription": "L2I TLB access"
},
{
"PublicDescription": "Branch target buffer misprediction",
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json
index 51d1dc1519b2..9076ca2daf9e 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json
@@ -1,9 +1,7 @@
[
{
"PublicDescription": "The number of core clock cycles",
- "EventCode": "0x11",
- "EventName": "CPU_CYCLES",
- "BriefDescription": "Clock cycles"
+ "ArchStdEvent": "CPU_CYCLES",
},
{
"PublicDescription": "FSU clocking gated off cycle",
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json
index 66e51bc64b22..9761433ad329 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json
@@ -36,15 +36,9 @@
"ArchStdEvent": "EXC_TRAP_FIQ"
},
{
- "PublicDescription": "Exception taken",
- "EventCode": "0x09",
- "EventName": "EXC_TAKEN",
- "BriefDescription": "Exception taken"
+ "ArchStdEvent": "EXC_TAKEN",
},
{
- "PublicDescription": "Instruction architecturally executed, condition check pass, exception return",
- "EventCode": "0x0a",
- "EventName": "EXC_RETURN",
- "BriefDescription": "Exception return"
+ "ArchStdEvent": "EXC_RETURN",
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json
index 0d3e46776642..482aa3f19e58 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json
@@ -40,45 +40,29 @@
},
{
"PublicDescription": "Instruction architecturally executed, software increment",
- "EventCode": "0x00",
- "EventName": "SW_INCR",
+ "ArchStdEvent": "SW_INCR",
"BriefDescription": "Software increment"
},
{
- "PublicDescription": "Instruction architecturally executed",
- "EventCode": "0x08",
- "EventName": "INST_RETIRED",
- "BriefDescription": "Instruction retired"
+ "ArchStdEvent": "INST_RETIRED",
},
{
- "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR",
- "EventCode": "0x0b",
- "EventName": "CID_WRITE_RETIRED",
+ "ArchStdEvent": "CID_WRITE_RETIRED",
"BriefDescription": "Write to CONTEXTIDR"
},
{
- "PublicDescription": "Operation speculatively executed",
- "EventCode": "0x1b",
- "EventName": "INST_SPEC",
- "BriefDescription": "Speculatively executed"
+ "ArchStdEvent": "INST_SPEC",
},
{
- "PublicDescription": "Instruction architecturally executed (condition check pass), write to TTBR",
- "EventCode": "0x1c",
- "EventName": "TTBR_WRITE_RETIRED",
- "BriefDescription": "Instruction executed, TTBR write"
+ "ArchStdEvent": "TTBR_WRITE_RETIRED",
},
{
- "PublicDescription": "Instruction architecturally executed, branch. This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches",
- "EventCode": "0x21",
- "EventName": "BR_RETIRED",
- "BriefDescription": "Branch retired"
+ "PublicDescription": "This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches",
+ "ArchStdEvent": "BR_RETIRED",
},
{
- "PublicDescription": "Instruction architecturally executed, mispredicted branch. This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush",
- "EventCode": "0x22",
- "EventName": "BR_MISPRED_RETIRED",
- "BriefDescription": "Mispredicted branch retired"
+ "PublicDescription": "This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush",
+ "ArchStdEvent": "BR_MIS_PRED_RETIRED",
},
{
"PublicDescription": "Operation speculatively executed, NOP",
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json
index c2fe674df960..2e7555696caf 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json
@@ -15,15 +15,10 @@
"ArchStdEvent": "UNALIGNED_LDST_SPEC"
},
{
- "PublicDescription": "Data memory access",
- "EventCode": "0x13",
- "EventName": "MEM_ACCESS",
- "BriefDescription": "Memory access"
+ "ArchStdEvent": "MEM_ACCESS",
},
{
- "PublicDescription": "Local memory error. This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs",
- "EventCode": "0x1a",
- "EventName": "MEM_ERROR",
- "BriefDescription": "Memory error"
+ "PublicDescription": "This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs",
+ "ArchStdEvent": "MEMORY_ERROR",
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json
index b5e5d055c70d..ec0dc92288ab 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json
@@ -1,14 +1,10 @@
[
{
- "PublicDescription": "Mispredicted or not predicted branch speculatively executed. This event counts any predictable branch instruction which is mispredicted either due to dynamic misprediction or because the MMU is off and the branches are statically predicted not taken.",
- "EventCode": "0x10",
- "EventName": "BR_MIS_PRED",
- "BriefDescription": "Mispredicted or not predicted branch speculatively executed."
+ "PublicDescription": "This event counts any predictable branch instruction which is mispredicted either due to dynamic misprediction or because the MMU is off and the branches are statically predicted not taken",
+ "ArchStdEvent": "BR_MIS_PRED",
},
{
- "PublicDescription": "Predictable branch speculatively executed. This event counts all predictable branches.",
- "EventCode": "0x12",
- "EventName": "BR_PRED",
- "BriefDescription": "Predictable branch speculatively executed."
+ "PublicDescription": "This event counts all predictable branches.",
+ "ArchStdEvent": "BR_PRED",
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json
index fce7309ae624..6263929efce2 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json
@@ -1,24 +1,21 @@
[
{
- "EventCode": "0x11",
- "EventName": "CPU_CYCLES",
+ "PublicDescription": "The number of core clock cycles"
+ "ArchStdEvent": "CPU_CYCLES",
"BriefDescription": "The number of core clock cycles."
},
{
- "PublicDescription": "Bus access. This event counts for every beat of data transferred over the data channels between the core and the SCU. If both read and write data beats are transferred on a given cycle, this event is counted twice on that cycle. This event counts the sum of BUS_ACCESS_RD and BUS_ACCESS_WR.",
- "EventCode": "0x19",
- "EventName": "BUS_ACCESS",
- "BriefDescription": "Bus access."
+ "PublicDescription": "This event counts for every beat of data transferred over the data channels between the core and the SCU. If both read and write data beats are transferred on a given cycle, this event is counted twice on that cycle. This event counts the sum of BUS_ACCESS_RD and BUS_ACCESS_WR.",
+ "ArchStdEvent": "BUS_ACCESS",
},
{
- "EventCode": "0x1D",
- "EventName": "BUS_CYCLES",
- "BriefDescription": "Bus cycles. This event duplicates CPU_CYCLES."
+ "PublicDescription": "This event duplicates CPU_CYCLES."
+ "ArchStdEvent": "BUS_CYCLES",
},
{
- "ArchStdEvent": "BUS_ACCESS_RD"
+ "ArchStdEvent": "BUS_ACCESS_RD",
},
{
- "ArchStdEvent": "BUS_ACCESS_WR"
+ "ArchStdEvent": "BUS_ACCESS_WR",
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json
index 24594081c199..cd67bb9df139 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json
@@ -1,133 +1,95 @@
[
{
- "PublicDescription": "L1 instruction cache refill. This event counts any instruction fetch which misses in the cache.",
- "EventCode": "0x01",
- "EventName": "L1I_CACHE_REFILL",
- "BriefDescription": "L1 instruction cache refill"
+ "PublicDescription": "This event counts any instruction fetch which misses in the cache.",
+ "ArchStdEvent": "L1I_CACHE_REFILL",
},
{
- "PublicDescription": "L1 instruction TLB refill. This event counts any refill of the instruction L1 TLB from the L2 TLB. This includes refills that result in a translation fault.",
- "EventCode": "0x02",
- "EventName": "L1I_TLB_REFILL",
- "BriefDescription": "L1 instruction TLB refill"
+ "PublicDescription": "This event counts any refill of the instruction L1 TLB from the L2 TLB. This includes refills that result in a translation fault.",
+ "ArchStdEvent": "L1I_TLB_REFILL",
},
{
- "PublicDescription": "L1 data cache refill. This event counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.",
- "EventCode": "0x03",
- "EventName": "L1D_CACHE_REFILL",
- "BriefDescription": "L1 data cache refill"
+ "PublicDescription": "This event counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.",
+ "ArchStdEvent": "L1D_CACHE_REFILL",
},
{
- "PublicDescription": "L1 data cache access. This event counts any load or store operation or page table walk access which looks up in the L1 data cache. In particular, any access which could count the L1D_CACHE_REFILL event causes this event to count.",
- "EventCode": "0x04",
- "EventName": "L1D_CACHE",
- "BriefDescription": "L1 data cache access"
+ "PublicDescription": "This event counts any load or store operation or page table walk access which looks up in the L1 data cache. In particular, any access which could count the L1D_CACHE_REFILL event causes this event to count.",
+ "ArchStdEvent": "L1D_CACHE",
},
{
- "PublicDescription": "L1 data TLB refill. This event counts any refill of the data L1 TLB from the L2 TLB. This includes refills that result in a translation fault.",
- "EventCode": "0x05",
- "EventName": "L1D_TLB_REFILL",
- "BriefDescription": "L1 data TLB refill"
+ "PublicDescription": "This event counts any refill of the data L1 TLB from the L2 TLB. This includes refills that result in a translation fault.",
+ "ArchStdEvent": "L1D_TLB_REFILL",
},
- {
+ {,
"PublicDescription": "Level 1 instruction cache access or Level 0 Macro-op cache access. This event counts any instruction fetch which accesses the L1 instruction cache or L0 Macro-op cache.",
- "EventCode": "0x14",
- "EventName": "L1I_CACHE",
- "BriefDescription": "L1 instruction cache access"
+ "ArchStdEvent": "L1I_CACHE",
},
{
- "PublicDescription": "L1 data cache Write-Back. This event counts any write-back of data from the L1 data cache to L2 or L3. This counts both victim line evictions and snoops, including cache maintenance operations.",
- "EventCode": "0x15",
- "EventName": "L1D_CACHE_WB",
- "BriefDescription": "L1 data cache Write-Back"
+ "PublicDescription": "This event counts any write-back of data from the L1 data cache to L2 or L3. This counts both victim line evictions and snoops, including cache maintenance operations.",
+ "ArchStdEvent": "L1D_CACHE_WB",
},
{
- "PublicDescription": "L2 data cache access. This event counts any transaction from L1 which looks up in the L2 cache, and any write-back from the L1 to the L2. Snoops from outside the core and cache maintenance operations are not counted.",
- "EventCode": "0x16",
- "EventName": "L2D_CACHE",
- "BriefDescription": "L2 data cache access"
+ "PublicDescription": "This event counts any transaction from L1 which looks up in the L2 cache, and any write-back from the L1 to the L2. Snoops from outside the core and cache maintenance operations are not counted.",
+ "ArchStdEvent": "L2D_CACHE",
},
{
"PublicDescription": "L2 data cache refill. This event counts any cacheable transaction from L1 which causes data to be read from outside the core. L2 refills caused by stashes into L2 should not be counted",
- "EventCode": "0x17",
- "EventName": "L2D_CACHE_REFILL",
- "BriefDescription": "L2 data cache refill"
+ "ArchStdEvent": "L2D_CACHE_REFILL",
},
{
- "PublicDescription": "L2 data cache write-back. This event counts any write-back of data from the L2 cache to outside the core. This includes snoops to the L2 which return data, regardless of whether they cause an invalidation. Invalidations from the L2 which do not write data outside of the core and snoops which return data from the L1 are not counted",
- "EventCode": "0x18",
- "EventName": "L2D_CACHE_WB",
- "BriefDescription": "L2 data cache write-back"
+ "PublicDescription": "This event counts any write-back of data from the L2 cache to outside the core. This includes snoops to the L2 which return data, regardless of whether they cause an invalidation. Invalidations from the L2 which do not write data outside of the core and snoops which return data from the L1 are not counted",
+ "ArchStdEvent": "L2D_CACHE_WB",
},
{
- "PublicDescription": "L2 data cache allocation without refill. This event counts any full cache line write into the L2 cache which does not cause a linefill, including write-backs from L1 to L2 and full-line writes which do not allocate into L1.",
- "EventCode": "0x20",
- "EventName": "L2D_CACHE_ALLOCATE",
- "BriefDescription": "L2 data cache allocation without refill"
+ "PublicDescription": "This event counts any full cache line write into the L2 cache which does not cause a linefill, including write-backs from L1 to L2 and full-line writes which do not allocate into L1.",
+ "ArchStdEvent": "L2D_CACHE_ALLOCATE",
},
{
- "PublicDescription": "Level 1 data TLB access. This event counts any load or store operation which accesses the data L1 TLB. If both a load and a store are executed on a cycle, this event counts twice. This event counts regardless of whether the MMU is enabled.",
- "EventCode": "0x25",
- "EventName": "L1D_TLB",
+ "PublicDescription": "This event counts any load or store operation which accesses the data L1 TLB. If both a load and a store are executed on a cycle, this event counts twice. This event counts regardless of whether the MMU is enabled.",
+ "ArchStdEvent": "L1D_TLB",
"BriefDescription": "Level 1 data TLB access."
},
{
- "PublicDescription": "Level 1 instruction TLB access. This event counts any instruction fetch which accesses the instruction L1 TLB.This event counts regardless of whether the MMU is enabled.",
- "EventCode": "0x26",
- "EventName": "L1I_TLB",
+ "PublicDescription": "This event counts any instruction fetch which accesses the instruction L1 TLB.This event counts regardless of whether the MMU is enabled.",
+ "ArchStdEvent": "L1I_TLB",
"BriefDescription": "Level 1 instruction TLB access"
},
{
"PublicDescription": "This event counts any full cache line write into the L3 cache which does not cause a linefill, including write-backs from L2 to L3 and full-line writes which do not allocate into L2",
- "EventCode": "0x29",
- "EventName": "L3D_CACHE_ALLOCATE",
+ "ArchStdEvent": "L3D_CACHE_ALLOCATE",
"BriefDescription": "Allocation without refill"
},
{
- "PublicDescription": "Attributable Level 3 unified cache refill. This event counts for any cacheable read transaction returning datafrom the SCU for which the data source was outside the cluster. Transactions such as ReadUnique are counted here as 'read' transactions, even though they can be generated by store instructions.",
- "EventCode": "0x2A",
- "EventName": "L3D_CACHE_REFILL",
+ "PublicDescription": "This event counts for any cacheable read transaction returning datafrom the SCU for which the data source was outside the cluster. Transactions such as ReadUnique are counted here as 'read' transactions, even though they can be generated by store instructions.",
+ "ArchStdEvent": "L3D_CACHE_REFILL",
"BriefDescription": "Attributable Level 3 unified cache refill."
},
{
- "PublicDescription": "Attributable Level 3 unified cache access. This event counts for any cacheable read transaction returning datafrom the SCU, or for any cacheable write to the SCU.",
- "EventCode": "0x2B",
- "EventName": "L3D_CACHE",
+ "PublicDescription": "This event counts for any cacheable read transaction returning datafrom the SCU, or for any cacheable write to the SCU.",
+ "ArchStdEvent": "L3D_CACHE",
"BriefDescription": "Attributable Level 3 unified cache access."
},
{
- "PublicDescription": "Attributable L2 data or unified TLB refill. This event counts on anyrefill of the L2 TLB, caused by either an instruction or data access.This event does not count if the MMU is disabled.",
- "EventCode": "0x2D",
- "EventName": "L2D_TLB_REFILL",
+ "PublicDescription": "This event counts on anyrefill of the L2 TLB, caused by either an instruction or data access.This event does not count if the MMU is disabled.",
+ "ArchStdEvent": "L2D_TLB_REFILL",
"BriefDescription": "Attributable L2 data or unified TLB refill"
},
{
- "PublicDescription": "Attributable L2 data or unified TLB access. This event counts on any access to the L2 TLB (caused by a refill of any of the L1 TLBs). This event does not count if the MMU is disabled.",
- "EventCode": "0x2F",
- "EventName": "L2D_TLB",
- "BriefDescription": "Attributable L2 data or unified TLB access"
+ "PublicDescription": "This event counts on any access to the L2 TLB (caused by a refill of any of the L1 TLBs). This event does not count if the MMU is disabled.",
+ "ArchStdEvent": "L2D_TLB",
},
{
- "PublicDescription": "Access to data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count.",
- "EventCode": "0x34",
- "EventName": "DTLB_WALK",
- "BriefDescription": "Access to data TLB that caused a page table walk."
+ "PublicDescription": "This event counts on any data access which causes L2D_TLB_REFILL to count.",
+ "ArchStdEvent": "DTLB_WALK",
},
{
- "PublicDescription": "Access to instruction TLB that caused a page table walk. This event counts on any instruction access which causes L2D_TLB_REFILL to count.",
- "EventCode": "0x35",
- "EventName": "ITLB_WALK",
- "BriefDescription": "Access to instruction TLB that caused a page table walk."
+ "PublicDescription": "This event counts on any instruction access which causes L2D_TLB_REFILL to count.",
+ "ArchStdEvent": "ITLB_WALK",
},
{
- "EventCode": "0x36",
- "EventName": "LL_CACHE_RD",
- "BriefDescription": "Last level cache access, read"
+ "ArchStdEvent": "LL_CACHE_RD",
},
{
- "EventCode": "0x37",
- "EventName": "LL_CACHE_MISS_RD",
- "BriefDescription": "Last level cache miss, read"
+ "ArchStdEvent": "LL_CACHE_MISS_RD",
},
{
"ArchStdEvent": "L1D_CACHE_INVAL"
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json
index 98d29c862320..ea4631db41b5 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json
@@ -1,14 +1,10 @@
[
{
- "EventCode": "0x09",
- "EventName": "EXC_TAKEN",
- "BriefDescription": "Exception taken."
+ "ArchStdEvent": "EXC_TAKEN",
},
{
- "PublicDescription": "Local memory error. This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs",
- "EventCode": "0x1A",
- "EventName": "MEMORY_ERROR",
- "BriefDescription": "Local memory error."
+ "PublicDescription": "This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs",
+ "ArchStdEvent": "MEMORY_ERROR",
},
{
"ArchStdEvent": "EXC_DABORT"
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json
index c153ac706d8d..8e59566cba8b 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json
@@ -1,49 +1,32 @@
[
{
- "PublicDescription": "Software increment. Instruction architecturally executed (condition code check pass).",
- "EventCode": "0x00",
- "EventName": "SW_INCR",
- "BriefDescription": "Software increment."
+ "ArchStdEvent": "SW_INCR",
},
{
- "PublicDescription": "Instruction architecturally executed. This event counts all retired instructions, including those that fail their condition check.",
- "EventCode": "0x08",
- "EventName": "INST_RETIRED",
- "BriefDescription": "Instruction architecturally executed."
+ "PublicDescription": "This event counts all retired instructions, including those that fail their condition check.",
+ "ArchStdEvent": "INST_RETIRED",
},
{
- "EventCode": "0x0A",
- "EventName": "EXC_RETURN",
- "BriefDescription": "Instruction architecturally executed, condition code check pass, exception return."
+ "ArchStdEvent": "EXC_RETURN",
},
{
- "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR. This event only counts writes to CONTEXTIDR in AArch32 state, and via the CONTEXTIDR_EL1 mnemonic in AArch64 state.",
- "EventCode": "0x0B",
- "EventName": "CID_WRITE_RETIRED",
- "BriefDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR."
+ "PublicDescription": "This event only counts writes to CONTEXTIDR in AArch32 state, and via the CONTEXTIDR_EL1 mnemonic in AArch64 state.",
+ "ArchStdEvent": "CID_WRITE_RETIRED",
},
{
- "EventCode": "0x1B",
- "EventName": "INST_SPEC",
- "BriefDescription": "Operation speculatively executed"
+ "ArchStdEvent": "INST_SPEC",
},
{
- "PublicDescription": "Instruction architecturally executed, condition code check pass, write to TTBR. This event only counts writes to TTBR0/TTBR1 in AArch32 state and TTBR0_EL1/TTBR1_EL1 in AArch64 state.",
- "EventCode": "0x1C",
- "EventName": "TTBR_WRITE_RETIRED",
- "BriefDescription": "Instruction architecturally executed, condition code check pass, write to TTBR"
+ "PublicDescription": "This event only counts writes to TTBR0/TTBR1 in AArch32 state and TTBR0_EL1/TTBR1_EL1 in AArch64 state.",
+ "ArchStdEvent": "TTBR_WRITE_RETIRED",
},
- {
- "PublicDescription": "Instruction architecturally executed, branch. This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches.",
- "EventCode": "0x21",
- "EventName": "BR_RETIRED",
- "BriefDescription": "Instruction architecturally executed, branch."
+ {,
+ "PublicDescription": "This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches.",
+ "ArchStdEvent": "BR_RETIRED",
},
{
- "PublicDescription": "Instruction architecturally executed, mispredicted branch. This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush.",
- "EventCode": "0x22",
- "EventName": "BR_MIS_PRED_RETIRED",
- "BriefDescription": "Instruction architecturally executed, mispredicted branch."
+ "PublicDescription": "This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush.",
+ "ArchStdEvent": "BR_MIS_PRED_RETIRED",
},
{
"ArchStdEvent": "ASE_SPEC"
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json
index b86643253f19..f06f399051c1 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json
@@ -1,9 +1,7 @@
[
{
- "PublicDescription": "Data memory access. This event counts memory accesses due to load or store instructions. This event counts the sum of MEM_ACCESS_RD and MEM_ACCESS_WR.",
- "EventCode": "0x13",
- "EventName": "MEM_ACCESS",
- "BriefDescription": "Data memory access"
+ "PublicDescription": "This event counts memory accesses due to load or store instructions. This event counts the sum of MEM_ACCESS_RD and MEM_ACCESS_WR.",
+ "ArchStdEvent": "MEM_ACCESS",
},
{
"ArchStdEvent": "MEM_ACCESS_RD"
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json
index 8bde029a62d5..c2ccbf6fbfa0 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json
@@ -1,7 +1,5 @@
[
{
- "EventCode": "0x31",
- "EventName": "REMOTE_ACCESS",
- "BriefDescription": "Access to another socket in a multi-socket system"
+ "ArchStdEvent": "REMOTE_ACCESS",
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json
index 010a647f9d02..d79f0aeaf7f1 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json
@@ -1,14 +1,10 @@
[
{
- "PublicDescription": "No operation issued because of the frontend. The counter counts on any cycle when there are no fetched instructions available to dispatch.",
- "EventCode": "0x23",
- "EventName": "STALL_FRONTEND",
- "BriefDescription": "No operation issued because of the frontend."
+ "PublicDescription": "The counter counts on any cycle when there are no fetched instructions available to dispatch.",
+ "ArchStdEvent": "STALL_FRONTEND",
},
{
- "PublicDescription": "No operation issued because of the backend. The counter counts on any cycle fetched instructions are not dispatched due to resource constraints.",
- "EventCode": "0x24",
- "EventName": "STALL_BACKEND",
- "BriefDescription": "No operation issued because of the backend."
+ "PublicDescription": "The counter counts on any cycle fetched instructions are not dispatched due to resource constraints.",
+ "ArchStdEvent": "STALL_BACKEND",
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json
new file mode 100644
index 000000000000..75376c7cc072
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json
@@ -0,0 +1,248 @@
+[
+ {
+ "PublicDescription": "Instruction architecturally executed, Condition code check pass, software increment",
+ "EventCode": "0x00",
+ "EventName": "SW_INCR",
+ "BriefDescription": "Instruction architecturally executed, Condition code check pass, software increment"
+ },
+ {
+ "PublicDescription": "Level 1 instruction cache refill",
+ "EventCode": "0x01",
+ "EventName": "L1I_CACHE_REFILL",
+ "BriefDescription": "Level 1 instruction cache refill"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 instruction TLB refill",
+ "EventCode": "0x02",
+ "EventName": "L1I_TLB_REFILL",
+ "BriefDescription": "Attributable Level 1 instruction TLB refill"
+ },
+ {
+ "PublicDescription": "Level 1 data cache refill",
+ "EventCode": "0x03",
+ "EventName": "L1D_CACHE_REFILL",
+ "BriefDescription": "Level 1 data cache refill"
+ },
+ {
+ "PublicDescription": "Level 1 data cache access",
+ "EventCode": "0x04",
+ "EventName": "L1D_CACHE",
+ "BriefDescription": "Level 1 data cache access"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data TLB refill",
+ "EventCode": "0x05",
+ "EventName": "L1D_TLB_REFILL",
+ "BriefDescription": "Attributable Level 1 data TLB refill"
+ },
+ {
+ "PublicDescription": "Instruction architecturally executed",
+ "EventCode": "0x08",
+ "EventName": "INST_RETIRED",
+ "BriefDescription": "Instruction architecturally executed"
+ },
+ {
+ "PublicDescription": "Exception taken",
+ "EventCode": "0x09",
+ "EventName": "EXC_TAKEN",
+ "BriefDescription": "Exception taken"
+ },
+ {
+ "PublicDescription": "Instruction architecturally executed, condition check pass, exception return",
+ "EventCode": "0x0a",
+ "EventName": "EXC_RETURN",
+ "BriefDescription": "Instruction architecturally executed, condition check pass, exception return"
+ },
+ {
+ "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR",
+ "EventCode": "0x0b",
+ "EventName": "CID_WRITE_RETIRED",
+ "BriefDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR"
+ },
+ {
+ "PublicDescription": "Mispredicted or not predicted branch speculatively executed",
+ "EventCode": "0x10",
+ "EventName": "BR_MIS_PRED",
+ "BriefDescription": "Mispredicted or not predicted branch speculatively executed"
+ },
+ {
+ "PublicDescription": "Cycle",
+ "EventCode": "0x11",
+ "EventName": "CPU_CYCLES",
+ "BriefDescription": "Cycle"
+ },
+ {
+ "PublicDescription": "Predictable branch speculatively executed",
+ "EventCode": "0x12",
+ "EventName": "BR_PRED",
+ "BriefDescription": "Predictable branch speculatively executed"
+ },
+ {
+ "PublicDescription": "Data memory access",
+ "EventCode": "0x13",
+ "EventName": "MEM_ACCESS",
+ "BriefDescription": "Data memory access"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 instruction cache access",
+ "EventCode": "0x14",
+ "EventName": "L1I_CACHE",
+ "BriefDescription": "Attributable Level 1 instruction cache access"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache write-back",
+ "EventCode": "0x15",
+ "EventName": "L1D_CACHE_WB",
+ "BriefDescription": "Attributable Level 1 data cache write-back"
+ },
+ {
+ "PublicDescription": "Level 2 data cache access",
+ "EventCode": "0x16",
+ "EventName": "L2D_CACHE",
+ "BriefDescription": "Level 2 data cache access"
+ },
+ {
+ "PublicDescription": "Level 2 data refill",
+ "EventCode": "0x17",
+ "EventName": "L2D_CACHE_REFILL",
+ "BriefDescription": "Level 2 data refill"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data cache write-back",
+ "EventCode": "0x18",
+ "EventName": "L2D_CACHE_WB",
+ "BriefDescription": "Attributable Level 2 data cache write-back"
+ },
+ {
+ "PublicDescription": "Attributable Bus access",
+ "EventCode": "0x19",
+ "EventName": "BUS_ACCESS",
+ "BriefDescription": "Attributable Bus access"
+ },
+ {
+ "PublicDescription": "Local memory error",
+ "EventCode": "0x1a",
+ "EventName": "MEMORY_ERROR",
+ "BriefDescription": "Local memory error"
+ },
+ {
+ "PublicDescription": "Operation speculatively executed",
+ "EventCode": "0x1b",
+ "EventName": "INST_SPEC",
+ "BriefDescription": "Operation speculatively executed"
+ },
+ {
+ "PublicDescription": "Instruction architecturally executed, Condition code check pass, write to TTBR",
+ "EventCode": "0x1c",
+ "EventName": "TTBR_WRITE_RETIRED",
+ "BriefDescription": "Instruction architecturally executed, Condition code check pass, write to TTBR"
+ },
+ {
+ "PublicDescription": "Bus cycle",
+ "EventCode": "0x1D",
+ "EventName": "BUS_CYCLES",
+ "BriefDescription": "Bus cycle"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data cache allocation without refill",
+ "EventCode": "0x20",
+ "EventName": "L2D_CACHE_ALLOCATE",
+ "BriefDescription": "Attributable Level 2 data cache allocation without refill"
+ },
+ {
+ "PublicDescription": "Instruction architecturally executed, branch",
+ "EventCode": "0x21",
+ "EventName": "BR_RETIRED",
+ "BriefDescription": "Instruction architecturally executed, branch"
+ },
+ {
+ "PublicDescription": "Instruction architecturally executed, mispredicted branch",
+ "EventCode": "0x22",
+ "EventName": "BR_MIS_PRED_RETIRED",
+ "BriefDescription": "Instruction architecturally executed, mispredicted branch"
+ },
+ {
+ "PublicDescription": "No operation issued because of the frontend",
+ "EventCode": "0x23",
+ "EventName": "STALL_FRONTEND",
+ "BriefDescription": "No operation issued because of the frontend"
+ },
+ {
+ "PublicDescription": "No operation issued due to the backend",
+ "EventCode": "0x24",
+ "EventName": "STALL_BACKEND",
+ "BriefDescription": "No operation issued due to the backend"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data or unified TLB access",
+ "EventCode": "0x25",
+ "EventName": "L1D_TLB",
+ "BriefDescription": "Attributable Level 1 data or unified TLB access"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 instruction TLB access",
+ "EventCode": "0x26",
+ "EventName": "L1I_TLB",
+ "BriefDescription": "Attributable Level 1 instruction TLB access"
+ },
+ {
+ "PublicDescription": "Attributable Level 3 data cache allocation without refill",
+ "EventCode": "0x29",
+ "EventName": "L3D_CACHE_ALLOCATE",
+ "BriefDescription": "Attributable Level 3 data cache allocation without refill"
+ },
+ {
+ "PublicDescription": "Attributable Level 3 data cache refill",
+ "EventCode": "0x2A",
+ "EventName": "L3D_CACHE_REFILL",
+ "BriefDescription": "Attributable Level 3 data cache refill"
+ },
+ {
+ "PublicDescription": "Attributable Level 3 data cache access",
+ "EventCode": "0x2B",
+ "EventName": "L3D_CACHE",
+ "BriefDescription": "Attributable Level 3 data cache access"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data TLB refill",
+ "EventCode": "0x2D",
+ "EventName": "L2D_TLB_REFILL",
+ "BriefDescription": "Attributable Level 2 data TLB refill"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data or unified TLB access",
+ "EventCode": "0x2F",
+ "EventName": "L2D_TLB",
+ "BriefDescription": "Attributable Level 2 data or unified TLB access"
+ },
+ {
+ "PublicDescription": "Access to another socket in a multi-socket system",
+ "EventCode": "0x31",
+ "EventName": "REMOTE_ACCESS",
+ "BriefDescription": "Access to another socket in a multi-socket system"
+ },
+ {
+ "PublicDescription": "Access to data TLB causes a translation table walk",
+ "EventCode": "0x34",
+ "EventName": "DTLB_WALK",
+ "BriefDescription": "Access to data TLB causes a translation table walk"
+ },
+ {
+ "PublicDescription": "Access to instruction TLB that causes a translation table walk",
+ "EventCode": "0x35",
+ "EventName": "ITLB_WALK",
+ "BriefDescription": "Access to instruction TLB that causes a translation table walk"
+ },
+ {
+ "PublicDescription": "Attributable Last level cache memory read",
+ "EventCode": "0x36",
+ "EventName": "LL_CACHE_RD",
+ "BriefDescription": "Attributable Last level cache memory read"
+ },
+ {
+ "PublicDescription": "Last level cache miss, read",
+ "EventCode": "0x37",
+ "EventName": "LL_CACHE_MISS_RD",
+ "BriefDescription": "Last level cache miss, read"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json
index 8e553b67cae6..f416fa052337 100644
--- a/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json
@@ -6,7 +6,7 @@
"ScaleUnit": "9.765625e-4KB",
"Unit": "imx8_ddr",
"Compat": "i.MX8MM"
- },
+ },
{
"BriefDescription": "bytes all masters write to ddr based on write-cycles event",
"MetricName": "imx8mm_ddr_write.all",
@@ -14,5 +14,5 @@
"ScaleUnit": "9.765625e-4KB",
"Unit": "imx8_ddr",
"Compat": "i.MX8MM"
- }
+ }
]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/ddrc.json
new file mode 100644
index 000000000000..8352e73d6d35
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/ddrc.json
@@ -0,0 +1,37 @@
+[
+ {
+ "BriefDescription": "ddr cycles event",
+ "EventCode": "0x00",
+ "EventName": "imx8mn_ddr.cycles",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MN"
+ },
+ {
+ "BriefDescription": "ddr read-cycles event",
+ "EventCode": "0x2a",
+ "EventName": "imx8mn_ddr.read_cycles",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MN"
+ },
+ {
+ "BriefDescription": "ddr write-cycles event",
+ "EventCode": "0x2b",
+ "EventName": "imx8mn_ddr.write_cycles",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MN"
+ },
+ {
+ "BriefDescription": "ddr read event",
+ "EventCode": "0x35",
+ "EventName": "imx8mn_ddr.read",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MN"
+ },
+ {
+ "BriefDescription": "ddr write event",
+ "EventCode": "0x38",
+ "EventName": "imx8mn_ddr.write",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MN"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/metrics.json
new file mode 100644
index 000000000000..2bbba4d8ea5b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mn/sys/metrics.json
@@ -0,0 +1,18 @@
+[
+ {
+ "BriefDescription": "bytes all masters read from ddr based on read-cycles event",
+ "MetricName": "imx8mn_ddr_read.all",
+ "MetricExpr": "imx8mn_ddr.read_cycles * 4 * 2",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MN"
+ },
+ {
+ "BriefDescription": "bytes all masters write to ddr based on write-cycles event",
+ "MetricName": "imx8mn_ddr_write.all",
+ "MetricExpr": "imx8mn_ddr.write_cycles * 4 * 2",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MN"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/ddrc.json
new file mode 100644
index 000000000000..f9a89efc9b24
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/ddrc.json
@@ -0,0 +1,37 @@
+[
+ {
+ "BriefDescription": "ddr cycles event",
+ "EventCode": "0x00",
+ "EventName": "imx8mp_ddr.cycles",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "ddr read-cycles event",
+ "EventCode": "0x2a",
+ "EventName": "imx8mp_ddr.read_cycles",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "ddr write-cycles event",
+ "EventCode": "0x2b",
+ "EventName": "imx8mp_ddr.write_cycles",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "ddr read event",
+ "EventCode": "0x35",
+ "EventName": "imx8mp_ddr.read",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "ddr write event",
+ "EventCode": "0x38",
+ "EventName": "imx8mp_ddr.write",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/metrics.json
new file mode 100644
index 000000000000..8b9544424b3f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mp/sys/metrics.json
@@ -0,0 +1,466 @@
+[
+ {
+ "BriefDescription": "bytes of all masters read from ddr",
+ "MetricName": "imx8mp_ddr_read.all",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0xffff\\,axi_id\\=0x0000@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of all masters write to ddr",
+ "MetricName": "imx8mp_ddr_write.all",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0xffff\\,axi_id\\=0x0000@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of a53 core read from ddr",
+ "MetricName": "imx8mp_ddr_read.a53",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0000@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of a53 core write to ddr",
+ "MetricName": "imx8mp_ddr_write.a53",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0000@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of supermix(m7) core read from ddr",
+ "MetricName": "imx8mp_ddr_read.supermix",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x000f\\,axi_id\\=0x0020@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of supermix(m7) write to ddr",
+ "MetricName": "imx8mp_ddr_write.supermix",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x000f\\,axi_id\\=0x0020@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of gpu 3d read from ddr",
+ "MetricName": "imx8mp_ddr_read.3d",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0070@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of gpu 3d write to ddr",
+ "MetricName": "imx8mp_ddr_write.3d",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0070@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of gpu 2d read from ddr",
+ "MetricName": "imx8mp_ddr_read.2d",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0071@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of gpu 2d write to ddr",
+ "MetricName": "imx8mp_ddr_write.2d",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0071@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display lcdif1 read from ddr",
+ "MetricName": "imx8mp_ddr_read.lcdif1",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0068@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display lcdif1 write to ddr",
+ "MetricName": "imx8mp_ddr_write.lcdif1",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0068@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display lcdif2 read from ddr",
+ "MetricName": "imx8mp_ddr_read.lcdif2",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0069@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display lcdif2 write to ddr",
+ "MetricName": "imx8mp_ddr_write.lcdif2",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0069@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display isi1 read from ddr",
+ "MetricName": "imx8mp_ddr_read.isi1",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x006a@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display isi1 write to ddr",
+ "MetricName": "imx8mp_ddr_write.isi1",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x006a@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display isi2 read from ddr",
+ "MetricName": "imx8mp_ddr_read.isi2",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x006b@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display isi2 write to ddr",
+ "MetricName": "imx8mp_ddr_write.isi2",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x006b@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display isi3 read from ddr",
+ "MetricName": "imx8mp_ddr_read.isi3",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x006c@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display isi3 write to ddr",
+ "MetricName": "imx8mp_ddr_write.isi3",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x006c@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display isp1 read from ddr",
+ "MetricName": "imx8mp_ddr_read.isp1",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x006d@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display isp1 write to ddr",
+ "MetricName": "imx8mp_ddr_write.isp1",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x006d@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display isp2 read from ddr",
+ "MetricName": "imx8mp_ddr_read.isp2",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x006e@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display isp2 write to ddr",
+ "MetricName": "imx8mp_ddr_write.isp2",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x006e@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display dewarp read from ddr",
+ "MetricName": "imx8mp_ddr_read.dewarp",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x006f@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of display dewarp write to ddr",
+ "MetricName": "imx8mp_ddr_write.dewarp",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x006f@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of vpu1 read from ddr",
+ "MetricName": "imx8mp_ddr_read.vpu1",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x007c@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of vpu1 write to ddr",
+ "MetricName": "imx8mp_ddr_write.vpu1",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x007c@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of vpu2 read from ddr",
+ "MetricName": "imx8mp_ddr_read.vpu2",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x007d@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of vpu2 write to ddr",
+ "MetricName": "imx8mp_ddr_write.vpu2",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x007d@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of vpu3 read from ddr",
+ "MetricName": "imx8mp_ddr_read.vpu3",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x007e@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of vpu3 write to ddr",
+ "MetricName": "imx8mp_ddr_write.vpu3",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x007e@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of npu read from ddr",
+ "MetricName": "imx8mp_ddr_read.npu",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0073@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of npu write to ddr",
+ "MetricName": "imx8mp_ddr_write.npu",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0073@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of hsio usb1 read from ddr",
+ "MetricName": "imx8mp_ddr_read.usb1",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0078@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of hsio usb1 write to ddr",
+ "MetricName": "imx8mp_ddr_write.usb1",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0078@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of hsio usb2 read from ddr",
+ "MetricName": "imx8mp_ddr_read.usb2",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0079@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of hsio usb2 write to ddr",
+ "MetricName": "imx8mp_ddr_write.usb2",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0079@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of hsio pci read from ddr",
+ "MetricName": "imx8mp_ddr_read.pci",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x007a@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of hsio pci write to ddr",
+ "MetricName": "imx8mp_ddr_write.pci",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x007a@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of hdmi_tx hrv_mwr read from ddr",
+ "MetricName": "imx8mp_ddr_read.hdmi_hrv_mwr",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0074@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of hdmi_tx hrv_mwr write to ddr",
+ "MetricName": "imx8mp_ddr_write.hdmi_hrv_mwr",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0074@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of hdmi_tx lcdif read from ddr",
+ "MetricName": "imx8mp_ddr_read.hdmi_lcdif",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0075@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of hdmi_tx lcdif write to ddr",
+ "MetricName": "imx8mp_ddr_write.hdmi_lcdif",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0075@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of hdmi_tx tx_hdcp read from ddr",
+ "MetricName": "imx8mp_ddr_read.hdmi_hdcp",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0076@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of hdmi_tx tx_hdcp write to ddr",
+ "MetricName": "imx8mp_ddr_write.hdmi_hdcp",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0076@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of audio dsp read from ddr",
+ "MetricName": "imx8mp_ddr_read.audio_dsp",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0041@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of audio dsp write to ddr",
+ "MetricName": "imx8mp_ddr_write.audio_dsp",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0041@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of audio sdma2_per read from ddr",
+ "MetricName": "imx8mp_ddr_read.audio_sdma2_per",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0062@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of audio sdma2_per write to ddr",
+ "MetricName": "imx8mp_ddr_write.audio_sdma2_per",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0062@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of audio sdma2_burst read from ddr",
+ "MetricName": "imx8mp_ddr_read.audio_sdma2_burst",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0063@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of audio sdma2_burst write to ddr",
+ "MetricName": "imx8mp_ddr_write.audio_sdma2_burst",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0063@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of audio sdma3_per read from ddr",
+ "MetricName": "imx8mp_ddr_read.audio_sdma3_per",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0064@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of audio sdma3_per write to ddr",
+ "MetricName": "imx8mp_ddr_write.audio_sdma3_per",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0064@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of audio sdma3_burst read from ddr",
+ "MetricName": "imx8mp_ddr_read.audio_sdma3_burst",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0065@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of audio sdma3_burst write to ddr",
+ "MetricName": "imx8mp_ddr_write.audio_sdma3_burst",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0065@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of audio sdma_pif read from ddr",
+ "MetricName": "imx8mp_ddr_read.audio_sdma_pif",
+ "MetricExpr": "imx8_ddr0@axid\\-read\\,axi_mask\\=0x0000\\,axi_id\\=0x0066@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ },
+ {
+ "BriefDescription": "bytes of audio sdma_pif write to ddr",
+ "MetricName": "imx8mp_ddr_write.audio_sdma_pif",
+ "MetricExpr": "imx8_ddr0@axid\\-write\\,axi_mask\\=0x0000\\,axi_id\\=0x0066@",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MP"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/ddrc.json
new file mode 100644
index 000000000000..c8682728ddad
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/ddrc.json
@@ -0,0 +1,37 @@
+[
+ {
+ "BriefDescription": "ddr cycles event",
+ "EventCode": "0x00",
+ "EventName": "imx8mq_ddr.cycles",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MQ"
+ },
+ {
+ "BriefDescription": "ddr read-cycles event",
+ "EventCode": "0x2a",
+ "EventName": "imx8mq_ddr.read_cycles",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MQ"
+ },
+ {
+ "BriefDescription": "ddr write-cycles event",
+ "EventCode": "0x2b",
+ "EventName": "imx8mq_ddr.write_cycles",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MQ"
+ },
+ {
+ "BriefDescription": "ddr read event",
+ "EventCode": "0x35",
+ "EventName": "imx8mq_ddr.read",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MQ"
+ },
+ {
+ "BriefDescription": "ddr write event",
+ "EventCode": "0x38",
+ "EventName": "imx8mq_ddr.write",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MQ"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/metrics.json
new file mode 100644
index 000000000000..862c98171e0d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mq/sys/metrics.json
@@ -0,0 +1,18 @@
+[
+ {
+ "BriefDescription": "bytes all masters read from ddr based on read-cycles event",
+ "MetricName": "imx8mq_ddr_read.all",
+ "MetricExpr": "imx8mq_ddr.read_cycles * 4 * 4",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MQ"
+ },
+ {
+ "BriefDescription": "bytes all masters write to ddr based on write-cycles event",
+ "MetricName": "imx8mq_ddr_write.all",
+ "MetricExpr": "imx8mq_ddr.write_cycles * 4 * 4",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx8_ddr",
+ "Compat": "i.MX8MQ"
+ }
+]
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index aa4dc4f5abde..650aec19d490 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -58,6 +58,7 @@ perf-y += time-utils-test.o
perf-y += genelf.o
perf-y += api-io.o
perf-y += demangle-java-test.o
+perf-y += demangle-ocaml-test.o
perf-y += pfm.o
perf-y += parse-metric.o
perf-y += pe-file-parsing.o
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index ec972e0892ab..dd39ce9b0277 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -182,14 +182,20 @@ int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
struct stat st;
char path_perf[PATH_MAX];
char path_dir[PATH_MAX];
+ char *exec_path;
/* First try development tree tests. */
if (!lstat("./tests", &st))
return run_dir("./tests", "./perf");
+ exec_path = get_argv_exec_path();
+ if (exec_path == NULL)
+ return -1;
+
/* Then installed path. */
- snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path());
+ snprintf(path_dir, PATH_MAX, "%s/tests", exec_path);
snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR);
+ free(exec_path);
if (!lstat(path_dir, &st) &&
!lstat(path_perf, &st))
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 7273823d0d02..c4b888f18e9c 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -339,6 +339,10 @@ static struct test generic_tests[] = {
.func = test__demangle_java,
},
{
+ .desc = "Demangle OCaml",
+ .func = test__demangle_ocaml,
+ },
+ {
.desc = "Parse and process metrics",
.func = test__parse_metric,
},
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 7c098d49c77e..2fdc7b2f996e 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -26,6 +26,7 @@
#include "event.h"
#include "record.h"
#include "util/mmap.h"
+#include "util/string2.h"
#include "util/synthetic-events.h"
#include "thread.h"
@@ -41,15 +42,6 @@ struct state {
size_t done_cnt;
};
-static unsigned int hex(char c)
-{
- if (c >= '0' && c <= '9')
- return c - '0';
- if (c >= 'a' && c <= 'f')
- return c - 'a' + 10;
- return c - 'A' + 10;
-}
-
static size_t read_objdump_chunk(const char **line, unsigned char **buf,
size_t *buf_len)
{
@@ -714,13 +706,9 @@ static int do_test_code_reading(bool try_kcore)
out_put:
thread__put(thread);
out_err:
-
- if (evlist) {
- evlist__delete(evlist);
- } else {
- perf_cpu_map__put(cpus);
- perf_thread_map__put(threads);
- }
+ evlist__delete(evlist);
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
machine__delete_threads(machine);
machine__delete(machine);
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 29c793ac7d10..0472b110fe65 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -106,6 +106,8 @@ static int cpu_map_print(const char *str)
return -1;
cpu_map__snprint(map, buf, sizeof(buf));
+ perf_cpu_map__put(map);
+
return !strcmp(buf, str);
}
diff --git a/tools/perf/tests/demangle-ocaml-test.c b/tools/perf/tests/demangle-ocaml-test.c
new file mode 100644
index 000000000000..a273ed5163d7
--- /dev/null
+++ b/tools/perf/tests/demangle-ocaml-test.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "tests.h"
+#include "session.h"
+#include "debug.h"
+#include "demangle-ocaml.h"
+
+int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int ret = TEST_OK;
+ char *buf = NULL;
+ size_t i;
+
+ struct {
+ const char *mangled, *demangled;
+ } test_cases[] = {
+ { "main",
+ NULL },
+ { "camlStdlib__array__map_154",
+ "Stdlib.array.map" },
+ { "camlStdlib__anon_fn$5bstdlib$2eml$3a334$2c0$2d$2d54$5d_1453",
+ "Stdlib.anon_fn[stdlib.ml:334,0--54]" },
+ { "camlStdlib__bytes__$2b$2b_2205",
+ "Stdlib.bytes.++" },
+ };
+
+ for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) {
+ buf = ocaml_demangle_sym(test_cases[i].mangled);
+ if ((buf == NULL && test_cases[i].demangled != NULL)
+ || (buf != NULL && test_cases[i].demangled == NULL)
+ || (buf != NULL && strcmp(buf, test_cases[i].demangled))) {
+ pr_debug("FAILED: %s: %s != %s\n", test_cases[i].mangled,
+ buf == NULL ? "(null)" : buf,
+ test_cases[i].demangled == NULL ? "(null)" : test_cases[i].demangled);
+ ret = TEST_FAIL;
+ }
+ free(buf);
+ }
+
+ return ret;
+}
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index e6f1b2a38e03..a0438b0f0805 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -154,10 +154,9 @@ out_err:
if (evlist) {
evlist__disable(evlist);
evlist__delete(evlist);
- } else {
- perf_cpu_map__put(cpus);
- perf_thread_map__put(threads);
}
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
return err;
}
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 57093aeacc6f..73ae8f7aa066 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -158,8 +158,6 @@ out_init:
out_delete_evlist:
evlist__delete(evlist);
- cpus = NULL;
- threads = NULL;
out_free_cpus:
perf_cpu_map__put(cpus);
out_free_threads:
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index 71f85e2cc127..f7dd6c463f04 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -15,7 +15,6 @@
#include "tests.h"
#include "thread_map.h"
#include <perf/cpumap.h>
-#include <internal/cpumap.h>
#include "debug.h"
#include "stat.h"
#include "util/counts.h"
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
index ce7be37f0d88..6dc1db1626ad 100644
--- a/tools/perf/tests/parse-metric.c
+++ b/tools/perf/tests/parse-metric.c
@@ -70,6 +70,10 @@ static struct pmu_event pme_test[] = {
.metric_name = "M3",
},
{
+ .metric_expr = "64 * l1d.replacement / 1000000000 / duration_time",
+ .metric_name = "L1D_Cache_Fill_BW",
+},
+{
.name = NULL,
}
};
@@ -107,6 +111,8 @@ static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist,
evlist__for_each_entry(evlist, evsel) {
count = find_value(evsel->name, vals);
perf_stat__update_shadow_stats(evsel, count, 0, st);
+ if (!strcmp(evsel->name, "duration_time"))
+ update_stats(&walltime_nsecs_stats, count);
}
}
@@ -321,6 +327,23 @@ static int test_recursion_fail(void)
return 0;
}
+static int test_memory_bandwidth(void)
+{
+ double ratio;
+ struct value vals[] = {
+ { .event = "l1d.replacement", .val = 4000000 },
+ { .event = "duration_time", .val = 200000000 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to compute metric",
+ compute_metric("L1D_Cache_Fill_BW", vals, &ratio) == 0);
+ TEST_ASSERT_VAL("L1D_Cache_Fill_BW, wrong ratio",
+ 1.28 == ratio);
+
+ return 0;
+}
+
static int test_metric_group(void)
{
double ratio1, ratio2;
@@ -353,5 +376,6 @@ int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unu
TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0);
TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0);
TEST_ASSERT_VAL("test metric group", test_metric_group() == 0);
+ TEST_ASSERT_VAL("Memory bandwidth", test_memory_bandwidth() == 0);
return 0;
}
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index 7cff02664d0e..680c3cffb128 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -167,6 +167,8 @@ next_event:
out_err:
evlist__delete(evlist);
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
return err;
}
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 2393916f6128..8fd8a4ef97da 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -157,6 +157,9 @@ static bool samples_same(const struct perf_sample *s1,
if (type & PERF_SAMPLE_DATA_PAGE_SIZE)
COMP(data_page_size);
+ if (type & PERF_SAMPLE_CODE_PAGE_SIZE)
+ COMP(code_page_size);
+
if (type & PERF_SAMPLE_AUX) {
COMP(aux_sample.size);
if (memcmp(s1->aux_sample.data, s2->aux_sample.data,
@@ -196,7 +199,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.data = {1, -1ULL, 211, 212, 213},
};
u64 regs[64];
- const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL};
+ const u32 raw_data[] = {0x12345678, 0x0a0b0c0d, 0x11020304, 0x05060708, 0 };
const u64 data[] = {0x2211443366558877ULL, 0, 0xaabbccddeeff4321ULL};
const u64 aux_data[] = {0xa55a, 0, 0xeeddee, 0x0282028202820282};
struct perf_sample sample = {
@@ -238,6 +241,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.phys_addr = 113,
.cgroup = 114,
.data_page_size = 115,
+ .code_page_size = 116,
.aux_sample = {
.size = sizeof(aux_data),
.data = (void *)aux_data,
@@ -344,7 +348,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
* were added. Please actually update the test rather than just change
* the condition below.
*/
- if (PERF_SAMPLE_MAX > PERF_SAMPLE_CODE_PAGE_SIZE << 1) {
+ if (PERF_SAMPLE_MAX > PERF_SAMPLE_WEIGHT_STRUCT << 1) {
pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
return -1;
}
@@ -374,8 +378,12 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
return err;
}
- /* Test all sample format bits together */
- sample_type = PERF_SAMPLE_MAX - 1;
+ /*
+ * Test all sample format bits together
+ * Note: PERF_SAMPLE_WEIGHT and PERF_SAMPLE_WEIGHT_STRUCT cannot
+ * be set simultaneously.
+ */
+ sample_type = (PERF_SAMPLE_MAX - 1) & ~PERF_SAMPLE_WEIGHT;
sample_regs = 0x3fff; /* shared yb intr and user regs */
for (i = 0; i < ARRAY_SIZE(rf); i++) {
err = do_test(sample_type, sample_regs, rf[i]);
diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh
index 4861a20edee2..416af614bbe0 100755
--- a/tools/perf/tests/shell/buildid.sh
+++ b/tools/perf/tests/shell/buildid.sh
@@ -50,6 +50,12 @@ check()
exit 1
fi
+ ${perf} buildid-cache -l | grep $id
+ if [ $? -ne 0 ]; then
+ echo "failed: ${id} is not reported by \"perf buildid-cache -l\""
+ exit 1
+ fi
+
echo "OK for ${1}"
}
diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh
new file mode 100755
index 000000000000..5ad3ca8d681b
--- /dev/null
+++ b/tools/perf/tests/shell/daemon.sh
@@ -0,0 +1,475 @@
+#!/bin/sh
+# daemon operations
+# SPDX-License-Identifier: GPL-2.0
+
+check_line_first()
+{
+ local line=$1
+ local name=$2
+ local base=$3
+ local output=$4
+ local lock=$5
+ local up=$6
+
+ local line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'`
+ local line_base=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $3 }'`
+ local line_output=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $4 }'`
+ local line_lock=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $5 }'`
+ local line_up=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $6 }'`
+
+ if [ "${name}" != "${line_name}" ]; then
+ echo "FAILED: wrong name"
+ error=1
+ fi
+
+ if [ "${base}" != "${line_base}" ]; then
+ echo "FAILED: wrong base"
+ error=1
+ fi
+
+ if [ "${output}" != "${line_output}" ]; then
+ echo "FAILED: wrong output"
+ error=1
+ fi
+
+ if [ "${lock}" != "${line_lock}" ]; then
+ echo "FAILED: wrong lock"
+ error=1
+ fi
+
+ if [ "${up}" != "${line_up}" ]; then
+ echo "FAILED: wrong up"
+ error=1
+ fi
+}
+
+check_line_other()
+{
+ local line=$1
+ local name=$2
+ local run=$3
+ local base=$4
+ local output=$5
+ local control=$6
+ local ack=$7
+ local up=$8
+
+ local line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'`
+ local line_run=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $3 }'`
+ local line_base=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $4 }'`
+ local line_output=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $5 }'`
+ local line_control=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $6 }'`
+ local line_ack=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $7 }'`
+ local line_up=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $8 }'`
+
+ if [ "${name}" != "${line_name}" ]; then
+ echo "FAILED: wrong name"
+ error=1
+ fi
+
+ if [ "${run}" != "${line_run}" ]; then
+ echo "FAILED: wrong run"
+ error=1
+ fi
+
+ if [ "${base}" != "${line_base}" ]; then
+ echo "FAILED: wrong base"
+ error=1
+ fi
+
+ if [ "${output}" != "${line_output}" ]; then
+ echo "FAILED: wrong output"
+ error=1
+ fi
+
+ if [ "${control}" != "${line_control}" ]; then
+ echo "FAILED: wrong control"
+ error=1
+ fi
+
+ if [ "${ack}" != "${line_ack}" ]; then
+ echo "FAILED: wrong ack"
+ error=1
+ fi
+
+ if [ "${up}" != "${line_up}" ]; then
+ echo "FAILED: wrong up"
+ error=1
+ fi
+}
+
+daemon_start()
+{
+ local config=$1
+ local session=$2
+
+ perf daemon start --config ${config}
+
+ # wait for the session to ping
+ local state="FAIL"
+ while [ "${state}" != "OK" ]; do
+ state=`perf daemon ping --config ${config} --session ${session} | awk '{ print $1 }'`
+ sleep 0.05
+ done
+}
+
+daemon_exit()
+{
+ local base=$1
+ local config=$2
+
+ local line=`perf daemon --config ${config} -x: | head -1`
+ local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
+
+ # stop daemon
+ perf daemon stop --config ${config}
+
+ # ... and wait for the pid to go away
+ tail --pid=${pid} -f /dev/null
+}
+
+test_list()
+{
+ echo "test daemon list"
+
+ local config=$(mktemp /tmp/perf.daemon.config.XXX)
+ local base=$(mktemp -d /tmp/perf.daemon.base.XXX)
+
+ cat <<EOF > ${config}
+[daemon]
+base=BASE
+
+[session-size]
+run = -e cpu-clock -m 1 sleep 10
+
+[session-time]
+run = -e task-clock -m 1 sleep 10
+EOF
+
+ sed -i -e "s|BASE|${base}|" ${config}
+
+ # start daemon
+ daemon_start ${config} size
+
+ # check first line
+ # pid:daemon:base:base/output:base/lock
+ local line=`perf daemon --config ${config} -x: | head -1`
+ check_line_first ${line} daemon ${base} ${base}/output ${base}/lock "0"
+
+ # check 1st session
+ # pid:size:-e cpu-clock:base/size:base/size/output:base/size/control:base/size/ack:0
+ local line=`perf daemon --config ${config} -x: | head -2 | tail -1`
+ check_line_other "${line}" size "-e cpu-clock -m 1 sleep 10" ${base}/session-size \
+ ${base}/session-size/output ${base}/session-size/control \
+ ${base}/session-size/ack "0"
+
+ # check 2nd session
+ # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
+ local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
+ check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \
+ ${base}/session-time/output ${base}/session-time/control \
+ ${base}/session-time/ack "0"
+
+ # stop daemon
+ daemon_exit ${base} ${config}
+
+ rm -rf ${base}
+ rm -f ${config}
+}
+
+test_reconfig()
+{
+ echo "test daemon reconfig"
+
+ local config=$(mktemp /tmp/perf.daemon.config.XXX)
+ local base=$(mktemp -d /tmp/perf.daemon.base.XXX)
+
+ # prepare config
+ cat <<EOF > ${config}
+[daemon]
+base=BASE
+
+[session-size]
+run = -e cpu-clock -m 1 sleep 10
+
+[session-time]
+run = -e task-clock -m 1 sleep 10
+EOF
+
+ sed -i -e "s|BASE|${base}|" ${config}
+
+ # start daemon
+ daemon_start ${config} size
+
+ # check 2nd session
+ # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
+ local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
+ check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \
+ ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0"
+ local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
+
+ # prepare new config
+ local config_new=${config}.new
+ cat <<EOF > ${config_new}
+[daemon]
+base=BASE
+
+[session-size]
+run = -e cpu-clock -m 1 sleep 10
+
+[session-time]
+run = -e cpu-clock -m 1 sleep 10
+EOF
+
+ # TEST 1 - change config
+
+ sed -i -e "s|BASE|${base}|" ${config_new}
+ cp ${config_new} ${config}
+
+ # wait for old session to finish
+ tail --pid=${pid} -f /dev/null
+
+ # wait for new one to start
+ local state="FAIL"
+ while [ "${state}" != "OK" ]; do
+ state=`perf daemon ping --config ${config} --session time | awk '{ print $1 }'`
+ done
+
+ # check reconfigured 2nd session
+ # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
+ local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
+ check_line_other "${line}" time "-e cpu-clock -m 1 sleep 10" ${base}/session-time \
+ ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0"
+
+ # TEST 2 - empty config
+
+ local config_empty=${config}.empty
+ cat <<EOF > ${config_empty}
+[daemon]
+base=BASE
+EOF
+
+ # change config
+ sed -i -e "s|BASE|${base}|" ${config_empty}
+ cp ${config_empty} ${config}
+
+ # wait for sessions to finish
+ local state="OK"
+ while [ "${state}" != "FAIL" ]; do
+ state=`perf daemon ping --config ${config} --session time | awk '{ print $1 }'`
+ done
+
+ local state="OK"
+ while [ "${state}" != "FAIL" ]; do
+ state=`perf daemon ping --config ${config} --session size | awk '{ print $1 }'`
+ done
+
+ local one=`perf daemon --config ${config} -x: | wc -l`
+
+ if [ ${one} -ne "1" ]; then
+ echo "FAILED: wrong list output"
+ error=1
+ fi
+
+ # TEST 3 - config again
+
+ cp ${config_new} ${config}
+
+ # wait for size to start
+ local state="FAIL"
+ while [ "${state}" != "OK" ]; do
+ state=`perf daemon ping --config ${config} --session size | awk '{ print $1 }'`
+ done
+
+ # wait for time to start
+ local state="FAIL"
+ while [ "${state}" != "OK" ]; do
+ state=`perf daemon ping --config ${config} --session time | awk '{ print $1 }'`
+ done
+
+ # stop daemon
+ daemon_exit ${base} ${config}
+
+ rm -rf ${base}
+ rm -f ${config}
+ rm -f ${config_new}
+ rm -f ${config_empty}
+}
+
+test_stop()
+{
+ echo "test daemon stop"
+
+ local config=$(mktemp /tmp/perf.daemon.config.XXX)
+ local base=$(mktemp -d /tmp/perf.daemon.base.XXX)
+
+ # prepare config
+ cat <<EOF > ${config}
+[daemon]
+base=BASE
+
+[session-size]
+run = -e cpu-clock -m 1 sleep 10
+
+[session-time]
+run = -e task-clock -m 1 sleep 10
+EOF
+
+ sed -i -e "s|BASE|${base}|" ${config}
+
+ # start daemon
+ daemon_start ${config} size
+
+ local pid_size=`perf daemon --config ${config} -x: | head -2 | tail -1 | awk 'BEGIN { FS = ":" } ; { print $1 }'`
+ local pid_time=`perf daemon --config ${config} -x: | head -3 | tail -1 | awk 'BEGIN { FS = ":" } ; { print $1 }'`
+
+ # check that sessions are running
+ if [ ! -d "/proc/${pid_size}" ]; then
+ echo "FAILED: session size not up"
+ fi
+
+ if [ ! -d "/proc/${pid_time}" ]; then
+ echo "FAILED: session time not up"
+ fi
+
+ # stop daemon
+ daemon_exit ${base} ${config}
+
+ # check that sessions are gone
+ if [ -d "/proc/${pid_size}" ]; then
+ echo "FAILED: session size still up"
+ fi
+
+ if [ -d "/proc/${pid_time}" ]; then
+ echo "FAILED: session time still up"
+ fi
+
+ rm -rf ${base}
+ rm -f ${config}
+}
+
+test_signal()
+{
+ echo "test daemon signal"
+
+ local config=$(mktemp /tmp/perf.daemon.config.XXX)
+ local base=$(mktemp -d /tmp/perf.daemon.base.XXX)
+
+ # prepare config
+ cat <<EOF > ${config}
+[daemon]
+base=BASE
+
+[session-test]
+run = -e cpu-clock --switch-output -m 1 sleep 10
+EOF
+
+ sed -i -e "s|BASE|${base}|" ${config}
+
+ # start daemon
+ daemon_start ${config} test
+
+ # send 2 signals
+ perf daemon signal --config ${config} --session test
+ perf daemon signal --config ${config}
+
+ # stop daemon
+ daemon_exit ${base} ${config}
+
+ # count is 2 perf.data for signals and 1 for perf record finished
+ count=`ls ${base}/session-test/ | grep perf.data | wc -l`
+ if [ ${count} -ne 3 ]; then
+ error=1
+ echo "FAILED: perf data no generated"
+ fi
+
+ rm -rf ${base}
+ rm -f ${config}
+}
+
+test_ping()
+{
+ echo "test daemon ping"
+
+ local config=$(mktemp /tmp/perf.daemon.config.XXX)
+ local base=$(mktemp -d /tmp/perf.daemon.base.XXX)
+
+ # prepare config
+ cat <<EOF > ${config}
+[daemon]
+base=BASE
+
+[session-size]
+run = -e cpu-clock -m 1 sleep 10
+
+[session-time]
+run = -e task-clock -m 1 sleep 10
+EOF
+
+ sed -i -e "s|BASE|${base}|" ${config}
+
+ # start daemon
+ daemon_start ${config} size
+
+ size=`perf daemon ping --config ${config} --session size | awk '{ print $1 }'`
+ type=`perf daemon ping --config ${config} --session time | awk '{ print $1 }'`
+
+ if [ ${size} != "OK" -o ${type} != "OK" ]; then
+ error=1
+ echo "FAILED: daemon ping failed"
+ fi
+
+ # stop daemon
+ daemon_exit ${base} ${config}
+
+ rm -rf ${base}
+ rm -f ${config}
+}
+
+test_lock()
+{
+ echo "test daemon lock"
+
+ local config=$(mktemp /tmp/perf.daemon.config.XXX)
+ local base=$(mktemp -d /tmp/perf.daemon.base.XXX)
+
+ # prepare config
+ cat <<EOF > ${config}
+[daemon]
+base=BASE
+
+[session-size]
+run = -e cpu-clock -m 1 sleep 10
+EOF
+
+ sed -i -e "s|BASE|${base}|" ${config}
+
+ # start daemon
+ daemon_start ${config} size
+
+ # start second daemon over the same config/base
+ failed=`perf daemon start --config ${config} 2>&1 | awk '{ print $1 }'`
+
+ # check that we failed properly
+ if [ ${failed} != "failed:" ]; then
+ error=1
+ echo "FAILED: daemon lock failed"
+ fi
+
+ # stop daemon
+ daemon_exit ${base} ${config}
+
+ rm -rf ${base}
+ rm -f ${config}
+}
+
+error=0
+
+test_list
+test_reconfig
+test_stop
+test_signal
+test_ping
+test_lock
+
+exit ${error}
diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh
index 18fde2f179cd..c9eef0bba6f1 100755
--- a/tools/perf/tests/shell/test_arm_coresight.sh
+++ b/tools/perf/tests/shell/test_arm_coresight.sh
@@ -11,6 +11,7 @@
perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
file=$(mktemp /tmp/temporary_file.XXXXX)
+glb_err=0
skip_if_no_cs_etm_event() {
perf list | grep -q 'cs_etm//' && return 0
@@ -33,7 +34,7 @@ record_touch_file() {
echo "Recording trace (only user mode) with path: CPU$2 => $1"
rm -f $file
perf record -o ${perfdata} -e cs_etm/@$1/u --per-thread \
- -- taskset -c $2 touch $file
+ -- taskset -c $2 touch $file > /dev/null 2>&1
}
perf_script_branch_samples() {
@@ -43,8 +44,8 @@ perf_script_branch_samples() {
# touch 6512 1 branches:u: ffffb220824c strcmp+0xc (/lib/aarch64-linux-gnu/ld-2.27.so)
# touch 6512 1 branches:u: ffffb22082e0 strcmp+0xa0 (/lib/aarch64-linux-gnu/ld-2.27.so)
# touch 6512 1 branches:u: ffffb2208320 strcmp+0xe0 (/lib/aarch64-linux-gnu/ld-2.27.so)
- perf script -F,-time -i ${perfdata} | \
- egrep " +$1 +[0-9]+ .* +branches:(.*:)? +"
+ perf script -F,-time -i ${perfdata} 2>&1 | \
+ egrep " +$1 +[0-9]+ .* +branches:(.*:)? +" > /dev/null 2>&1
}
perf_report_branch_samples() {
@@ -54,8 +55,8 @@ perf_report_branch_samples() {
# 73.04% 73.04% touch libc-2.27.so [.] _dl_addr
# 7.71% 7.71% touch libc-2.27.so [.] getenv
# 2.59% 2.59% touch ld-2.27.so [.] strcmp
- perf report --stdio -i ${perfdata} | \
- egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 "
+ perf report --stdio -i ${perfdata} 2>&1 | \
+ egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " > /dev/null 2>&1
}
perf_report_instruction_samples() {
@@ -65,8 +66,17 @@ perf_report_instruction_samples() {
# 68.12% touch libc-2.27.so [.] _dl_addr
# 5.80% touch libc-2.27.so [.] getenv
# 4.35% touch ld-2.27.so [.] _dl_fixup
- perf report --itrace=i1000i --stdio -i ${perfdata} | \
- egrep " +[0-9]+\.[0-9]+% +$1"
+ perf report --itrace=i1000i --stdio -i ${perfdata} 2>&1 | \
+ egrep " +[0-9]+\.[0-9]+% +$1" > /dev/null 2>&1
+}
+
+arm_cs_report() {
+ if [ $2 != 0 ]; then
+ echo "$1: FAIL"
+ glb_err=$2
+ else
+ echo "$1: PASS"
+ fi
}
is_device_sink() {
@@ -113,9 +123,7 @@ arm_cs_iterate_devices() {
perf_report_instruction_samples touch
err=$?
-
- # Exit when find failure
- [ $err != 0 ] && exit $err
+ arm_cs_report "CoreSight path testing (CPU$2 -> $device_name)" $err
fi
arm_cs_iterate_devices $dev $2
@@ -129,9 +137,6 @@ arm_cs_etm_traverse_path_test() {
# Find the ETM device belonging to which CPU
cpu=`cat $dev/cpu`
- echo $dev
- echo $cpu
-
# Use depth-first search (DFS) to iterate outputs
arm_cs_iterate_devices $dev $cpu
done
@@ -139,22 +144,20 @@ arm_cs_etm_traverse_path_test() {
arm_cs_etm_system_wide_test() {
echo "Recording trace with system wide mode"
- perf record -o ${perfdata} -e cs_etm// -a -- ls
+ perf record -o ${perfdata} -e cs_etm// -a -- ls > /dev/null 2>&1
perf_script_branch_samples perf &&
perf_report_branch_samples perf &&
perf_report_instruction_samples perf
err=$?
-
- # Exit when find failure
- [ $err != 0 ] && exit $err
+ arm_cs_report "CoreSight system wide testing" $err
}
arm_cs_etm_snapshot_test() {
echo "Recording trace with snapshot mode"
perf record -o ${perfdata} -e cs_etm// -S \
- -- dd if=/dev/zero of=/dev/null &
+ -- dd if=/dev/zero of=/dev/null > /dev/null 2>&1 &
PERFPID=$!
# Wait for perf program
@@ -172,12 +175,10 @@ arm_cs_etm_snapshot_test() {
perf_report_instruction_samples dd
err=$?
-
- # Exit when find failure
- [ $err != 0 ] && exit $err
+ arm_cs_report "CoreSight snapshot testing" $err
}
arm_cs_etm_traverse_path_test
arm_cs_etm_system_wide_test
arm_cs_etm_snapshot_test
-exit 0
+exit $glb_err
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index a49c9e23053b..74988846be1d 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -42,8 +42,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
.disabled = 1,
.freq = 1,
};
- struct perf_cpu_map *cpus;
- struct perf_thread_map *threads;
+ struct perf_cpu_map *cpus = NULL;
+ struct perf_thread_map *threads = NULL;
struct mmap *md;
attr.sample_freq = 500;
@@ -66,14 +66,11 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
if (!cpus || !threads) {
err = -ENOMEM;
pr_debug("Not enough memory to create thread/cpu maps\n");
- goto out_free_maps;
+ goto out_delete_evlist;
}
perf_evlist__set_maps(&evlist->core, cpus, threads);
- cpus = NULL;
- threads = NULL;
-
if (evlist__open(evlist)) {
const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate";
@@ -129,10 +126,9 @@ out_init:
err = -1;
}
-out_free_maps:
+out_delete_evlist:
perf_cpu_map__put(cpus);
perf_thread_map__put(threads);
-out_delete_evlist:
evlist__delete(evlist);
return err;
}
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 15a2ab765d89..3ebaa758df77 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -574,10 +574,9 @@ out:
if (evlist) {
evlist__disable(evlist);
evlist__delete(evlist);
- } else {
- perf_cpu_map__put(cpus);
- perf_thread_map__put(threads);
}
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
return err;
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index bbf94e4aa145..4c2969db59b0 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -75,14 +75,11 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
if (!cpus || !threads) {
err = -ENOMEM;
pr_debug("Not enough memory to create thread/cpu maps\n");
- goto out_free_maps;
+ goto out_delete_evlist;
}
perf_evlist__set_maps(&evlist->core, cpus, threads);
- cpus = NULL;
- threads = NULL;
-
err = evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal);
if (err < 0) {
pr_debug("Couldn't run the workload!\n");
@@ -137,7 +134,7 @@ out_init:
if (retry_count++ > 1000) {
pr_debug("Failed after retrying 1000 times\n");
err = -1;
- goto out_free_maps;
+ goto out_delete_evlist;
}
goto retry;
@@ -148,10 +145,9 @@ out_init:
err = -1;
}
-out_free_maps:
+out_delete_evlist:
perf_cpu_map__put(cpus);
perf_thread_map__put(threads);
-out_delete_evlist:
evlist__delete(evlist);
return err;
}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 8e24a61fe4c2..b85f005308a3 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -119,6 +119,7 @@ int test__time_utils(struct test *t, int subtest);
int test__jit_write_elf(struct test *test, int subtest);
int test__api_io(struct test *test, int subtest);
int test__demangle_java(struct test *test, int subtest);
+int test__demangle_ocaml(struct test *test, int subtest);
int test__pfm(struct test *test, int subtest);
const char *test__pfm_subtest_get_desc(int subtest);
int test__pfm_subtest_get_nr(void);
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index 28f51c4bd373..d1e208b4a571 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -102,6 +102,7 @@ int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __
TEST_ASSERT_VAL("failed to synthesize map",
!perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL));
+ perf_thread_map__put(threads);
return 0;
}
@@ -109,12 +110,12 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb
{
struct perf_thread_map *threads;
char *str;
- int i;
TEST_ASSERT_VAL("failed to allocate map string",
asprintf(&str, "%d,%d", getpid(), getppid()) >= 0);
threads = thread_map__new_str(str, NULL, 0, false);
+ free(str);
TEST_ASSERT_VAL("failed to allocate thread_map",
threads);
@@ -141,9 +142,6 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb
TEST_ASSERT_VAL("failed to not remove thread",
thread_map__remove(threads, 0));
- for (i = 0; i < threads->nr; i++)
- zfree(&threads->map[i].comm);
-
- free(threads);
+ perf_thread_map__put(threads);
return 0;
}
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index bd77825fd5a1..35b82caf8090 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -759,7 +759,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
continue;
case 'k':
notes->options->show_linenr = !notes->options->show_linenr;
- break;
+ continue;
case 'H':
nd = browser->curr_hot;
break;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e2563d0154eb..e3e12f9d4733 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -135,6 +135,7 @@ perf-y += clockid.o
perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o
@@ -172,6 +173,7 @@ perf-$(CONFIG_ZSTD) += zstd.o
perf-$(CONFIG_LIBCAP) += cap.o
+perf-y += demangle-ocaml.o
perf-y += demangle-java.o
perf-y += demangle-rust.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index ce8c07bc8c56..e60841b86d27 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -321,12 +321,18 @@ bool ins__is_call(const struct ins *ins)
/*
* Prevents from matching commas in the comment section, e.g.:
* ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast
+ *
+ * and skip comma as part of function arguments, e.g.:
+ * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc>
*/
static inline const char *validate_comma(const char *c, struct ins_operands *ops)
{
if (ops->raw_comment && c > ops->raw_comment)
return NULL;
+ if (ops->raw_func_start && c > ops->raw_func_start)
+ return NULL;
+
return c;
}
@@ -341,6 +347,8 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s
u64 start, end;
ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char);
+ ops->raw_func_start = strchr(ops->raw, '<');
+
c = validate_comma(c, ops);
/*
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 0a0cd4f32175..096cdaf21b01 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -32,6 +32,7 @@ struct ins {
struct ins_operands {
char *raw;
char *raw_comment;
+ char *raw_func_start;
struct {
char *raw;
char *name;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 90d575cee1b9..32fe41835fa6 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -172,12 +172,22 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.from_ip = ip;
else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH)
decoder->record.to_ip = ip;
+ else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT)
+ decoder->record.virt_addr = ip;
+ else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS)
+ decoder->record.phys_addr = ip;
break;
case ARM_SPE_COUNTER:
break;
case ARM_SPE_CONTEXT:
break;
case ARM_SPE_OP_TYPE:
+ if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) {
+ if (payload & 0x1)
+ decoder->record.op = ARM_SPE_ST;
+ else
+ decoder->record.op = ARM_SPE_LD;
+ }
break;
case ARM_SPE_EVENTS:
if (payload & BIT(EV_L1D_REFILL))
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 24727b8ca7ff..59bdb7309674 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -24,12 +24,20 @@ enum arm_spe_sample_type {
ARM_SPE_REMOTE_ACCESS = 1 << 7,
};
+enum arm_spe_op_type {
+ ARM_SPE_LD = 1 << 0,
+ ARM_SPE_ST = 1 << 1,
+};
+
struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
+ u32 op;
u64 from_ip;
u64 to_ip;
u64 timestamp;
+ u64 virt_addr;
+ u64 phys_addr;
};
struct arm_spe_insn;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 8901a1656a41..2539d4baec44 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -53,6 +53,7 @@ struct arm_spe {
u8 sample_tlb;
u8 sample_branch;
u8 sample_remote_access;
+ u8 sample_memory;
u64 l1d_miss_id;
u64 l1d_access_id;
@@ -62,6 +63,7 @@ struct arm_spe {
u64 tlb_access_id;
u64 branch_miss_id;
u64 remote_access_id;
+ u64 memory_id;
u64 kernel_start;
@@ -235,7 +237,6 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
sample->cpumode = arm_spe_cpumode(spe, sample->ip);
sample->pid = speq->pid;
sample->tid = speq->tid;
- sample->addr = record->to_ip;
sample->period = 1;
sample->cpu = speq->cpu;
@@ -259,11 +260,11 @@ arm_spe_deliver_synth_event(struct arm_spe *spe,
return ret;
}
-static int
-arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq,
- u64 spe_events_id)
+static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
+ u64 spe_events_id, u64 data_src)
{
struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record = &speq->decoder->record;
union perf_event *event = speq->event_buf;
struct perf_sample sample = { .ip = 0, };
@@ -271,27 +272,102 @@ arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq,
sample.id = spe_events_id;
sample.stream_id = spe_events_id;
+ sample.addr = record->virt_addr;
+ sample.phys_addr = record->phys_addr;
+ sample.data_src = data_src;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
+static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
+ u64 spe_events_id)
+{
+ struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record = &speq->decoder->record;
+ union perf_event *event = speq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ arm_spe_prep_sample(spe, speq, event, &sample);
+
+ sample.id = spe_events_id;
+ sample.stream_id = spe_events_id;
+ sample.addr = record->to_ip;
+
+ return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+#define SPE_MEM_TYPE (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
+ ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
+ ARM_SPE_REMOTE_ACCESS)
+
+static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
+{
+ if (type & SPE_MEM_TYPE)
+ return true;
+
+ return false;
+}
+
+static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
+{
+ union perf_mem_data_src data_src = { 0 };
+
+ if (record->op == ARM_SPE_LD)
+ data_src.mem_op = PERF_MEM_OP_LOAD;
+ else
+ data_src.mem_op = PERF_MEM_OP_STORE;
+
+ if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
+ data_src.mem_lvl = PERF_MEM_LVL_L3;
+
+ if (record->type & ARM_SPE_LLC_MISS)
+ data_src.mem_lvl |= PERF_MEM_LVL_MISS;
+ else
+ data_src.mem_lvl |= PERF_MEM_LVL_HIT;
+ } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
+ data_src.mem_lvl = PERF_MEM_LVL_L1;
+
+ if (record->type & ARM_SPE_L1D_MISS)
+ data_src.mem_lvl |= PERF_MEM_LVL_MISS;
+ else
+ data_src.mem_lvl |= PERF_MEM_LVL_HIT;
+ }
+
+ if (record->type & ARM_SPE_REMOTE_ACCESS)
+ data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;
+
+ if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
+ data_src.mem_dtlb = PERF_MEM_TLB_WK;
+
+ if (record->type & ARM_SPE_TLB_MISS)
+ data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
+ else
+ data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
+ }
+
+ return data_src.val;
+}
+
static int arm_spe_sample(struct arm_spe_queue *speq)
{
const struct arm_spe_record *record = &speq->decoder->record;
struct arm_spe *spe = speq->spe;
+ u64 data_src;
int err;
+ data_src = arm_spe__synth_data_source(record);
+
if (spe->sample_flc) {
if (record->type & ARM_SPE_L1D_MISS) {
- err = arm_spe_synth_spe_events_sample(
- speq, spe->l1d_miss_id);
+ err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
+ data_src);
if (err)
return err;
}
if (record->type & ARM_SPE_L1D_ACCESS) {
- err = arm_spe_synth_spe_events_sample(
- speq, spe->l1d_access_id);
+ err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
+ data_src);
if (err)
return err;
}
@@ -299,15 +375,15 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
if (spe->sample_llc) {
if (record->type & ARM_SPE_LLC_MISS) {
- err = arm_spe_synth_spe_events_sample(
- speq, spe->llc_miss_id);
+ err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
+ data_src);
if (err)
return err;
}
if (record->type & ARM_SPE_LLC_ACCESS) {
- err = arm_spe_synth_spe_events_sample(
- speq, spe->llc_access_id);
+ err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
+ data_src);
if (err)
return err;
}
@@ -315,31 +391,36 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
if (spe->sample_tlb) {
if (record->type & ARM_SPE_TLB_MISS) {
- err = arm_spe_synth_spe_events_sample(
- speq, spe->tlb_miss_id);
+ err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
+ data_src);
if (err)
return err;
}
if (record->type & ARM_SPE_TLB_ACCESS) {
- err = arm_spe_synth_spe_events_sample(
- speq, spe->tlb_access_id);
+ err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
+ data_src);
if (err)
return err;
}
}
if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
- err = arm_spe_synth_spe_events_sample(speq,
- spe->branch_miss_id);
+ err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
if (err)
return err;
}
if (spe->sample_remote_access &&
(record->type & ARM_SPE_REMOTE_ACCESS)) {
- err = arm_spe_synth_spe_events_sample(speq,
- spe->remote_access_id);
+ err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
+ data_src);
+ if (err)
+ return err;
+ }
+
+ if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
+ err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
if (err)
return err;
}
@@ -803,7 +884,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
attr.type = PERF_TYPE_HARDWARE;
attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
- PERF_SAMPLE_PERIOD;
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
if (spe->timeless_decoding)
attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
else
@@ -907,6 +988,16 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
id += 1;
}
+ if (spe->synth_opts.mem) {
+ spe->sample_memory = true;
+
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->memory_id = id;
+ arm_spe_set_event_name(evlist, id, "memory");
+ }
+
return 0;
}
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index a60878498139..953f4afacd3b 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -788,6 +788,21 @@ no_opt:
return auxtrace_validate_aux_sample_size(evlist, opts);
}
+void auxtrace_regroup_aux_output(struct evlist *evlist)
+{
+ struct evsel *evsel, *aux_evsel = NULL;
+ struct evsel_config_term *term;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_aux_event(evsel))
+ aux_evsel = evsel;
+ term = evsel__get_config_term(evsel, AUX_OUTPUT);
+ /* If possible, group with the AUX event */
+ if (term && aux_evsel)
+ evlist__regroup(evlist, aux_evsel, evsel);
+ }
+}
+
struct auxtrace_record *__weak
auxtrace_record__init(struct evlist *evlist __maybe_unused, int *err)
{
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 7e5c9e1552bd..a4fbb33b7245 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -559,6 +559,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
int auxtrace_parse_sample_options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts, const char *str);
+void auxtrace_regroup_aux_output(struct evlist *evlist);
int auxtrace_record__options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts);
@@ -741,6 +742,11 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused,
}
static inline
+void auxtrace_regroup_aux_output(struct evlist *evlist __maybe_unused)
+{
+}
+
+static inline
int auxtrace__process_event(struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
new file mode 100644
index 000000000000..04f89120b323
--- /dev/null
+++ b/tools/perf/util/bpf_counter.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2019 Facebook */
+
+#include <assert.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <linux/err.h>
+#include <linux/zalloc.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_counter.h"
+#include "counts.h"
+#include "debug.h"
+#include "evsel.h"
+#include "target.h"
+
+#include "bpf_skel/bpf_prog_profiler.skel.h"
+
+static inline void *u64_to_ptr(__u64 ptr)
+{
+ return (void *)(unsigned long)ptr;
+}
+
+static void set_max_rlimit(void)
+{
+ struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+
+ setrlimit(RLIMIT_MEMLOCK, &rinf);
+}
+
+static struct bpf_counter *bpf_counter_alloc(void)
+{
+ struct bpf_counter *counter;
+
+ counter = zalloc(sizeof(*counter));
+ if (counter)
+ INIT_LIST_HEAD(&counter->list);
+ return counter;
+}
+
+static int bpf_program_profiler__destroy(struct evsel *evsel)
+{
+ struct bpf_counter *counter, *tmp;
+
+ list_for_each_entry_safe(counter, tmp,
+ &evsel->bpf_counter_list, list) {
+ list_del_init(&counter->list);
+ bpf_prog_profiler_bpf__destroy(counter->skel);
+ free(counter);
+ }
+ assert(list_empty(&evsel->bpf_counter_list));
+
+ return 0;
+}
+
+static char *bpf_target_prog_name(int tgt_fd)
+{
+ struct bpf_prog_info_linear *info_linear;
+ struct bpf_func_info *func_info;
+ const struct btf_type *t;
+ char *name = NULL;
+ struct btf *btf;
+
+ info_linear = bpf_program__get_prog_info_linear(
+ tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd);
+ return NULL;
+ }
+
+ if (info_linear->info.btf_id == 0 ||
+ btf__get_from_id(info_linear->info.btf_id, &btf)) {
+ pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd);
+ goto out;
+ }
+
+ func_info = u64_to_ptr(info_linear->info.func_info);
+ t = btf__type_by_id(btf, func_info[0].type_id);
+ if (!t) {
+ pr_debug("btf %d doesn't have type %d\n",
+ info_linear->info.btf_id, func_info[0].type_id);
+ goto out;
+ }
+ name = strdup(btf__name_by_offset(btf, t->name_off));
+out:
+ free(info_linear);
+ return name;
+}
+
+static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id)
+{
+ struct bpf_prog_profiler_bpf *skel;
+ struct bpf_counter *counter;
+ struct bpf_program *prog;
+ char *prog_name;
+ int prog_fd;
+ int err;
+
+ prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (prog_fd < 0) {
+ pr_err("Failed to open fd for bpf prog %u\n", prog_id);
+ return -1;
+ }
+ counter = bpf_counter_alloc();
+ if (!counter) {
+ close(prog_fd);
+ return -1;
+ }
+
+ skel = bpf_prog_profiler_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open bpf skeleton\n");
+ goto err_out;
+ }
+
+ skel->rodata->num_cpu = evsel__nr_cpus(evsel);
+
+ bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel));
+ bpf_map__resize(skel->maps.fentry_readings, 1);
+ bpf_map__resize(skel->maps.accum_readings, 1);
+
+ prog_name = bpf_target_prog_name(prog_fd);
+ if (!prog_name) {
+ pr_err("Failed to get program name for bpf prog %u. Does it have BTF?\n", prog_id);
+ goto err_out;
+ }
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ err = bpf_program__set_attach_target(prog, prog_fd, prog_name);
+ if (err) {
+ pr_err("bpf_program__set_attach_target failed.\n"
+ "Does bpf prog %u have BTF?\n", prog_id);
+ goto err_out;
+ }
+ }
+ set_max_rlimit();
+ err = bpf_prog_profiler_bpf__load(skel);
+ if (err) {
+ pr_err("bpf_prog_profiler_bpf__load failed\n");
+ goto err_out;
+ }
+
+ assert(skel != NULL);
+ counter->skel = skel;
+ list_add(&counter->list, &evsel->bpf_counter_list);
+ close(prog_fd);
+ return 0;
+err_out:
+ bpf_prog_profiler_bpf__destroy(skel);
+ free(counter);
+ close(prog_fd);
+ return -1;
+}
+
+static int bpf_program_profiler__load(struct evsel *evsel, struct target *target)
+{
+ char *bpf_str, *bpf_str_, *tok, *saveptr = NULL, *p;
+ u32 prog_id;
+ int ret;
+
+ bpf_str_ = bpf_str = strdup(target->bpf_str);
+ if (!bpf_str)
+ return -1;
+
+ while ((tok = strtok_r(bpf_str, ",", &saveptr)) != NULL) {
+ prog_id = strtoul(tok, &p, 10);
+ if (prog_id == 0 || prog_id == UINT_MAX ||
+ (*p != '\0' && *p != ',')) {
+ pr_err("Failed to parse bpf prog ids %s\n",
+ target->bpf_str);
+ return -1;
+ }
+
+ ret = bpf_program_profiler_load_one(evsel, prog_id);
+ if (ret) {
+ bpf_program_profiler__destroy(evsel);
+ free(bpf_str_);
+ return -1;
+ }
+ bpf_str = NULL;
+ }
+ free(bpf_str_);
+ return 0;
+}
+
+static int bpf_program_profiler__enable(struct evsel *evsel)
+{
+ struct bpf_counter *counter;
+ int ret;
+
+ list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
+ assert(counter->skel != NULL);
+ ret = bpf_prog_profiler_bpf__attach(counter->skel);
+ if (ret) {
+ bpf_program_profiler__destroy(evsel);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int bpf_program_profiler__read(struct evsel *evsel)
+{
+ // perf_cpu_map uses /sys/devices/system/cpu/online
+ int num_cpu = evsel__nr_cpus(evsel);
+ // BPF_MAP_TYPE_PERCPU_ARRAY uses /sys/devices/system/cpu/possible
+ // Sometimes possible > online, like on a Ryzen 3900X that has 24
+ // threads but its possible showed 0-31 -acme
+ int num_cpu_bpf = libbpf_num_possible_cpus();
+ struct bpf_perf_event_value values[num_cpu_bpf];
+ struct bpf_counter *counter;
+ int reading_map_fd;
+ __u32 key = 0;
+ int err, cpu;
+
+ if (list_empty(&evsel->bpf_counter_list))
+ return -EAGAIN;
+
+ for (cpu = 0; cpu < num_cpu; cpu++) {
+ perf_counts(evsel->counts, cpu, 0)->val = 0;
+ perf_counts(evsel->counts, cpu, 0)->ena = 0;
+ perf_counts(evsel->counts, cpu, 0)->run = 0;
+ }
+ list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
+ struct bpf_prog_profiler_bpf *skel = counter->skel;
+
+ assert(skel != NULL);
+ reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
+
+ err = bpf_map_lookup_elem(reading_map_fd, &key, values);
+ if (err) {
+ pr_err("failed to read value\n");
+ return err;
+ }
+
+ for (cpu = 0; cpu < num_cpu; cpu++) {
+ perf_counts(evsel->counts, cpu, 0)->val += values[cpu].counter;
+ perf_counts(evsel->counts, cpu, 0)->ena += values[cpu].enabled;
+ perf_counts(evsel->counts, cpu, 0)->run += values[cpu].running;
+ }
+ }
+ return 0;
+}
+
+static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
+ int fd)
+{
+ struct bpf_prog_profiler_bpf *skel;
+ struct bpf_counter *counter;
+ int ret;
+
+ list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
+ skel = counter->skel;
+ assert(skel != NULL);
+
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events),
+ &cpu, &fd, BPF_ANY);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+struct bpf_counter_ops bpf_program_profiler_ops = {
+ .load = bpf_program_profiler__load,
+ .enable = bpf_program_profiler__enable,
+ .read = bpf_program_profiler__read,
+ .destroy = bpf_program_profiler__destroy,
+ .install_pe = bpf_program_profiler__install_pe,
+};
+
+int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd)
+{
+ if (list_empty(&evsel->bpf_counter_list))
+ return 0;
+ return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd);
+}
+
+int bpf_counter__load(struct evsel *evsel, struct target *target)
+{
+ if (target__has_bpf(target))
+ evsel->bpf_counter_ops = &bpf_program_profiler_ops;
+
+ if (evsel->bpf_counter_ops)
+ return evsel->bpf_counter_ops->load(evsel, target);
+ return 0;
+}
+
+int bpf_counter__enable(struct evsel *evsel)
+{
+ if (list_empty(&evsel->bpf_counter_list))
+ return 0;
+ return evsel->bpf_counter_ops->enable(evsel);
+}
+
+int bpf_counter__read(struct evsel *evsel)
+{
+ if (list_empty(&evsel->bpf_counter_list))
+ return -EAGAIN;
+ return evsel->bpf_counter_ops->read(evsel);
+}
+
+void bpf_counter__destroy(struct evsel *evsel)
+{
+ if (list_empty(&evsel->bpf_counter_list))
+ return;
+ evsel->bpf_counter_ops->destroy(evsel);
+ evsel->bpf_counter_ops = NULL;
+}
diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h
new file mode 100644
index 000000000000..2eca210e5dc1
--- /dev/null
+++ b/tools/perf/util/bpf_counter.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BPF_COUNTER_H
+#define __PERF_BPF_COUNTER_H 1
+
+#include <linux/list.h>
+
+struct evsel;
+struct target;
+struct bpf_counter;
+
+typedef int (*bpf_counter_evsel_op)(struct evsel *evsel);
+typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel,
+ struct target *target);
+typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel,
+ int cpu,
+ int fd);
+
+struct bpf_counter_ops {
+ bpf_counter_evsel_target_op load;
+ bpf_counter_evsel_op enable;
+ bpf_counter_evsel_op read;
+ bpf_counter_evsel_op destroy;
+ bpf_counter_evsel_install_pe_op install_pe;
+};
+
+struct bpf_counter {
+ void *skel;
+ struct list_head list;
+};
+
+#ifdef HAVE_BPF_SKEL
+
+int bpf_counter__load(struct evsel *evsel, struct target *target);
+int bpf_counter__enable(struct evsel *evsel);
+int bpf_counter__read(struct evsel *evsel);
+void bpf_counter__destroy(struct evsel *evsel);
+int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd);
+
+#else /* HAVE_BPF_SKEL */
+
+#include<linux/err.h>
+
+static inline int bpf_counter__load(struct evsel *evsel __maybe_unused,
+ struct target *target __maybe_unused)
+{
+ return 0;
+}
+
+static inline int bpf_counter__enable(struct evsel *evsel __maybe_unused)
+{
+ return 0;
+}
+
+static inline int bpf_counter__read(struct evsel *evsel __maybe_unused)
+{
+ return -EAGAIN;
+}
+
+static inline void bpf_counter__destroy(struct evsel *evsel __maybe_unused)
+{
+}
+
+static inline int bpf_counter__install_pe(struct evsel *evsel __maybe_unused,
+ int cpu __maybe_unused,
+ int fd __maybe_unused)
+{
+ return 0;
+}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* __PERF_BPF_COUNTER_H */
diff --git a/tools/perf/util/bpf_skel/.gitignore b/tools/perf/util/bpf_skel/.gitignore
new file mode 100644
index 000000000000..5263e9e6c5d8
--- /dev/null
+++ b/tools/perf/util/bpf_skel/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+.tmp
+*.skel.h \ No newline at end of file
diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
new file mode 100644
index 000000000000..c7cec92d0236
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* map of perf event fds, num_cpu * num_metric entries */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(int));
+} events SEC(".maps");
+
+/* readings at fentry */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} fentry_readings SEC(".maps");
+
+/* accumulated readings */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} accum_readings SEC(".maps");
+
+const volatile __u32 num_cpu = 1;
+
+SEC("fentry/XXX")
+int BPF_PROG(fentry_XXX)
+{
+ __u32 key = bpf_get_smp_processor_id();
+ struct bpf_perf_event_value *ptr;
+ __u32 zero = 0;
+ long err;
+
+ /* look up before reading, to reduce error */
+ ptr = bpf_map_lookup_elem(&fentry_readings, &zero);
+ if (!ptr)
+ return 0;
+
+ err = bpf_perf_event_read_value(&events, key, ptr, sizeof(*ptr));
+ if (err)
+ return 0;
+
+ return 0;
+}
+
+static inline void
+fexit_update_maps(struct bpf_perf_event_value *after)
+{
+ struct bpf_perf_event_value *before, diff, *accum;
+ __u32 zero = 0;
+
+ before = bpf_map_lookup_elem(&fentry_readings, &zero);
+ /* only account samples with a valid fentry_reading */
+ if (before && before->counter) {
+ struct bpf_perf_event_value *accum;
+
+ diff.counter = after->counter - before->counter;
+ diff.enabled = after->enabled - before->enabled;
+ diff.running = after->running - before->running;
+
+ accum = bpf_map_lookup_elem(&accum_readings, &zero);
+ if (accum) {
+ accum->counter += diff.counter;
+ accum->enabled += diff.enabled;
+ accum->running += diff.running;
+ }
+ }
+}
+
+SEC("fexit/XXX")
+int BPF_PROG(fexit_XXX)
+{
+ struct bpf_perf_event_value reading;
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u32 one = 1, zero = 0;
+ int err;
+
+ /* read all events before updating the maps, to reduce error */
+ err = bpf_perf_event_read_value(&events, cpu, &reading, sizeof(reading));
+ if (err)
+ return 0;
+
+ fexit_update_maps(&reading);
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 02df36b30ac5..e32e8f2ff3bd 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -448,7 +448,8 @@ static bool lsdir_bid_tail_filter(const char *name __maybe_unused,
int i = 0;
while (isxdigit(d->d_name[i]) && i < SBUILD_ID_SIZE - 3)
i++;
- return (i == SBUILD_ID_SIZE - 3) && (d->d_name[i] == '\0');
+ return (i >= SBUILD_ID_MIN_SIZE - 3) && (i <= SBUILD_ID_SIZE - 3) &&
+ (d->d_name[i] == '\0');
}
struct strlist *build_id_cache__list_all(bool validonly)
@@ -490,7 +491,7 @@ struct strlist *build_id_cache__list_all(bool validonly)
}
strlist__for_each_entry(nd2, linklist) {
if (snprintf(sbuild_id, SBUILD_ID_SIZE, "%s%s",
- nd->s, nd2->s) != SBUILD_ID_SIZE - 1)
+ nd->s, nd2->s) > SBUILD_ID_SIZE - 1)
goto err_out;
if (validonly && !build_id_cache__valid_id(sbuild_id))
continue;
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 02613f4b2c29..c19617151670 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -2,8 +2,10 @@
#ifndef PERF_BUILD_ID_H_
#define PERF_BUILD_ID_H_ 1
-#define BUILD_ID_SIZE 20
+#define BUILD_ID_SIZE 20 /* SHA-1 length in bytes */
+#define BUILD_ID_MIN_SIZE 16 /* MD5/UUID/GUID length in bytes */
#define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1)
+#define SBUILD_ID_MIN_SIZE (BUILD_ID_MIN_SIZE * 2 + 1)
#include "machine.h"
#include "tool.h"
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 5dff7e489921..f24ab4585553 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -161,7 +161,7 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup)
/* helper function for ftw() in match_cgroups and list_cgroups */
static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unused,
- int typeflag)
+ int typeflag, struct FTW *ftwbuf __maybe_unused)
{
struct cgroup_name *cn;
@@ -209,12 +209,12 @@ static int list_cgroups(const char *str)
if (!s)
return -1;
/* pretend if it's added by ftw() */
- ret = add_cgroup_name(s, NULL, FTW_D);
+ ret = add_cgroup_name(s, NULL, FTW_D, NULL);
free(s);
if (ret)
return -1;
} else {
- if (add_cgroup_name("", NULL, FTW_D) < 0)
+ if (add_cgroup_name("", NULL, FTW_D, NULL) < 0)
return -1;
}
@@ -247,7 +247,7 @@ static int match_cgroups(const char *str)
prefix_len = strlen(mnt);
/* collect all cgroups in the cgroup_list */
- if (ftw(mnt, add_cgroup_name, 20) < 0)
+ if (nftw(mnt, add_cgroup_name, 20, 0) < 0)
return -1;
for (;;) {
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 6969f82843ee..6984c77068a3 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -489,7 +489,7 @@ int perf_default_config(const char *var, const char *value,
return 0;
}
-int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
+static int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
{
int ret;
FILE *f = fopen(filename, "r");
@@ -521,16 +521,66 @@ static int perf_env_bool(const char *k, int def)
return v ? perf_config_bool(k, v) : def;
}
-static int perf_config_system(void)
+int perf_config_system(void)
{
return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);
}
-static int perf_config_global(void)
+int perf_config_global(void)
{
return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);
}
+static char *home_perfconfig(void)
+{
+ const char *home = NULL;
+ char *config;
+ struct stat st;
+
+ home = getenv("HOME");
+
+ /*
+ * Skip reading user config if:
+ * - there is no place to read it from (HOME)
+ * - we are asked not to (PERF_CONFIG_NOGLOBAL=1)
+ */
+ if (!home || !*home || !perf_config_global())
+ return NULL;
+
+ config = strdup(mkpath("%s/.perfconfig", home));
+ if (config == NULL) {
+ pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home);
+ return NULL;
+ }
+
+ if (stat(config, &st) < 0)
+ goto out_free;
+
+ if (st.st_uid && (st.st_uid != geteuid())) {
+ pr_warning("File %s not owned by current user or root, ignoring it.", config);
+ goto out_free;
+ }
+
+ if (st.st_size)
+ return config;
+
+out_free:
+ free(config);
+ return NULL;
+}
+
+const char *perf_home_perfconfig(void)
+{
+ static const char *config;
+ static bool failed;
+
+ config = failed ? NULL : home_perfconfig();
+ if (!config)
+ failed = true;
+
+ return config;
+}
+
static struct perf_config_section *find_section(struct list_head *sections,
const char *section_name)
{
@@ -676,9 +726,6 @@ int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
static int perf_config_set__init(struct perf_config_set *set)
{
int ret = -1;
- const char *home = NULL;
- char *user_config;
- struct stat st;
/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
if (config_exclusive_filename)
@@ -687,41 +734,11 @@ static int perf_config_set__init(struct perf_config_set *set)
if (perf_config_from_file(collect_config, perf_etc_perfconfig(), set) < 0)
goto out;
}
-
- home = getenv("HOME");
-
- /*
- * Skip reading user config if:
- * - there is no place to read it from (HOME)
- * - we are asked not to (PERF_CONFIG_NOGLOBAL=1)
- */
- if (!home || !*home || !perf_config_global())
- return 0;
-
- user_config = strdup(mkpath("%s/.perfconfig", home));
- if (user_config == NULL) {
- pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home);
- goto out;
- }
-
- if (stat(user_config, &st) < 0) {
- if (errno == ENOENT)
- ret = 0;
- goto out_free;
- }
-
- ret = 0;
-
- if (st.st_uid && (st.st_uid != geteuid())) {
- pr_warning("File %s not owned by current user or root, ignoring it.", user_config);
- goto out_free;
+ if (perf_config_global() && perf_home_perfconfig()) {
+ if (perf_config_from_file(collect_config, perf_home_perfconfig(), set) < 0)
+ goto out;
}
- if (st.st_size)
- ret = perf_config_from_file(collect_config, user_config, set);
-
-out_free:
- free(user_config);
out:
return ret;
}
@@ -738,6 +755,18 @@ struct perf_config_set *perf_config_set__new(void)
return set;
}
+struct perf_config_set *perf_config_set__load_file(const char *file)
+{
+ struct perf_config_set *set = zalloc(sizeof(*set));
+
+ if (set) {
+ INIT_LIST_HEAD(&set->sections);
+ perf_config_from_file(collect_config, file, set);
+ }
+
+ return set;
+}
+
static int perf_config__init(void)
{
if (config_set == NULL)
@@ -746,17 +775,15 @@ static int perf_config__init(void)
return config_set == NULL;
}
-int perf_config(config_fn_t fn, void *data)
+int perf_config_set(struct perf_config_set *set,
+ config_fn_t fn, void *data)
{
int ret = 0;
char key[BUFSIZ];
struct perf_config_section *section;
struct perf_config_item *item;
- if (config_set == NULL && perf_config__init())
- return -1;
-
- perf_config_set__for_each_entry(config_set, section, item) {
+ perf_config_set__for_each_entry(set, section, item) {
char *value = item->value;
if (value) {
@@ -778,6 +805,14 @@ out:
return ret;
}
+int perf_config(config_fn_t fn, void *data)
+{
+ if (config_set == NULL && perf_config__init())
+ return -1;
+
+ return perf_config_set(config_set, fn, data);
+}
+
void perf_config__exit(void)
{
perf_config_set__delete(config_set);
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index 8c881e3a3ec3..2fd77aaff4d2 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -27,17 +27,22 @@ extern const char *config_exclusive_filename;
typedef int (*config_fn_t)(const char *, const char *, void *);
-int perf_config_from_file(config_fn_t fn, const char *filename, void *data);
int perf_default_config(const char *, const char *, void *);
int perf_config(config_fn_t fn, void *);
+int perf_config_set(struct perf_config_set *set,
+ config_fn_t fn, void *data);
int perf_config_int(int *dest, const char *, const char *);
int perf_config_u8(u8 *dest, const char *name, const char *value);
int perf_config_u64(u64 *dest, const char *, const char *);
int perf_config_bool(const char *, const char *);
int config_error_nonbool(const char *);
const char *perf_etc_perfconfig(void);
+const char *perf_home_perfconfig(void);
+int perf_config_system(void);
+int perf_config_global(void);
struct perf_config_set *perf_config_set__new(void);
+struct perf_config_set *perf_config_set__load_file(const char *file);
void perf_config_set__delete(struct perf_config_set *set);
int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
const char *var, const char *value);
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index cd007cc9c283..3f4bc4050477 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -419,19 +419,10 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
packet->last_instr_subtype = elem->last_i_subtype;
packet->last_instr_cond = elem->last_instr_cond;
- switch (elem->last_i_type) {
- case OCSD_INSTR_BR:
- case OCSD_INSTR_BR_INDIRECT:
+ if (elem->last_i_type == OCSD_INSTR_BR || elem->last_i_type == OCSD_INSTR_BR_INDIRECT)
packet->last_instr_taken_branch = elem->last_instr_exec;
- break;
- case OCSD_INSTR_ISB:
- case OCSD_INSTR_DSB_DMB:
- case OCSD_INSTR_WFI_WFE:
- case OCSD_INSTR_OTHER:
- default:
+ else
packet->last_instr_taken_branch = false;
- break;
- }
packet->last_instr_size = elem->last_instr_sz;
@@ -572,6 +563,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
case OCSD_GEN_TRC_ELEM_EVENT:
case OCSD_GEN_TRC_ELEM_SWTRACE:
case OCSD_GEN_TRC_ELEM_CUSTOM:
+ case OCSD_GEN_TRC_ELEM_SYNC_MARKER:
+ case OCSD_GEN_TRC_ELEM_MEMTRANS:
default:
break;
}
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 27c5fef9ad54..8b67bd97d122 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -948,7 +948,7 @@ static char *change_name(char *name, char *orig_name, int dup)
goto out;
/*
* Add '_' prefix to potential keywork. According to
- * Mathieu Desnoyers (https://lkml.org/lkml/2015/1/23/652),
+ * Mathieu Desnoyers (https://lore.kernel.org/lkml/1074266107.40857.1422045946295.JavaMail.zimbra@efficios.com),
* futher CTF spec updating may require us to use '$'.
*/
if (dup < 0)
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index db7447154622..5cd189172525 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -438,6 +438,8 @@ static struct {
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "transaction abort"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "trace begin"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "trace end"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vm entry"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vm exit"},
{0, NULL}
};
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 50fd6a4be4e0..2c06abf6dcd2 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -10,6 +10,7 @@
#include <api/debug.h>
#include <linux/kernel.h>
#include <linux/time64.h>
+#include <sys/time.h>
#ifdef HAVE_BACKTRACE_SUPPORT
#include <execinfo.h>
#endif
@@ -31,21 +32,48 @@ int debug_ordered_events;
static int redirect_to_stderr;
int debug_data_convert;
static FILE *debug_file;
+bool debug_display_time;
void debug_set_file(FILE *file)
{
debug_file = file;
}
+void debug_set_display_time(bool set)
+{
+ debug_display_time = set;
+}
+
+static int fprintf_time(FILE *file)
+{
+ struct timeval tod;
+ struct tm ltime;
+ char date[64];
+
+ if (!debug_display_time)
+ return 0;
+
+ if (gettimeofday(&tod, NULL) != 0)
+ return 0;
+
+ if (localtime_r(&tod.tv_sec, &ltime) == NULL)
+ return 0;
+
+ strftime(date, sizeof(date), "%F %H:%M:%S", &ltime);
+ return fprintf(file, "[%s.%06lu] ", date, (long)tod.tv_usec);
+}
+
int veprintf(int level, int var, const char *fmt, va_list args)
{
int ret = 0;
if (var >= level) {
- if (use_browser >= 1 && !redirect_to_stderr)
+ if (use_browser >= 1 && !redirect_to_stderr) {
ui_helpline__vshow(fmt, args);
- else
- ret = vfprintf(debug_file, fmt, args);
+ } else {
+ ret = fprintf_time(debug_file);
+ ret += vfprintf(debug_file, fmt, args);
+ }
}
return ret;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 43f712295645..48f631966067 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -64,6 +64,7 @@ int veprintf(int level, int var, const char *fmt, va_list args);
int perf_debug_option(const char *str);
void debug_set_file(FILE *file);
+void debug_set_display_time(bool set);
void perf_debug_setup(void);
int perf_quiet_option(void);
diff --git a/tools/perf/util/demangle-ocaml.c b/tools/perf/util/demangle-ocaml.c
new file mode 100644
index 000000000000..3df14e67c622
--- /dev/null
+++ b/tools/perf/util/demangle-ocaml.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdlib.h>
+#include "util/string2.h"
+
+#include "demangle-ocaml.h"
+
+#include <linux/ctype.h>
+
+static const char *caml_prefix = "caml";
+static const size_t caml_prefix_len = 4;
+
+/* mangled OCaml symbols start with "caml" followed by an upper-case letter */
+static bool
+ocaml_is_mangled(const char *sym)
+{
+ return 0 == strncmp(sym, caml_prefix, caml_prefix_len)
+ && isupper(sym[caml_prefix_len]);
+}
+
+/*
+ * input:
+ * sym: a symbol which may have been mangled by the OCaml compiler
+ * return:
+ * if the input doesn't look like a mangled OCaml symbol, NULL is returned
+ * otherwise, a newly allocated string containing the demangled symbol is returned
+ */
+char *
+ocaml_demangle_sym(const char *sym)
+{
+ char *result;
+ int j = 0;
+ int i;
+ int len;
+
+ if (!ocaml_is_mangled(sym)) {
+ return NULL;
+ }
+
+ len = strlen(sym);
+
+ /* the demangled symbol is always smaller than the mangled symbol */
+ result = malloc(len + 1);
+ if (!result)
+ return NULL;
+
+ /* skip "caml" prefix */
+ i = caml_prefix_len;
+
+ while (i < len) {
+ if (sym[i] == '_' && sym[i + 1] == '_') {
+ /* "__" -> "." */
+ result[j++] = '.';
+ i += 2;
+ }
+ else if (sym[i] == '$' && isxdigit(sym[i + 1]) && isxdigit(sym[i + 2])) {
+ /* "$xx" is a hex-encoded character */
+ result[j++] = (hex(sym[i + 1]) << 4) | hex(sym[i + 2]);
+ i += 3;
+ }
+ else {
+ result[j++] = sym[i++];
+ }
+ }
+ result[j] = '\0';
+
+ /* scan backwards to remove an "_" followed by decimal digits */
+ if (j != 0 && isdigit(result[j - 1])) {
+ while (--j) {
+ if (!isdigit(result[j])) {
+ break;
+ }
+ }
+ if (result[j] == '_') {
+ result[j] = '\0';
+ }
+ }
+
+ return result;
+}
diff --git a/tools/perf/util/demangle-ocaml.h b/tools/perf/util/demangle-ocaml.h
new file mode 100644
index 000000000000..843cc4fa10a6
--- /dev/null
+++ b/tools/perf/util/demangle-ocaml.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DEMANGLE_OCAML
+#define __PERF_DEMANGLE_OCAML 1
+
+char * ocaml_demangle_sym(const char *str);
+
+#endif /* __PERF_DEMANGLE_OCAML */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 05616d4138a9..ac706304afe9 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -288,17 +288,36 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
{
- return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64
- " %02x:%02x %"PRI_lu64" %"PRI_lu64"]: %c%c%c%c %s\n",
- event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
- event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj,
- event->mmap2.min, event->mmap2.ino,
- event->mmap2.ino_generation,
- (event->mmap2.prot & PROT_READ) ? 'r' : '-',
- (event->mmap2.prot & PROT_WRITE) ? 'w' : '-',
- (event->mmap2.prot & PROT_EXEC) ? 'x' : '-',
- (event->mmap2.flags & MAP_SHARED) ? 's' : 'p',
- event->mmap2.filename);
+ if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
+ char sbuild_id[SBUILD_ID_SIZE];
+ struct build_id bid;
+
+ build_id__init(&bid, event->mmap2.build_id,
+ event->mmap2.build_id_size);
+ build_id__sprintf(&bid, sbuild_id);
+
+ return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64
+ " <%s>]: %c%c%c%c %s\n",
+ event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
+ event->mmap2.len, event->mmap2.pgoff, sbuild_id,
+ (event->mmap2.prot & PROT_READ) ? 'r' : '-',
+ (event->mmap2.prot & PROT_WRITE) ? 'w' : '-',
+ (event->mmap2.prot & PROT_EXEC) ? 'x' : '-',
+ (event->mmap2.flags & MAP_SHARED) ? 's' : 'p',
+ event->mmap2.filename);
+ } else {
+ return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64
+ " %02x:%02x %"PRI_lu64" %"PRI_lu64"]: %c%c%c%c %s\n",
+ event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
+ event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj,
+ event->mmap2.min, event->mmap2.ino,
+ event->mmap2.ino_generation,
+ (event->mmap2.prot & PROT_READ) ? 'r' : '-',
+ (event->mmap2.prot & PROT_WRITE) ? 'w' : '-',
+ (event->mmap2.prot & PROT_EXEC) ? 'x' : '-',
+ (event->mmap2.flags & MAP_SHARED) ? 's' : 'p',
+ event->mmap2.filename);
+ }
}
size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp)
@@ -626,6 +645,19 @@ struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
return al->sym;
}
+static bool check_address_range(struct intlist *addr_list, int addr_range,
+ unsigned long addr)
+{
+ struct int_node *pos;
+
+ intlist__for_each_entry(pos, addr_list) {
+ if (addr >= pos->i && addr < pos->i + addr_range)
+ return true;
+ }
+
+ return false;
+}
+
/*
* Callers need to drop the reference to al->thread, obtained in
* machine__findnew_thread()
@@ -673,6 +705,8 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
}
al->sym = map__find_symbol(al->map, al->addr);
+ } else if (symbol_conf.dso_list) {
+ al->filtered |= (1 << HIST_FILTER__DSO);
}
if (symbol_conf.sym_list) {
@@ -690,6 +724,17 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
ret = strlist__has_entry(symbol_conf.sym_list,
al_addr_str);
}
+ if (!ret && symbol_conf.addr_list && al->map) {
+ unsigned long addr = al->map->unmap_ip(al->map, al->addr);
+
+ ret = intlist__has_entry(symbol_conf.addr_list, addr);
+ if (!ret && symbol_conf.addr_range) {
+ ret = check_address_range(symbol_conf.addr_list,
+ symbol_conf.addr_range,
+ addr);
+ }
+ }
+
if (!ret)
al->filtered |= (1 << HIST_FILTER__SYMBOL);
}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index ff403ea578e1..f603edbbbc6f 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -96,6 +96,8 @@ enum {
PERF_IP_FLAG_TRACE_BEGIN = 1ULL << 8,
PERF_IP_FLAG_TRACE_END = 1ULL << 9,
PERF_IP_FLAG_IN_TX = 1ULL << 10,
+ PERF_IP_FLAG_VMENTRY = 1ULL << 11,
+ PERF_IP_FLAG_VMEXIT = 1ULL << 12,
};
#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
@@ -110,7 +112,9 @@ enum {
PERF_IP_FLAG_INTERRUPT |\
PERF_IP_FLAG_TX_ABORT |\
PERF_IP_FLAG_TRACE_BEGIN |\
- PERF_IP_FLAG_TRACE_END)
+ PERF_IP_FLAG_TRACE_END |\
+ PERF_IP_FLAG_VMENTRY |\
+ PERF_IP_FLAG_VMEXIT)
#define MAX_INSN 16
@@ -136,11 +140,13 @@ struct perf_sample {
u64 data_src;
u64 phys_addr;
u64 data_page_size;
+ u64 code_page_size;
u64 cgroup;
u32 flags;
u16 insn_len;
u8 cpumode;
u16 misc;
+ u16 ins_lat;
bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
@@ -171,6 +177,7 @@ enum perf_synth_id {
PERF_SYNTH_INTEL_EXSTOP,
PERF_SYNTH_INTEL_PWRX,
PERF_SYNTH_INTEL_CBR,
+ PERF_SYNTH_INTEL_PSB,
};
/*
@@ -263,6 +270,12 @@ struct perf_synth_intel_cbr {
u32 reserved3;
};
+struct perf_synth_intel_psb {
+ u32 padding;
+ u32 reserved;
+ u64 offset;
+};
+
/*
* raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
* 8-byte alignment.
@@ -412,4 +425,7 @@ extern unsigned int proc_map_timeout;
#define PAGE_SIZE_NAME_LEN 32
char *get_page_size_name(u64 size, char *str);
+void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type);
+void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type);
+
#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 05363a7247c4..882cd1f721d9 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -24,6 +24,7 @@
#include "bpf-event.h"
#include "util/string2.h"
#include "util/perf_api_probe.h"
+#include "util/evsel_fprintf.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
@@ -303,6 +304,11 @@ int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *a
return evlist__add_attrs(evlist, attrs, nr_attrs);
}
+__weak int arch_evlist__add_default_attrs(struct evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
struct evsel *evsel;
@@ -572,6 +578,14 @@ int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
return perf_evlist__filter_pollfd(&evlist->core, revents_and_mask);
}
+#ifdef HAVE_EVENTFD_SUPPORT
+int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd)
+{
+ return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
+ fdarray_flag__nonfilterable);
+}
+#endif
+
int evlist__poll(struct evlist *evlist, int timeout)
{
return perf_evlist__poll(&evlist->core, timeout);
@@ -1292,6 +1306,7 @@ void evlist__close(struct evlist *evlist)
perf_evsel__free_fd(&evsel->core);
perf_evsel__free_id(&evsel->core);
}
+ perf_evlist__reset_id_hash(&evlist->core);
}
static int evlist__create_syswide_maps(struct evlist *evlist)
@@ -1936,6 +1951,15 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd,
(sizeof(EVLIST_CTL_CMD_SNAPSHOT_TAG)-1))) {
*cmd = EVLIST_CTL_CMD_SNAPSHOT;
pr_debug("is snapshot\n");
+ } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_EVLIST_TAG,
+ (sizeof(EVLIST_CTL_CMD_EVLIST_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_EVLIST;
+ } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_STOP_TAG,
+ (sizeof(EVLIST_CTL_CMD_STOP_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_STOP;
+ } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_PING_TAG,
+ (sizeof(EVLIST_CTL_CMD_PING_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_PING;
}
}
@@ -1957,6 +1981,98 @@ int evlist__ctlfd_ack(struct evlist *evlist)
return err;
}
+static int get_cmd_arg(char *cmd_data, size_t cmd_size, char **arg)
+{
+ char *data = cmd_data + cmd_size;
+
+ /* no argument */
+ if (!*data)
+ return 0;
+
+ /* there's argument */
+ if (*data == ' ') {
+ *arg = data + 1;
+ return 1;
+ }
+
+ /* malformed */
+ return -1;
+}
+
+static int evlist__ctlfd_enable(struct evlist *evlist, char *cmd_data, bool enable)
+{
+ struct evsel *evsel;
+ char *name;
+ int err;
+
+ err = get_cmd_arg(cmd_data,
+ enable ? sizeof(EVLIST_CTL_CMD_ENABLE_TAG) - 1 :
+ sizeof(EVLIST_CTL_CMD_DISABLE_TAG) - 1,
+ &name);
+ if (err < 0) {
+ pr_info("failed: wrong command\n");
+ return -1;
+ }
+
+ if (err) {
+ evsel = evlist__find_evsel_by_str(evlist, name);
+ if (evsel) {
+ if (enable)
+ evlist__enable_evsel(evlist, name);
+ else
+ evlist__disable_evsel(evlist, name);
+ pr_info("Event %s %s\n", evsel->name,
+ enable ? "enabled" : "disabled");
+ } else {
+ pr_info("failed: can't find '%s' event\n", name);
+ }
+ } else {
+ if (enable) {
+ evlist__enable(evlist);
+ pr_info(EVLIST_ENABLED_MSG);
+ } else {
+ evlist__disable(evlist);
+ pr_info(EVLIST_DISABLED_MSG);
+ }
+ }
+
+ return 0;
+}
+
+static int evlist__ctlfd_list(struct evlist *evlist, char *cmd_data)
+{
+ struct perf_attr_details details = { .verbose = false, };
+ struct evsel *evsel;
+ char *arg;
+ int err;
+
+ err = get_cmd_arg(cmd_data,
+ sizeof(EVLIST_CTL_CMD_EVLIST_TAG) - 1,
+ &arg);
+ if (err < 0) {
+ pr_info("failed: wrong command\n");
+ return -1;
+ }
+
+ if (err) {
+ if (!strcmp(arg, "-v")) {
+ details.verbose = true;
+ } else if (!strcmp(arg, "-g")) {
+ details.event_group = true;
+ } else if (!strcmp(arg, "-F")) {
+ details.freq = true;
+ } else {
+ pr_info("failed: wrong command\n");
+ return -1;
+ }
+ }
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__fprintf(evsel, &details, stderr);
+
+ return 0;
+}
+
int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
{
int err = 0;
@@ -1973,12 +2089,16 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
if (err > 0) {
switch (*cmd) {
case EVLIST_CTL_CMD_ENABLE:
- evlist__enable(evlist);
- break;
case EVLIST_CTL_CMD_DISABLE:
- evlist__disable(evlist);
+ err = evlist__ctlfd_enable(evlist, cmd_data,
+ *cmd == EVLIST_CTL_CMD_ENABLE);
+ break;
+ case EVLIST_CTL_CMD_EVLIST:
+ err = evlist__ctlfd_list(evlist, cmd_data);
break;
case EVLIST_CTL_CMD_SNAPSHOT:
+ case EVLIST_CTL_CMD_STOP:
+ case EVLIST_CTL_CMD_PING:
break;
case EVLIST_CTL_CMD_ACK:
case EVLIST_CTL_CMD_UNSUPPORTED:
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 1aae75895dea..b695ffaae519 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -110,6 +110,8 @@ int __evlist__add_default_attrs(struct evlist *evlist,
#define evlist__add_default_attrs(evlist, array) \
__evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
+int arch_evlist__add_default_attrs(struct evlist *evlist);
+
int evlist__add_dummy(struct evlist *evlist);
int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
@@ -142,6 +144,10 @@ struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char
int evlist__add_pollfd(struct evlist *evlist, int fd);
int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask);
+#ifdef HAVE_EVENTFD_SUPPORT
+int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd);
+#endif
+
int evlist__poll(struct evlist *evlist, int timeout);
struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id);
@@ -330,6 +336,9 @@ struct evsel *evlist__reset_weak_group(struct evlist *evlist, struct evsel *evse
#define EVLIST_CTL_CMD_DISABLE_TAG "disable"
#define EVLIST_CTL_CMD_ACK_TAG "ack\n"
#define EVLIST_CTL_CMD_SNAPSHOT_TAG "snapshot"
+#define EVLIST_CTL_CMD_EVLIST_TAG "evlist"
+#define EVLIST_CTL_CMD_STOP_TAG "stop"
+#define EVLIST_CTL_CMD_PING_TAG "ping"
#define EVLIST_CTL_CMD_MAX_LEN 64
@@ -339,6 +348,9 @@ enum evlist_ctl_cmd {
EVLIST_CTL_CMD_DISABLE,
EVLIST_CTL_CMD_ACK,
EVLIST_CTL_CMD_SNAPSHOT,
+ EVLIST_CTL_CMD_EVLIST,
+ EVLIST_CTL_CMD_STOP,
+ EVLIST_CTL_CMD_PING,
};
int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c26ea82220bd..7ecbc8e2fbfa 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -25,6 +25,7 @@
#include <stdlib.h>
#include <perf/evsel.h>
#include "asm/bug.h"
+#include "bpf_counter.h"
#include "callchain.h"
#include "cgroup.h"
#include "counts.h"
@@ -45,6 +46,7 @@
#include "string2.h"
#include "memswap.h"
#include "util.h"
+#include "hashmap.h"
#include "../perf-sys.h"
#include "util/parse-branch-options.h"
#include <internal/xyarray.h>
@@ -247,6 +249,7 @@ void evsel__init(struct evsel *evsel,
evsel->bpf_obj = NULL;
evsel->bpf_fd = -1;
INIT_LIST_HEAD(&evsel->config_terms);
+ INIT_LIST_HEAD(&evsel->bpf_counter_list);
perf_evsel__object.init(evsel);
evsel->sample_size = __evsel__sample_size(attr->sample_type);
evsel__calc_id_pos(evsel);
@@ -1012,6 +1015,11 @@ struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evs
return found_term;
}
+void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+ evsel__set_sample_bit(evsel, WEIGHT);
+}
+
/*
* The enable_on_exec/disabled value strategy:
*
@@ -1166,12 +1174,14 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
}
if (opts->sample_weight)
- evsel__set_sample_bit(evsel, WEIGHT);
+ arch_evsel__set_sample_weight(evsel);
+
+ attr->task = track;
+ attr->mmap = track;
+ attr->mmap2 = track && !perf_missing_features.mmap2;
+ attr->comm = track;
+ attr->build_id = track && opts->build_id;
- attr->task = track;
- attr->mmap = track;
- attr->mmap2 = track && !perf_missing_features.mmap2;
- attr->comm = track;
/*
* ksymbol is tracked separately with text poke because it needs to be
* system wide and enabled immediately.
@@ -1191,6 +1201,9 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
if (opts->sample_data_page_size)
evsel__set_sample_bit(evsel, DATA_PAGE_SIZE);
+ if (opts->sample_code_page_size)
+ evsel__set_sample_bit(evsel, CODE_PAGE_SIZE);
+
if (opts->record_switch_events)
attr->context_switch = track;
@@ -1366,6 +1379,7 @@ void evsel__exit(struct evsel *evsel)
{
assert(list_empty(&evsel->core.node));
assert(evsel->evlist == NULL);
+ bpf_counter__destroy(evsel);
evsel__free_counts(evsel);
perf_evsel__free_fd(&evsel->core);
perf_evsel__free_id(&evsel->core);
@@ -1377,7 +1391,9 @@ void evsel__exit(struct evsel *evsel)
zfree(&evsel->group_name);
zfree(&evsel->name);
zfree(&evsel->pmu_name);
- zfree(&evsel->per_pkg_mask);
+ evsel__zero_per_pkg(evsel);
+ hashmap__free(evsel->per_pkg_mask);
+ evsel->per_pkg_mask = NULL;
zfree(&evsel->metric_events);
perf_evsel__object.fini(evsel);
}
@@ -1735,6 +1751,10 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
}
fallback_missing_features:
+ if (perf_missing_features.weight_struct) {
+ evsel__set_sample_bit(evsel, WEIGHT);
+ evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
+ }
if (perf_missing_features.clockid_wrong)
evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */
if (perf_missing_features.clockid) {
@@ -1781,6 +1801,8 @@ retry_open:
FD(evsel, cpu, thread) = fd;
+ bpf_counter__install_pe(evsel, cpu, fd);
+
if (unlikely(test_attr__enabled)) {
test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
fd, group_fd, flags);
@@ -1873,7 +1895,17 @@ try_fallback:
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.data_page_size &&
+ if (!perf_missing_features.weight_struct &&
+ (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) {
+ perf_missing_features.weight_struct = true;
+ pr_debug2("switching off weight struct support\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.code_page_size &&
+ (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) {
+ perf_missing_features.code_page_size = true;
+ pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n");
+ goto out_close;
+ } else if (!perf_missing_features.data_page_size &&
(evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) {
perf_missing_features.data_page_size = true;
pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n");
@@ -2076,6 +2108,13 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size)
return 0;
}
+void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
+ const __u64 *array,
+ u64 type __maybe_unused)
+{
+ data->weight = *array;
+}
+
int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
struct perf_sample *data)
{
@@ -2316,9 +2355,9 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
}
}
- if (type & PERF_SAMPLE_WEIGHT) {
+ if (type & PERF_SAMPLE_WEIGHT_TYPE) {
OVERFLOW_CHECK_u64(array);
- data->weight = *array;
+ arch_perf_parse_sample_weight(data, array, type);
array++;
}
@@ -2369,6 +2408,12 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
array++;
}
+ data->code_page_size = 0;
+ if (type & PERF_SAMPLE_CODE_PAGE_SIZE) {
+ data->code_page_size = *array;
+ array++;
+ }
+
if (type & PERF_SAMPLE_AUX) {
OVERFLOW_CHECK_u64(array);
sz = *array++;
@@ -2678,6 +2723,8 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
"We found oprofile daemon running, please stop it and try again.");
break;
case EINVAL:
+ if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
+ return scnprintf(msg, size, "Asking for the code page size isn't supported by this kernel.");
if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size)
return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel.");
if (evsel->core.attr.write_backward && perf_missing_features.write_backward)
@@ -2689,6 +2736,9 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
if (perf_missing_features.aux_output)
return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
break;
+ case ENODATA:
+ return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. "
+ "Please add an auxiliary event in front of the load latency event.");
default:
break;
}
@@ -2734,3 +2784,16 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
return store_evsel_ids(evsel, evlist);
}
+
+void evsel__zero_per_pkg(struct evsel *evsel)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ if (evsel->per_pkg_mask) {
+ hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt)
+ free((char *)cur->key);
+
+ hashmap__clear(evsel->per_pkg_mask);
+ }
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index cd1d8dd43199..6026487353dd 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -17,6 +17,9 @@ struct cgroup;
struct perf_counts;
struct perf_stat_evsel;
union perf_event;
+struct bpf_counter_ops;
+struct target;
+struct hashmap;
typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
@@ -110,7 +113,7 @@ struct evsel {
bool merged_stat;
bool reset_group;
bool errored;
- unsigned long *per_pkg_mask;
+ struct hashmap *per_pkg_mask;
struct evsel *leader;
struct list_head config_terms;
int err;
@@ -127,6 +130,8 @@ struct evsel {
* See also evsel__has_callchain().
*/
__u64 synth_sample_type;
+ struct list_head bpf_counter_list;
+ struct bpf_counter_ops *bpf_counter_ops;
};
struct perf_missing_features {
@@ -145,6 +150,8 @@ struct perf_missing_features {
bool branch_hw_idx;
bool cgroup;
bool data_page_size;
+ bool code_page_size;
+ bool weight_struct;
};
extern struct perf_missing_features perf_missing_features;
@@ -239,6 +246,8 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma
void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
+void arch_evsel__set_sample_weight(struct evsel *evsel);
+
int evsel__set_filter(struct evsel *evsel, const char *filter);
int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
int evsel__append_addr_filter(struct evsel *evsel, const char *filter);
@@ -424,4 +433,6 @@ static inline bool evsel__is_dummy_event(struct evsel *evsel)
struct perf_env *evsel__env(struct evsel *evsel);
int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
+
+void evsel__zero_per_pkg(struct evsel *evsel);
#endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index fb498a723a00..bfedd7b23521 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -100,6 +100,7 @@ out:
return ++printed;
}
+#ifndef PYTHON_PERF
int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
unsigned int print_opts, struct callchain_cursor *cursor,
struct strlist *bt_stop_list, FILE *fp)
@@ -239,3 +240,4 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
return printed;
}
+#endif /* PYTHON_PERF */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index c4ed3dc2c8f4..20effdff76ce 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1618,8 +1618,8 @@ static void print_clock_data(struct feat_fd *ff, FILE *fp)
fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid);
fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n",
- tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec,
- clockid_ns.tv_sec, clockid_ns.tv_nsec,
+ tstr, (long) tod_ns.tv_sec, (int) tod_ns.tv_usec,
+ (long) clockid_ns.tv_sec, clockid_ns.tv_nsec,
clockid_name(clockid));
}
@@ -3806,7 +3806,7 @@ int perf_session__read_header(struct perf_session *session)
* check for the pipe header regardless of source.
*/
err = perf_header__read_pipe(session);
- if (!err || (err && perf_data__is_pipe(data))) {
+ if (!err || perf_data__is_pipe(data)) {
data->is_pipe = true;
return err;
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index a08fb9ea411b..c82f5fc26af8 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -208,10 +208,14 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3);
hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
+ hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
+ hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
+ hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16);
else
hists__new_col_len(hists, HISTC_TIME, 12);
+ hists__new_col_len(hists, HISTC_CODE_PAGE_SIZE, 6);
if (h->srcline) {
len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header));
@@ -285,12 +289,13 @@ static long hist_time(unsigned long htime)
}
static void he_stat__add_period(struct he_stat *he_stat, u64 period,
- u64 weight)
+ u64 weight, u64 ins_lat)
{
he_stat->period += period;
he_stat->weight += weight;
he_stat->nr_events += 1;
+ he_stat->ins_lat += ins_lat;
}
static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
@@ -302,6 +307,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
dest->period_guest_us += src->period_guest_us;
dest->nr_events += src->nr_events;
dest->weight += src->weight;
+ dest->ins_lat += src->ins_lat;
}
static void he_stat__decay(struct he_stat *he_stat)
@@ -590,6 +596,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
int64_t cmp;
u64 period = entry->stat.period;
u64 weight = entry->stat.weight;
+ u64 ins_lat = entry->stat.ins_lat;
bool leftmost = true;
p = &hists->entries_in->rb_root.rb_node;
@@ -608,11 +615,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
if (!cmp) {
if (sample_self) {
- he_stat__add_period(&he->stat, period, weight);
+ he_stat__add_period(&he->stat, period, weight, ins_lat);
hist_entry__add_callchain_period(he, period);
}
if (symbol_conf.cumulate_callchain)
- he_stat__add_period(he->stat_acc, period, weight);
+ he_stat__add_period(he->stat_acc, period, weight, ins_lat);
/*
* This mem info was allocated from sample__resolve_mem
@@ -718,10 +725,12 @@ __hists__add_entry(struct hists *hists,
.cpumode = al->cpumode,
.ip = al->addr,
.level = al->level,
+ .code_page_size = sample->code_page_size,
.stat = {
.nr_events = 1,
.period = sample->period,
.weight = sample->weight,
+ .ins_lat = sample->ins_lat,
},
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent) | al->filtered,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 14f66330923d..3c537232294b 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -53,6 +53,7 @@ enum hist_column {
HISTC_DSO_TO,
HISTC_LOCAL_WEIGHT,
HISTC_GLOBAL_WEIGHT,
+ HISTC_CODE_PAGE_SIZE,
HISTC_MEM_DADDR_SYMBOL,
HISTC_MEM_DADDR_DSO,
HISTC_MEM_PHYS_DADDR,
@@ -71,6 +72,9 @@ enum hist_column {
HISTC_SYM_SIZE,
HISTC_DSO_SIZE,
HISTC_SYMBOL_IPC,
+ HISTC_MEM_BLOCKED,
+ HISTC_LOCAL_INS_LAT,
+ HISTC_GLOBAL_INS_LAT,
HISTC_NR_COLS, /* Last entry */
};
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 697513f35154..8c59677bee13 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -24,6 +24,13 @@
#include "intel-pt-decoder.h"
#include "intel-pt-log.h"
+#define BITULL(x) (1ULL << (x))
+
+/* IA32_RTIT_CTL MSR bits */
+#define INTEL_PT_CYC_ENABLE BITULL(1)
+#define INTEL_PT_CYC_THRESHOLD (BITULL(22) | BITULL(21) | BITULL(20) | BITULL(19))
+#define INTEL_PT_CYC_THRESHOLD_SHIFT 19
+
#define INTEL_PT_BLK_SIZE 1024
#define BIT63 (((uint64_t)1 << 63))
@@ -55,6 +62,7 @@ enum intel_pt_pkt_state {
INTEL_PT_STATE_TIP_PGD,
INTEL_PT_STATE_FUP,
INTEL_PT_STATE_FUP_NO_TIP,
+ INTEL_PT_STATE_FUP_IN_PSB,
INTEL_PT_STATE_RESAMPLE,
};
@@ -73,6 +81,7 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
case INTEL_PT_STATE_TIP_PGD:
case INTEL_PT_STATE_FUP:
case INTEL_PT_STATE_FUP_NO_TIP:
+ case INTEL_PT_STATE_FUP_IN_PSB:
return false;
default:
return true;
@@ -112,13 +121,14 @@ struct intel_pt_decoder {
bool have_last_ip;
bool in_psb;
bool hop;
- bool hop_psb_fup;
bool leap;
+ bool nr;
+ bool next_nr;
enum intel_pt_param_flags flags;
uint64_t pos;
uint64_t last_ip;
uint64_t ip;
- uint64_t cr3;
+ uint64_t pip_payload;
uint64_t timestamp;
uint64_t tsc_timestamp;
uint64_t ref_timestamp;
@@ -167,6 +177,8 @@ struct intel_pt_decoder {
uint64_t sample_tot_cyc_cnt;
uint64_t base_cyc_cnt;
uint64_t cyc_cnt_timestamp;
+ uint64_t ctl;
+ uint64_t cyc_threshold;
double tsc_to_cyc;
bool continuous_period;
bool overflow;
@@ -189,6 +201,7 @@ struct intel_pt_decoder {
int no_progress;
int stuck_ip_prd;
int stuck_ip_cnt;
+ uint64_t psb_ip;
const unsigned char *next_buf;
size_t next_len;
unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
@@ -204,6 +217,14 @@ static uint64_t intel_pt_lower_power_of_2(uint64_t x)
return x << i;
}
+static uint64_t intel_pt_cyc_threshold(uint64_t ctl)
+{
+ if (!(ctl & INTEL_PT_CYC_ENABLE))
+ return 0;
+
+ return (ctl & INTEL_PT_CYC_THRESHOLD) >> INTEL_PT_CYC_THRESHOLD_SHIFT;
+}
+
static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
{
if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
@@ -245,12 +266,15 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
decoder->flags = params->flags;
+ decoder->ctl = params->ctl;
decoder->period = params->period;
decoder->period_type = params->period_type;
decoder->max_non_turbo_ratio = params->max_non_turbo_ratio;
decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
+ decoder->cyc_threshold = intel_pt_cyc_threshold(decoder->ctl);
+
intel_pt_setup_period(decoder);
decoder->mtc_shift = params->mtc_period;
@@ -481,6 +505,28 @@ static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
}
+static inline void intel_pt_update_pip(struct intel_pt_decoder *decoder)
+{
+ decoder->pip_payload = decoder->packet.payload;
+}
+
+static inline void intel_pt_update_nr(struct intel_pt_decoder *decoder)
+{
+ decoder->next_nr = decoder->pip_payload & 1;
+}
+
+static inline void intel_pt_set_nr(struct intel_pt_decoder *decoder)
+{
+ decoder->nr = decoder->pip_payload & 1;
+ decoder->next_nr = decoder->nr;
+}
+
+static inline void intel_pt_set_pip(struct intel_pt_decoder *decoder)
+{
+ intel_pt_update_pip(decoder);
+ intel_pt_set_nr(decoder);
+}
+
static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
{
intel_pt_clear_tx_flags(decoder);
@@ -1218,6 +1264,7 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
decoder->continuous_period = false;
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
decoder->state.type |= INTEL_PT_TRACE_END;
+ intel_pt_update_nr(decoder);
return 0;
}
if (err == INTEL_PT_RETURN)
@@ -1225,6 +1272,8 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
if (err)
return err;
+ intel_pt_update_nr(decoder);
+
if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
decoder->pge = false;
@@ -1337,6 +1386,7 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = decoder->last_ip;
decoder->ip = decoder->last_ip;
+ intel_pt_update_nr(decoder);
return 0;
}
@@ -1461,6 +1511,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
{
intel_pt_log("ERROR: Buffer overflow\n");
intel_pt_clear_tx_flags(decoder);
+ intel_pt_set_nr(decoder);
decoder->timestamp_insn_cnt = 0;
decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
decoder->overflow = true;
@@ -1735,18 +1786,14 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_PIP:
- decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ intel_pt_set_pip(decoder);
break;
case INTEL_PT_FUP:
decoder->pge = true;
if (decoder->packet.count) {
intel_pt_set_last_ip(decoder);
- if (decoder->hop) {
- /* Act on FUP at PSBEND */
- decoder->ip = decoder->last_ip;
- decoder->hop_psb_fup = true;
- }
+ decoder->psb_ip = decoder->last_ip;
}
break;
@@ -1761,6 +1808,9 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
case INTEL_PT_VMCS:
case INTEL_PT_MNT:
case INTEL_PT_PAD:
@@ -1835,6 +1885,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
decoder->pge = false;
decoder->continuous_period = false;
decoder->state.type |= INTEL_PT_TRACE_END;
+ intel_pt_update_nr(decoder);
return 0;
case INTEL_PT_TIP_PGE:
@@ -1850,6 +1901,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
}
decoder->state.type |= INTEL_PT_TRACE_BEGIN;
intel_pt_mtc_cyc_cnt_pge(decoder);
+ intel_pt_set_nr(decoder);
return 0;
case INTEL_PT_TIP:
@@ -1860,10 +1912,11 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
intel_pt_set_ip(decoder);
decoder->state.to_ip = decoder->ip;
}
+ intel_pt_update_nr(decoder);
return 0;
case INTEL_PT_PIP:
- decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ intel_pt_update_pip(decoder);
break;
case INTEL_PT_MTC:
@@ -1922,21 +1975,27 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
return HOP_IGNORE;
case INTEL_PT_TIP_PGD:
- if (!decoder->packet.count)
+ if (!decoder->packet.count) {
+ intel_pt_set_nr(decoder);
return HOP_IGNORE;
+ }
intel_pt_set_ip(decoder);
decoder->state.type |= INTEL_PT_TRACE_END;
decoder->state.from_ip = 0;
decoder->state.to_ip = decoder->ip;
+ intel_pt_update_nr(decoder);
return HOP_RETURN;
case INTEL_PT_TIP:
- if (!decoder->packet.count)
+ if (!decoder->packet.count) {
+ intel_pt_set_nr(decoder);
return HOP_IGNORE;
+ }
intel_pt_set_ip(decoder);
decoder->state.type = INTEL_PT_INSTRUCTION;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
+ intel_pt_update_nr(decoder);
return HOP_RETURN;
case INTEL_PT_FUP:
@@ -1959,26 +2018,23 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
return HOP_RETURN;
case INTEL_PT_PSB:
+ decoder->state.psb_offset = decoder->pos;
+ decoder->psb_ip = 0;
decoder->last_ip = 0;
decoder->have_last_ip = true;
- decoder->hop_psb_fup = false;
*err = intel_pt_walk_psbend(decoder);
if (*err == -EAGAIN)
return HOP_AGAIN;
if (*err)
return HOP_RETURN;
- if (decoder->hop_psb_fup) {
- decoder->hop_psb_fup = false;
- decoder->state.type = INTEL_PT_INSTRUCTION;
- decoder->state.from_ip = decoder->ip;
- decoder->state.to_ip = 0;
- return HOP_RETURN;
+ decoder->state.type = INTEL_PT_PSB_EVT;
+ if (decoder->psb_ip) {
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ decoder->ip = decoder->psb_ip;
}
- if (decoder->cbr != decoder->cbr_seen) {
- decoder->state.type = 0;
- return HOP_RETURN;
- }
- return HOP_IGNORE;
+ decoder->state.from_ip = decoder->psb_ip;
+ decoder->state.to_ip = 0;
+ return HOP_RETURN;
case INTEL_PT_BAD:
case INTEL_PT_PAD:
@@ -2012,8 +2068,151 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
}
}
+struct intel_pt_psb_info {
+ struct intel_pt_pkt fup_packet;
+ bool fup;
+ int after_psbend;
+};
+
+/* Lookahead and get the FUP packet from PSB+ */
+static int intel_pt_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_psb_info *data = pkt_info->data;
+
+ switch (pkt_info->packet.type) {
+ case INTEL_PT_PAD:
+ case INTEL_PT_MNT:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MTC:
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_CBR:
+ case INTEL_PT_PIP:
+ if (data->after_psbend) {
+ data->after_psbend -= 1;
+ if (!data->after_psbend)
+ return 1;
+ }
+ break;
+
+ case INTEL_PT_FUP:
+ if (data->after_psbend)
+ return 1;
+ if (data->fup || pkt_info->packet.count == 0)
+ return 1;
+ data->fup_packet = pkt_info->packet;
+ data->fup = true;
+ break;
+
+ case INTEL_PT_PSBEND:
+ if (!data->fup)
+ return 1;
+ /* Keep going to check for a TIP.PGE */
+ data->after_psbend = 6;
+ break;
+
+ case INTEL_PT_TIP_PGE:
+ /* Ignore FUP in PSB+ if followed by TIP.PGE */
+ if (data->after_psbend)
+ data->fup = false;
+ return 1;
+
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ if (data->after_psbend) {
+ data->after_psbend -= 1;
+ if (!data->after_psbend)
+ return 1;
+ break;
+ }
+ return 1;
+
+ case INTEL_PT_OVF:
+ case INTEL_PT_BAD:
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_TRACESTOP:
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
+static int intel_pt_psb(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ decoder->last_ip = 0;
+ decoder->psb_ip = 0;
+ decoder->have_last_ip = true;
+ intel_pt_clear_stack(&decoder->stack);
+ err = intel_pt_walk_psbend(decoder);
+ if (err)
+ return err;
+ decoder->state.type = INTEL_PT_PSB_EVT;
+ decoder->state.from_ip = decoder->psb_ip;
+ decoder->state.to_ip = 0;
+ return 0;
+}
+
+static int intel_pt_fup_in_psb(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ if (decoder->ip != decoder->last_ip) {
+ err = intel_pt_walk_fup(decoder);
+ if (!err || err != -EAGAIN)
+ return err;
+ }
+
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ err = intel_pt_psb(decoder);
+ if (err) {
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static bool intel_pt_psb_with_fup(struct intel_pt_decoder *decoder, int *err)
+{
+ struct intel_pt_psb_info data = { .fup = false };
+
+ if (!decoder->branch_enable || !decoder->pge)
+ return false;
+
+ intel_pt_pkt_lookahead(decoder, intel_pt_psb_lookahead_cb, &data);
+ if (!data.fup)
+ return false;
+
+ decoder->packet = data.fup_packet;
+ intel_pt_set_last_ip(decoder);
+ decoder->pkt_state = INTEL_PT_STATE_FUP_IN_PSB;
+
+ *err = intel_pt_fup_in_psb(decoder);
+
+ return true;
+}
+
static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
{
+ int last_packet_type = INTEL_PT_PAD;
bool no_tip = false;
int err;
@@ -2022,6 +2221,12 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
if (err)
return err;
next:
+ if (decoder->cyc_threshold) {
+ if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC)
+ decoder->sample_cyc = false;
+ last_packet_type = decoder->packet.type;
+ }
+
if (decoder->hop) {
switch (intel_pt_hop_trace(decoder, &no_tip, &err)) {
case HOP_IGNORE:
@@ -2055,6 +2260,7 @@ next:
case INTEL_PT_TIP_PGE: {
decoder->pge = true;
intel_pt_mtc_cyc_cnt_pge(decoder);
+ intel_pt_set_nr(decoder);
if (decoder->packet.count == 0) {
intel_pt_log_at("Skipping zero TIP.PGE",
decoder->pos);
@@ -2120,27 +2326,17 @@ next:
break;
case INTEL_PT_PSB:
- decoder->last_ip = 0;
- decoder->have_last_ip = true;
- intel_pt_clear_stack(&decoder->stack);
- err = intel_pt_walk_psbend(decoder);
+ decoder->state.psb_offset = decoder->pos;
+ decoder->psb_ip = 0;
+ if (intel_pt_psb_with_fup(decoder, &err))
+ return err;
+ err = intel_pt_psb(decoder);
if (err == -EAGAIN)
goto next;
- if (err)
- return err;
- /*
- * PSB+ CBR will not have changed but cater for the
- * possibility of another CBR change that gets caught up
- * in the PSB+.
- */
- if (decoder->cbr != decoder->cbr_seen) {
- decoder->state.type = 0;
- return 0;
- }
- break;
+ return err;
case INTEL_PT_PIP:
- decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ intel_pt_update_pip(decoder);
break;
case INTEL_PT_MTC:
@@ -2351,6 +2547,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
uint64_t current_ip = decoder->ip;
intel_pt_set_ip(decoder);
+ decoder->psb_ip = decoder->ip;
if (current_ip)
intel_pt_log_to("Setting IP",
decoder->ip);
@@ -2378,7 +2575,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_PIP:
- decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ intel_pt_set_pip(decoder);
break;
case INTEL_PT_MODE_EXEC:
@@ -2497,7 +2694,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_PIP:
- decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ intel_pt_set_pip(decoder);
break;
case INTEL_PT_MODE_EXEC:
@@ -2522,18 +2719,18 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_PSB:
+ decoder->state.psb_offset = decoder->pos;
+ decoder->psb_ip = 0;
decoder->last_ip = 0;
decoder->have_last_ip = true;
intel_pt_clear_stack(&decoder->stack);
err = intel_pt_walk_psb(decoder);
if (err)
return err;
- if (decoder->ip) {
- /* Do not have a sample */
- decoder->state.type = 0;
- return 0;
- }
- break;
+ decoder->state.type = INTEL_PT_PSB_EVT;
+ decoder->state.from_ip = decoder->psb_ip;
+ decoder->state.to_ip = 0;
+ return 0;
case INTEL_PT_TNT:
case INTEL_PT_PSBEND:
@@ -2577,7 +2774,7 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
intel_pt_log("Scanning for full IP\n");
err = intel_pt_walk_to_ip(decoder);
- if (err)
+ if (err || ((decoder->state.type & INTEL_PT_PSB_EVT) && !decoder->ip))
return err;
/* In hop mode, resample to get the to_ip as an "instruction" sample */
@@ -2689,10 +2886,10 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
decoder->continuous_period = false;
decoder->have_last_ip = false;
decoder->last_ip = 0;
+ decoder->psb_ip = 0;
decoder->ip = 0;
intel_pt_clear_stack(&decoder->stack);
-leap:
err = intel_pt_scan_for_psb(decoder);
if (err)
return err;
@@ -2704,8 +2901,11 @@ leap:
if (err)
return err;
+ decoder->state.type = INTEL_PT_PSB_EVT; /* Only PSB sample */
+ decoder->state.from_ip = decoder->psb_ip;
+ decoder->state.to_ip = 0;
+
if (decoder->ip) {
- decoder->state.type = 0; /* Do not have a sample */
/*
* In hop mode, resample to get the PSB FUP ip as an
* "instruction" sample.
@@ -2714,14 +2914,6 @@ leap:
decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
else
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
- } else if (decoder->leap) {
- /*
- * In leap mode, only PSB+ is decoded, so keeping leaping to the
- * next PSB until there is an ip.
- */
- goto leap;
- } else {
- return intel_pt_sync_ip(decoder);
}
return 0;
@@ -2783,6 +2975,9 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
if (err == -EAGAIN)
err = intel_pt_walk_trace(decoder);
break;
+ case INTEL_PT_STATE_FUP_IN_PSB:
+ err = intel_pt_fup_in_psb(decoder);
+ break;
case INTEL_PT_STATE_RESAMPLE:
err = intel_pt_resample(decoder);
break;
@@ -2797,6 +2992,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
decoder->state.from_ip = decoder->ip;
intel_pt_update_sample_time(decoder);
decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
+ intel_pt_set_nr(decoder);
} else {
decoder->state.err = 0;
if (decoder->cbr != decoder->cbr_seen) {
@@ -2811,14 +3007,30 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
}
if (intel_pt_sample_time(decoder->pkt_state)) {
intel_pt_update_sample_time(decoder);
- if (decoder->sample_cyc)
+ if (decoder->sample_cyc) {
decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
+ decoder->state.flags |= INTEL_PT_SAMPLE_IPC;
+ decoder->sample_cyc = false;
+ }
}
+ /*
+ * When using only TSC/MTC to compute cycles, IPC can be
+ * sampled as soon as the cycle count changes.
+ */
+ if (!decoder->have_cyc)
+ decoder->state.flags |= INTEL_PT_SAMPLE_IPC;
}
+ /* Let PSB event always have TSC timestamp */
+ if ((decoder->state.type & INTEL_PT_PSB_EVT) && decoder->tsc_timestamp)
+ decoder->sample_timestamp = decoder->tsc_timestamp;
+
+ decoder->state.from_nr = decoder->nr;
+ decoder->state.to_nr = decoder->next_nr;
+ decoder->nr = decoder->next_nr;
+
decoder->state.timestamp = decoder->sample_timestamp;
decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
- decoder->state.cr3 = decoder->cr3;
decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 8645fc265481..d9e62a7f6f0e 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -17,6 +17,7 @@
#define INTEL_PT_ABORT_TX (1 << 1)
#define INTEL_PT_ASYNC (1 << 2)
#define INTEL_PT_FUP_IP (1 << 3)
+#define INTEL_PT_SAMPLE_IPC (1 << 4)
enum intel_pt_sample_type {
INTEL_PT_BRANCH = 1 << 0,
@@ -31,6 +32,7 @@ enum intel_pt_sample_type {
INTEL_PT_TRACE_BEGIN = 1 << 9,
INTEL_PT_TRACE_END = 1 << 10,
INTEL_PT_BLK_ITEMS = 1 << 11,
+ INTEL_PT_PSB_EVT = 1 << 12,
};
enum intel_pt_period_type {
@@ -199,10 +201,11 @@ struct intel_pt_blk_items {
struct intel_pt_state {
enum intel_pt_sample_type type;
+ bool from_nr;
+ bool to_nr;
int err;
uint64_t from_ip;
uint64_t to_ip;
- uint64_t cr3;
uint64_t tot_insn_cnt;
uint64_t tot_cyc_cnt;
uint64_t timestamp;
@@ -213,6 +216,7 @@ struct intel_pt_state {
uint64_t pwre_payload;
uint64_t pwrx_payload;
uint64_t cbr_payload;
+ uint64_t psb_offset;
uint32_t cbr;
uint32_t flags;
enum intel_pt_insn_op insn_op;
@@ -243,6 +247,7 @@ struct intel_pt_params {
void *data;
bool return_compression;
bool branch_enable;
+ uint64_t ctl;
uint64_t period;
enum intel_pt_period_type period_type;
unsigned max_non_turbo_ratio;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index fb8a3558d3d5..2f6cc7eea251 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -43,6 +43,17 @@ static void intel_pt_insn_decoder(struct insn *insn,
switch (insn->opcode.bytes[0]) {
case 0xf:
switch (insn->opcode.bytes[1]) {
+ case 0x01:
+ switch (insn->modrm.bytes[0]) {
+ case 0xc2: /* vmlaunch */
+ case 0xc3: /* vmresume */
+ op = INTEL_PT_OP_VMENTRY;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ default:
+ break;
+ }
+ break;
case 0x05: /* syscall */
case 0x34: /* sysenter */
op = INTEL_PT_OP_SYSCALL;
@@ -213,6 +224,7 @@ const char *branch_name[] = {
[INTEL_PT_OP_INT] = "Int",
[INTEL_PT_OP_SYSCALL] = "Syscall",
[INTEL_PT_OP_SYSRET] = "Sysret",
+ [INTEL_PT_OP_VMENTRY] = "VMentry",
};
const char *intel_pt_insn_name(enum intel_pt_insn_op op)
@@ -267,6 +279,9 @@ int intel_pt_insn_type(enum intel_pt_insn_op op)
case INTEL_PT_OP_SYSRET:
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_SYSCALLRET;
+ case INTEL_PT_OP_VMENTRY:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_VMENTRY;
default:
return 0;
}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
index 95a1eb0141ff..c2861cfdd768 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
@@ -24,6 +24,7 @@ enum intel_pt_insn_op {
INTEL_PT_OP_INT,
INTEL_PT_OP_SYSCALL,
INTEL_PT_OP_SYSRET,
+ INTEL_PT_OP_VMENTRY,
};
enum intel_pt_insn_branch {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 4ce109993e74..02a3395d6ce3 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -16,8 +16,6 @@
#define BIT63 ((uint64_t)1 << 63)
-#define NR_FLAG BIT63
-
#if __BYTE_ORDER == __BIG_ENDIAN
#define le16_to_cpu bswap_16
#define le32_to_cpu bswap_32
@@ -106,9 +104,7 @@ static int intel_pt_get_pip(const unsigned char *buf, size_t len,
packet->type = INTEL_PT_PIP;
memcpy_le64(&payload, buf + 2, 6);
- packet->payload = payload >> 1;
- if (payload & 1)
- packet->payload |= NR_FLAG;
+ packet->payload = payload;
return 8;
}
@@ -719,10 +715,10 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
name, (unsigned)(payload >> 1) & 1,
(unsigned)payload & 1);
case INTEL_PT_PIP:
- nr = packet->payload & NR_FLAG ? 1 : 0;
- payload &= ~NR_FLAG;
+ nr = packet->payload & INTEL_PT_VMX_NR_FLAG ? 1 : 0;
+ payload &= ~INTEL_PT_VMX_NR_FLAG;
ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
- name, payload, nr);
+ name, payload >> 1, nr);
return ret;
case INTEL_PT_PTWRITE:
return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index 17ca9b56d72f..996090cb84f6 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -21,6 +21,8 @@
#define INTEL_PT_PKT_MAX_SZ 16
+#define INTEL_PT_VMX_NR_FLAG 1
+
enum intel_pt_pkt_type {
INTEL_PT_BAD,
INTEL_PT_PAD,
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 60214de42f31..f6e28ac231b7 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -108,6 +108,7 @@ struct intel_pt {
u64 exstop_id;
u64 pwrx_id;
u64 cbr_id;
+ u64 psb_id;
bool sample_pebs;
struct evsel *pebs_evsel;
@@ -162,6 +163,9 @@ struct intel_pt_queue {
int switch_state;
pid_t next_tid;
struct thread *thread;
+ struct machine *guest_machine;
+ struct thread *unknown_guest_thread;
+ pid_t guest_machine_pid;
bool exclude_kernel;
bool have_sample;
u64 time;
@@ -549,13 +553,59 @@ static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
auxtrace_cache__remove(dso->auxtrace_cache, offset);
}
-static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
+static inline bool intel_pt_guest_kernel_ip(uint64_t ip)
{
- return ip >= pt->kernel_start ?
+ /* Assumes 64-bit kernel */
+ return ip & (1ULL << 63);
+}
+
+static inline u8 intel_pt_nr_cpumode(struct intel_pt_queue *ptq, uint64_t ip, bool nr)
+{
+ if (nr) {
+ return intel_pt_guest_kernel_ip(ip) ?
+ PERF_RECORD_MISC_GUEST_KERNEL :
+ PERF_RECORD_MISC_GUEST_USER;
+ }
+
+ return ip >= ptq->pt->kernel_start ?
PERF_RECORD_MISC_KERNEL :
PERF_RECORD_MISC_USER;
}
+static inline u8 intel_pt_cpumode(struct intel_pt_queue *ptq, uint64_t from_ip, uint64_t to_ip)
+{
+ /* No support for non-zero CS base */
+ if (from_ip)
+ return intel_pt_nr_cpumode(ptq, from_ip, ptq->state->from_nr);
+ return intel_pt_nr_cpumode(ptq, to_ip, ptq->state->to_nr);
+}
+
+static int intel_pt_get_guest(struct intel_pt_queue *ptq)
+{
+ struct machines *machines = &ptq->pt->session->machines;
+ struct machine *machine;
+ pid_t pid = ptq->pid <= 0 ? DEFAULT_GUEST_KERNEL_ID : ptq->pid;
+
+ if (ptq->guest_machine && pid == ptq->guest_machine_pid)
+ return 0;
+
+ ptq->guest_machine = NULL;
+ thread__zput(ptq->unknown_guest_thread);
+
+ machine = machines__find_guest(machines, pid);
+ if (!machine)
+ return -1;
+
+ ptq->unknown_guest_thread = machine__idle_thread(machine);
+ if (!ptq->unknown_guest_thread)
+ return -1;
+
+ ptq->guest_machine = machine;
+ ptq->guest_machine_pid = pid;
+
+ return 0;
+}
+
static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip,
uint64_t to_ip, uint64_t max_insn_cnt,
@@ -572,19 +622,29 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
u64 offset, start_offset, start_ip;
u64 insn_cnt = 0;
bool one_map = true;
+ bool nr;
intel_pt_insn->length = 0;
if (to_ip && *ip == to_ip)
goto out_no_cache;
- cpumode = intel_pt_cpumode(ptq->pt, *ip);
+ nr = ptq->state->to_nr;
+ cpumode = intel_pt_nr_cpumode(ptq, *ip, nr);
- thread = ptq->thread;
- if (!thread) {
- if (cpumode != PERF_RECORD_MISC_KERNEL)
+ if (nr) {
+ if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL ||
+ intel_pt_get_guest(ptq))
return -EINVAL;
- thread = ptq->pt->unknown_thread;
+ machine = ptq->guest_machine;
+ thread = ptq->unknown_guest_thread;
+ } else {
+ thread = ptq->thread;
+ if (!thread) {
+ if (cpumode != PERF_RECORD_MISC_KERNEL)
+ return -EINVAL;
+ thread = ptq->pt->unknown_thread;
+ }
}
while (1) {
@@ -732,8 +792,14 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data)
u8 cpumode;
u64 offset;
- if (ip >= ptq->pt->kernel_start)
+ if (ptq->state->to_nr) {
+ if (intel_pt_guest_kernel_ip(ip))
+ return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
+ /* No support for decoding guest user space */
+ return -EINVAL;
+ } else if (ip >= ptq->pt->kernel_start) {
return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
+ }
cpumode = PERF_RECORD_MISC_USER;
@@ -893,6 +959,18 @@ static bool intel_pt_sampling_mode(struct intel_pt *pt)
return false;
}
+static u64 intel_pt_ctl(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config))
+ return config;
+ }
+ return 0;
+}
+
static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
{
u64 quot, rem;
@@ -1026,6 +1104,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.data = ptq;
params.return_compression = intel_pt_return_compression(pt);
params.branch_enable = intel_pt_branch_enable(pt);
+ params.ctl = intel_pt_ctl(pt);
params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
params.mtc_period = intel_pt_mtc_period(pt);
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
@@ -1087,6 +1166,7 @@ static void intel_pt_free_queue(void *priv)
if (!ptq)
return;
thread__zput(ptq->thread);
+ thread__zput(ptq->unknown_guest_thread);
intel_pt_decoder_free(ptq->decoder);
zfree(&ptq->event_buf);
zfree(&ptq->last_branch);
@@ -1121,13 +1201,16 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
if (ptq->state->flags & INTEL_PT_ABORT_TX) {
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
} else if (ptq->state->flags & INTEL_PT_ASYNC) {
- if (ptq->state->to_ip)
+ if (!ptq->state->to_ip)
+ ptq->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_END;
+ else if (ptq->state->from_nr && !ptq->state->to_nr)
+ ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_VMEXIT;
+ else
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
PERF_IP_FLAG_ASYNC |
PERF_IP_FLAG_INTERRUPT;
- else
- ptq->flags = PERF_IP_FLAG_BRANCH |
- PERF_IP_FLAG_TRACE_END;
ptq->insn_len = 0;
} else {
if (ptq->state->from_ip)
@@ -1301,8 +1384,8 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
sample->ip = ptq->state->from_ip;
- sample->cpumode = intel_pt_cpumode(pt, sample->ip);
sample->addr = ptq->state->to_ip;
+ sample->cpumode = intel_pt_cpumode(ptq, sample->ip, sample->addr);
sample->period = 1;
sample->flags = ptq->flags;
@@ -1381,7 +1464,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
sample.branch_stack = (struct branch_stack *)&dummy_bs;
}
- sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
+ if (ptq->state->flags & INTEL_PT_SAMPLE_IPC)
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
if (sample.cyc_cnt) {
sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
@@ -1431,7 +1515,8 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
else
sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
- sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
+ if (ptq->state->flags & INTEL_PT_SAMPLE_IPC)
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
if (sample.cyc_cnt) {
sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
@@ -1533,6 +1618,32 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
pt->pwr_events_sample_type);
}
+static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_psb raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->psb_id;
+ sample.stream_id = ptq->pt->psb_id;
+ sample.flags = 0;
+
+ raw.reserved = 0;
+ raw.offset = ptq->state->psb_offset;
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
@@ -1791,10 +1902,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
else
sample.ip = ptq->state->from_ip;
- /* No support for guest mode at this time */
- cpumode = sample.ip < ptq->pt->kernel_start ?
- PERF_RECORD_MISC_USER :
- PERF_RECORD_MISC_KERNEL;
+ cpumode = intel_pt_cpumode(ptq, sample.ip, 0);
event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;
@@ -1853,13 +1961,30 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
sample.addr = items->mem_access_address;
- if (sample_type & PERF_SAMPLE_WEIGHT) {
+ if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
/*
* Refer kernel's setup_pebs_adaptive_sample_data() and
* intel_hsw_weight().
*/
- if (items->has_mem_access_latency)
- sample.weight = items->mem_access_latency;
+ if (items->has_mem_access_latency) {
+ u64 weight = items->mem_access_latency >> 32;
+
+ /*
+ * Starts from SPR, the mem access latency field
+ * contains both cache latency [47:32] and instruction
+ * latency [15:0]. The cache latency is the same as the
+ * mem access latency on previous platforms.
+ *
+ * In practice, no memory access could last than 4G
+ * cycles. Use latency >> 32 to distinguish the
+ * different format of the mem access latency field.
+ */
+ if (weight > 0) {
+ sample.weight = weight & 0xffff;
+ sample.ins_lat = items->mem_access_latency & 0xffff;
+ } else
+ sample.weight = items->mem_access_latency;
+ }
if (!sample.weight && items->has_tsx_aux_info) {
/* Cycles last block */
sample.weight = (u32)items->tsx_aux_info;
@@ -1966,14 +2091,8 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->have_sample = false;
- if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
- /*
- * Cycle count and instruction count only go together to create
- * a valid IPC ratio when the cycle count changes.
- */
- ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
- ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
- }
+ ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
+ ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
/*
* Do PEBS first to allow for the possibility that the PEBS timestamp
@@ -1986,6 +2105,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
}
if (pt->sample_pwr_events) {
+ if (state->type & INTEL_PT_PSB_EVT) {
+ err = intel_pt_synth_psb_sample(ptq);
+ if (err)
+ return err;
+ }
if (ptq->state->cbr != ptq->cbr_seen) {
err = intel_pt_synth_cbr_sample(ptq);
if (err)
@@ -2047,7 +2171,27 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
}
if (pt->sample_branches) {
- err = intel_pt_synth_branch_sample(ptq);
+ if (state->from_nr != state->to_nr &&
+ state->from_ip && state->to_ip) {
+ struct intel_pt_state *st = (struct intel_pt_state *)state;
+ u64 to_ip = st->to_ip;
+ u64 from_ip = st->from_ip;
+
+ /*
+ * perf cannot handle having different machines for ip
+ * and addr, so create 2 branches.
+ */
+ st->to_ip = 0;
+ err = intel_pt_synth_branch_sample(ptq);
+ if (err)
+ return err;
+ st->from_ip = 0;
+ st->to_ip = to_ip;
+ err = intel_pt_synth_branch_sample(ptq);
+ st->from_ip = from_ip;
+ } else {
+ err = intel_pt_synth_branch_sample(ptq);
+ }
if (err)
return err;
}
@@ -3083,6 +3227,14 @@ static int intel_pt_synth_events(struct intel_pt *pt,
pt->cbr_id = id;
intel_pt_set_event_name(evlist, id, "cbr");
id += 1;
+
+ attr.config = PERF_SYNTH_INTEL_PSB;
+ err = intel_pt_synth_event(session, "psb", &attr, id);
+ if (err)
+ return err;
+ pt->psb_id = id;
+ intel_pt_set_event_name(evlist, id, "psb");
+ id += 1;
}
if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) {
diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c
index 84e5304e151a..934092199f89 100644
--- a/tools/perf/util/intlist.c
+++ b/tools/perf/util/intlist.c
@@ -13,7 +13,7 @@
static struct rb_node *intlist__node_new(struct rblist *rblist __maybe_unused,
const void *entry)
{
- int i = (int)((long)entry);
+ unsigned long i = (unsigned long)entry;
struct rb_node *rc = NULL;
struct int_node *node = malloc(sizeof(*node));
@@ -41,15 +41,20 @@ static void intlist__node_delete(struct rblist *rblist __maybe_unused,
static int intlist__node_cmp(struct rb_node *rb_node, const void *entry)
{
- int i = (int)((long)entry);
+ unsigned long i = (unsigned long)entry;
struct int_node *node = container_of(rb_node, struct int_node, rb_node);
- return node->i - i;
+ if (node->i > i)
+ return 1;
+ else if (node->i < i)
+ return -1;
+
+ return 0;
}
-int intlist__add(struct intlist *ilist, int i)
+int intlist__add(struct intlist *ilist, unsigned long i)
{
- return rblist__add_node(&ilist->rblist, (void *)((long)i));
+ return rblist__add_node(&ilist->rblist, (void *)i);
}
void intlist__remove(struct intlist *ilist, struct int_node *node)
@@ -58,7 +63,7 @@ void intlist__remove(struct intlist *ilist, struct int_node *node)
}
static struct int_node *__intlist__findnew(struct intlist *ilist,
- int i, bool create)
+ unsigned long i, bool create)
{
struct int_node *node = NULL;
struct rb_node *rb_node;
@@ -67,9 +72,9 @@ static struct int_node *__intlist__findnew(struct intlist *ilist,
return NULL;
if (create)
- rb_node = rblist__findnew(&ilist->rblist, (void *)((long)i));
+ rb_node = rblist__findnew(&ilist->rblist, (void *)i);
else
- rb_node = rblist__find(&ilist->rblist, (void *)((long)i));
+ rb_node = rblist__find(&ilist->rblist, (void *)i);
if (rb_node)
node = container_of(rb_node, struct int_node, rb_node);
@@ -77,12 +82,12 @@ static struct int_node *__intlist__findnew(struct intlist *ilist,
return node;
}
-struct int_node *intlist__find(struct intlist *ilist, int i)
+struct int_node *intlist__find(struct intlist *ilist, unsigned long i)
{
return __intlist__findnew(ilist, i, false);
}
-struct int_node *intlist__findnew(struct intlist *ilist, int i)
+struct int_node *intlist__findnew(struct intlist *ilist, unsigned long i)
{
return __intlist__findnew(ilist, i, true);
}
@@ -93,7 +98,7 @@ static int intlist__parse_list(struct intlist *ilist, const char *s)
int err;
do {
- long value = strtol(s, &sep, 10);
+ unsigned long value = strtol(s, &sep, 10);
err = -EINVAL;
if (*sep != ',' && *sep != '\0')
break;
diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h
index 5c19ee001299..e336b174d0c7 100644
--- a/tools/perf/util/intlist.h
+++ b/tools/perf/util/intlist.h
@@ -9,7 +9,7 @@
struct int_node {
struct rb_node rb_node;
- int i;
+ unsigned long i;
void *priv;
};
@@ -21,13 +21,13 @@ struct intlist *intlist__new(const char *slist);
void intlist__delete(struct intlist *ilist);
void intlist__remove(struct intlist *ilist, struct int_node *in);
-int intlist__add(struct intlist *ilist, int i);
+int intlist__add(struct intlist *ilist, unsigned long i);
struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx);
-struct int_node *intlist__find(struct intlist *ilist, int i);
-struct int_node *intlist__findnew(struct intlist *ilist, int i);
+struct int_node *intlist__find(struct intlist *ilist, unsigned long i);
+struct int_node *intlist__findnew(struct intlist *ilist, unsigned long i);
-static inline bool intlist__has_entry(struct intlist *ilist, int i)
+static inline bool intlist__has_entry(struct intlist *ilist, unsigned long i)
{
return intlist__find(ilist, i) != NULL;
}
diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
index 6817ffc2a059..fb810e1b2de7 100644
--- a/tools/perf/util/jit.h
+++ b/tools/perf/util/jit.h
@@ -5,7 +5,7 @@
#include <data.h>
int jit_process(struct perf_session *session, struct perf_data *output,
- struct machine *machine, char *filename, pid_t pid, u64 *nbytes);
+ struct machine *machine, char *filename, pid_t pid, pid_t tid, u64 *nbytes);
int jit_inject_record(const char *filename);
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 055bab7a92b3..9760d8e7b386 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -18,6 +18,7 @@
#include "event.h"
#include "debug.h"
#include "evlist.h"
+#include "namespaces.h"
#include "symbol.h"
#include <elf.h>
@@ -35,6 +36,7 @@ struct jit_buf_desc {
struct perf_data *output;
struct perf_session *session;
struct machine *machine;
+ struct nsinfo *nsi;
union jr_entry *entry;
void *buf;
uint64_t sample_type;
@@ -72,7 +74,8 @@ struct jit_tool {
#define get_jit_tool(t) (container_of(tool, struct jit_tool, tool))
static int
-jit_emit_elf(char *filename,
+jit_emit_elf(struct jit_buf_desc *jd,
+ char *filename,
const char *sym,
uint64_t code_addr,
const void *code,
@@ -83,14 +86,18 @@ jit_emit_elf(char *filename,
uint32_t unwinding_header_size,
uint32_t unwinding_size)
{
- int ret, fd;
+ int ret, fd, saved_errno;
+ struct nscookie nsc;
if (verbose > 0)
fprintf(stderr, "write ELF image %s\n", filename);
+ nsinfo__mountns_enter(jd->nsi, &nsc);
fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644);
+ saved_errno = errno;
+ nsinfo__mountns_exit(&nsc);
if (fd == -1) {
- pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno));
+ pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(saved_errno));
return -1;
}
@@ -99,8 +106,11 @@ jit_emit_elf(char *filename,
close(fd);
- if (ret)
- unlink(filename);
+ if (ret) {
+ nsinfo__mountns_enter(jd->nsi, &nsc);
+ unlink(filename);
+ nsinfo__mountns_exit(&nsc);
+ }
return ret;
}
@@ -134,12 +144,15 @@ static int
jit_open(struct jit_buf_desc *jd, const char *name)
{
struct jitheader header;
+ struct nscookie nsc;
struct jr_prefix *prefix;
ssize_t bs, bsz = 0;
void *n, *buf = NULL;
int ret, retval = -1;
+ nsinfo__mountns_enter(jd->nsi, &nsc);
jd->in = fopen(name, "r");
+ nsinfo__mountns_exit(&nsc);
if (!jd->in)
return -1;
@@ -367,6 +380,20 @@ jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
return 0;
}
+static pid_t jr_entry_pid(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ if (jd->nsi && jd->nsi->in_pidns)
+ return jd->nsi->tgid;
+ return jr->load.pid;
+}
+
+static pid_t jr_entry_tid(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ if (jd->nsi && jd->nsi->in_pidns)
+ return jd->nsi->pid;
+ return jr->load.tid;
+}
+
static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp)
{
struct perf_tsc_conversion tc;
@@ -402,14 +429,15 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
const char *sym;
uint64_t count;
int ret, csize, usize;
- pid_t pid, tid;
+ pid_t nspid, pid, tid;
struct {
u32 pid, tid;
u64 time;
} *id;
- pid = jr->load.pid;
- tid = jr->load.tid;
+ nspid = jr->load.pid;
+ pid = jr_entry_pid(jd, jr);
+ tid = jr_entry_tid(jd, jr);
csize = jr->load.code_size;
usize = jd->unwinding_mapped_size;
addr = jr->load.code_addr;
@@ -425,14 +453,14 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
filename = event->mmap2.filename;
size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so",
jd->dir,
- pid,
+ nspid,
count);
size++; /* for \0 */
size = PERF_ALIGN(size, sizeof(u64));
uaddr = (uintptr_t)code;
- ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries,
+ ret = jit_emit_elf(jd, filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries,
jd->unwinding_data, jd->eh_frame_hdr_size, jd->unwinding_size);
if (jd->debug_data && jd->nr_debug_entries) {
@@ -451,7 +479,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
free(event);
return -1;
}
- if (stat(filename, &st))
+ if (nsinfo__stat(filename, &st, jd->nsi))
memset(&st, 0, sizeof(st));
event->mmap2.header.type = PERF_RECORD_MMAP2;
@@ -515,14 +543,15 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
int usize;
u16 idr_size;
int ret;
- pid_t pid, tid;
+ pid_t nspid, pid, tid;
struct {
u32 pid, tid;
u64 time;
} *id;
- pid = jr->move.pid;
- tid = jr->move.tid;
+ nspid = jr->load.pid;
+ pid = jr_entry_pid(jd, jr);
+ tid = jr_entry_tid(jd, jr);
usize = jd->unwinding_mapped_size;
idr_size = jd->machine->id_hdr_size;
@@ -536,12 +565,12 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
filename = event->mmap2.filename;
size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so",
jd->dir,
- pid,
+ nspid,
jr->move.code_index);
size++; /* for \0 */
- if (stat(filename, &st))
+ if (nsinfo__stat(filename, &st, jd->nsi))
memset(&st, 0, sizeof(st));
size = PERF_ALIGN(size, sizeof(u64));
@@ -700,7 +729,7 @@ jit_inject(struct jit_buf_desc *jd, char *path)
* as captured in the RECORD_MMAP record
*/
static int
-jit_detect(char *mmap_name, pid_t pid)
+jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi)
{
char *p;
char *end = NULL;
@@ -740,7 +769,7 @@ jit_detect(char *mmap_name, pid_t pid)
* pid does not match mmap pid
* pid==0 in system-wide mode (synthesized)
*/
- if (pid && pid2 != pid)
+ if (pid && pid2 != nsi->nstgid)
return -1;
/*
* validate suffix
@@ -782,16 +811,30 @@ jit_process(struct perf_session *session,
struct machine *machine,
char *filename,
pid_t pid,
+ pid_t tid,
u64 *nbytes)
{
+ struct thread *thread;
+ struct nsinfo *nsi;
struct evsel *first;
struct jit_buf_desc jd;
int ret;
+ thread = machine__findnew_thread(machine, pid, tid);
+ if (thread == NULL) {
+ pr_err("problem processing JIT mmap event, skipping it.\n");
+ return 0;
+ }
+
+ nsi = nsinfo__get(thread->nsinfo);
+ thread__put(thread);
+
/*
* first, detect marker mmap (i.e., the jitdump mmap)
*/
- if (jit_detect(filename, pid)) {
+ if (jit_detect(filename, pid, nsi)) {
+ nsinfo__put(nsi);
+
// Strip //anon* mmaps if we processed a jitdump for this pid
if (jit_has_pid(machine, pid) && (strncmp(filename, "//anon", 6) == 0))
return 1;
@@ -804,6 +847,7 @@ jit_process(struct perf_session *session,
jd.session = session;
jd.output = output;
jd.machine = machine;
+ jd.nsi = nsi;
/*
* track sample_type to compute id_all layout
@@ -821,5 +865,7 @@ jit_process(struct perf_session *session,
ret = 1;
}
+ nsinfo__put(jd.nsi);
+
return ret;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 1e9d3f982b47..b5c2d8be4144 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -369,6 +369,15 @@ out:
return machine;
}
+struct machine *machines__find_guest(struct machines *machines, pid_t pid)
+{
+ struct machine *machine = machines__find(machines, pid);
+
+ if (!machine)
+ machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
+ return machine;
+}
+
void machines__process_guests(struct machines *machines,
machine__process_t process, void *data)
{
@@ -589,6 +598,24 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid,
return th;
}
+/*
+ * Threads are identified by pid and tid, and the idle task has pid == tid == 0.
+ * So here a single thread is created for that, but actually there is a separate
+ * idle task per cpu, so there should be one 'struct thread' per cpu, but there
+ * is only 1. That causes problems for some tools, requiring workarounds. For
+ * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu().
+ */
+struct thread *machine__idle_thread(struct machine *machine)
+{
+ struct thread *thread = machine__findnew_thread(machine, 0, 0);
+
+ if (!thread || thread__set_comm(thread, "swapper", 0) ||
+ thread__set_namespaces(thread, 0, NULL))
+ pr_err("problem inserting idle task for machine pid %d\n", machine->pid);
+
+ return thread;
+}
+
struct comm *machine__thread_exec_comm(struct machine *machine,
struct thread *thread)
{
@@ -1599,7 +1626,8 @@ static int machine__process_extra_kernel_map(struct machine *machine,
}
static int machine__process_kernel_mmap_event(struct machine *machine,
- struct extra_kernel_map *xm)
+ struct extra_kernel_map *xm,
+ struct build_id *bid)
{
struct map *map;
enum dso_space_type dso_space;
@@ -1624,6 +1652,10 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
goto out_problem;
map->end = map->start + xm->end - xm->start;
+
+ if (build_id__is_defined(bid))
+ dso__set_build_id(map->dso, bid);
+
} else if (is_kernel_mmap) {
const char *symbol_name = (xm->name + strlen(machine->mmap_name));
/*
@@ -1681,6 +1713,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
machine__update_kernel_mmap(machine, xm->start, xm->end);
+ if (build_id__is_defined(bid))
+ dso__set_build_id(kernel, bid);
+
/*
* Avoid using a zero address (kptr_restrict) for the ref reloc
* symbol. Effectively having zero here means that at record
@@ -1718,11 +1753,17 @@ int machine__process_mmap2_event(struct machine *machine,
.ino = event->mmap2.ino,
.ino_generation = event->mmap2.ino_generation,
};
+ struct build_id __bid, *bid = NULL;
int ret = 0;
if (dump_trace)
perf_event__fprintf_mmap2(event, stdout);
+ if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
+ bid = &__bid;
+ build_id__init(bid, event->mmap2.build_id, event->mmap2.build_id_size);
+ }
+
if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
sample->cpumode == PERF_RECORD_MISC_KERNEL) {
struct extra_kernel_map xm = {
@@ -1732,7 +1773,7 @@ int machine__process_mmap2_event(struct machine *machine,
};
strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN);
- ret = machine__process_kernel_mmap_event(machine, &xm);
+ ret = machine__process_kernel_mmap_event(machine, &xm, bid);
if (ret < 0)
goto out_problem;
return 0;
@@ -1746,7 +1787,7 @@ int machine__process_mmap2_event(struct machine *machine,
map = map__new(machine, event->mmap2.start,
event->mmap2.len, event->mmap2.pgoff,
&dso_id, event->mmap2.prot,
- event->mmap2.flags,
+ event->mmap2.flags, bid,
event->mmap2.filename, thread);
if (map == NULL)
@@ -1789,7 +1830,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
};
strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);
- ret = machine__process_kernel_mmap_event(machine, &xm);
+ ret = machine__process_kernel_mmap_event(machine, &xm, NULL);
if (ret < 0)
goto out_problem;
return 0;
@@ -1805,7 +1846,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
map = map__new(machine, event->mmap.start,
event->mmap.len, event->mmap.pgoff,
- NULL, prot, 0, event->mmap.filename, thread);
+ NULL, prot, 0, NULL, event->mmap.filename, thread);
if (map == NULL)
goto out_problem_map;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 26368d3c1754..7377ed6efdf1 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -106,6 +106,7 @@ u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr);
struct thread *machine__find_thread(struct machine *machine, pid_t pid,
pid_t tid);
+struct thread *machine__idle_thread(struct machine *machine);
struct comm *machine__thread_exec_comm(struct machine *machine,
struct thread *thread);
@@ -162,6 +163,7 @@ struct machine *machines__add(struct machines *machines, pid_t pid,
struct machine *machines__find_host(struct machines *machines);
struct machine *machines__find(struct machines *machines, pid_t pid);
struct machine *machines__findnew(struct machines *machines, pid_t pid);
+struct machine *machines__find_guest(struct machines *machines, pid_t pid);
void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
void machines__set_comm_exec(struct machines *machines, bool comm_exec);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index f44ede437dc7..fbc40a2c17d4 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -77,8 +77,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
if (strstarts(filename, "/system/lib/")) {
char *ndk, *app;
const char *arch;
- size_t ndk_length;
- size_t app_length;
+ int ndk_length, app_length;
ndk = getenv("NDK_ROOT");
app = getenv("APP_PLATFORM");
@@ -106,8 +105,8 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
if (new_length > PATH_MAX)
return false;
snprintf(newfilename, new_length,
- "%s/platforms/%s/arch-%s/usr/lib/%s",
- ndk, app, arch, libname);
+ "%.*s/platforms/%.*s/arch-%s/usr/lib/%s",
+ ndk_length, ndk, app_length, app, arch, libname);
return true;
}
@@ -130,8 +129,8 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso)
struct map *map__new(struct machine *machine, u64 start, u64 len,
u64 pgoff, struct dso_id *id,
- u32 prot, u32 flags, char *filename,
- struct thread *thread)
+ u32 prot, u32 flags, struct build_id *bid,
+ char *filename, struct thread *thread)
{
struct map *map = malloc(sizeof(*map));
struct nsinfo *nsi = NULL;
@@ -194,6 +193,10 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
dso__set_loaded(dso);
}
dso->nsinfo = nsi;
+
+ if (build_id__is_defined(bid))
+ dso__set_build_id(dso, bid);
+
dso__put(dso);
}
return map;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index b1c0686db1b7..9f32825c98d8 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -104,10 +104,11 @@ void map__init(struct map *map,
u64 start, u64 end, u64 pgoff, struct dso *dso);
struct dso_id;
+struct build_id;
struct map *map__new(struct machine *machine, u64 start, u64 len,
u64 pgoff, struct dso_id *id, u32 prot, u32 flags,
- char *filename, struct thread *thread);
+ struct build_id *bid, char *filename, struct thread *thread);
struct map *map__new2(u64 start, struct dso *dso);
void map__delete(struct map *map);
struct map *map__clone(struct map *map);
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 19007e463b8a..f93a852ad838 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -56,6 +56,11 @@ char * __weak perf_mem_events__name(int i)
return (char *)e->name;
}
+__weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused)
+{
+ return false;
+}
+
int perf_mem_events__parse(const char *str)
{
char *tok, *saveptr = NULL;
@@ -332,6 +337,29 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
return l;
}
+int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ size_t l = 0;
+ u64 mask = PERF_MEM_BLK_NA;
+
+ sz -= 1; /* -1 for null termination */
+ out[0] = '\0';
+
+ if (mem_info)
+ mask = mem_info->data_src.mem_blk;
+
+ if (!mask || (mask & PERF_MEM_BLK_NA)) {
+ l += scnprintf(out + l, sz - l, " N/A");
+ return l;
+ }
+ if (mask & PERF_MEM_BLK_DATA)
+ l += scnprintf(out + l, sz - l, " Data");
+ if (mask & PERF_MEM_BLK_ADDR)
+ l += scnprintf(out + l, sz - l, " Addr");
+
+ return l;
+}
+
int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
int i = 0;
@@ -343,6 +371,8 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in
i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
i += scnprintf(out + i, sz - i, "|LCK ");
i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
+ i += scnprintf(out + i, sz - i, "|BLK ");
+ i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);
return i;
}
@@ -355,6 +385,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
u64 lvl = data_src->mem_lvl;
u64 snoop = data_src->mem_snoop;
u64 lock = data_src->mem_lock;
+ u64 blk = data_src->mem_blk;
/*
* Skylake might report unknown remote level via this
* bit, consider it when evaluating remote HITMs.
@@ -374,6 +405,9 @@ do { \
if (lock & P(LOCK, LOCKED)) stats->locks++;
+ if (blk & P(BLK, DATA)) stats->blk_data++;
+ if (blk & P(BLK, ADDR)) stats->blk_addr++;
+
if (op & P(OP, LOAD)) {
/* load */
stats->load++;
@@ -485,6 +519,8 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
stats->rmt_hit += add->rmt_hit;
stats->lcl_dram += add->lcl_dram;
stats->rmt_dram += add->rmt_dram;
+ stats->blk_data += add->blk_data;
+ stats->blk_addr += add->blk_addr;
stats->nomap += add->nomap;
stats->noparse += add->noparse;
}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 5ef178278909..755cef7e0625 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -9,6 +9,7 @@
#include <linux/refcount.h>
#include <linux/perf_event.h>
#include "stat.h"
+#include "evsel.h"
struct perf_mem_event {
bool record;
@@ -39,6 +40,7 @@ int perf_mem_events__init(void);
char *perf_mem_events__name(int i);
struct perf_mem_event *perf_mem_events__ptr(int i);
+bool is_mem_loads_aux_event(struct evsel *leader);
void perf_mem_events__list(void);
@@ -47,6 +49,7 @@ int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
@@ -76,6 +79,8 @@ struct c2c_stats {
u32 rmt_hit; /* count of loads with remote hit clean; */
u32 lcl_dram; /* count of loads miss to local DRAM */
u32 rmt_dram; /* count of loads miss to remote DRAM */
+ u32 blk_data; /* count of loads blocked by data */
+ u32 blk_addr; /* count of loads blocked by address conflict */
u32 nomap; /* count of load/stores with no phys adrs */
u32 noparse; /* count of unparsable data sources */
};
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index e6d3452031e5..26c990e32378 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -379,7 +379,7 @@ static int metricgroup__setup_events(struct list_head *groups,
metric_refs[i].metric_expr = ref->metric_expr;
i++;
}
- };
+ }
expr->metric_refs = metric_refs;
expr->metric_expr = m->metric_expr;
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index 285d6f30d912..608b20c72a5c 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -66,6 +66,7 @@ int nsinfo__init(struct nsinfo *nsi)
char spath[PATH_MAX];
char *newns = NULL;
char *statln = NULL;
+ char *nspid;
struct stat old_stat;
struct stat new_stat;
FILE *f = NULL;
@@ -112,8 +113,12 @@ int nsinfo__init(struct nsinfo *nsi)
}
if (strstr(statln, "NStgid:") != NULL) {
- nsi->nstgid = (pid_t)strtol(strrchr(statln, '\t'),
- NULL, 10);
+ nspid = strrchr(statln, '\t');
+ nsi->nstgid = (pid_t)strtol(nspid, NULL, 10);
+ /* If innermost tgid is not the first, process is in a different
+ * PID namespace.
+ */
+ nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid;
break;
}
}
@@ -140,6 +145,7 @@ struct nsinfo *nsinfo__new(pid_t pid)
nsi->tgid = pid;
nsi->nstgid = pid;
nsi->need_setns = false;
+ nsi->in_pidns = false;
/* Init may fail if the process exits while we're trying to look
* at its proc information. In that case, save the pid but
* don't try to enter the namespace.
@@ -166,6 +172,7 @@ struct nsinfo *nsinfo__copy(struct nsinfo *nsi)
nnsi->tgid = nsi->tgid;
nnsi->nstgid = nsi->nstgid;
nnsi->need_setns = nsi->need_setns;
+ nnsi->in_pidns = nsi->in_pidns;
if (nsi->mntns_path) {
nnsi->mntns_path = strdup(nsi->mntns_path);
if (!nnsi->mntns_path) {
@@ -280,3 +287,15 @@ char *nsinfo__realpath(const char *path, struct nsinfo *nsi)
return rpath;
}
+
+int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi)
+{
+ int ret;
+ struct nscookie nsc;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = stat(filename, st);
+ nsinfo__mountns_exit(&nsc);
+
+ return ret;
+}
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
index 4b33f684eddd..ad9775db7b9c 100644
--- a/tools/perf/util/namespaces.h
+++ b/tools/perf/util/namespaces.h
@@ -8,6 +8,7 @@
#define __PERF_NAMESPACES_H
#include <sys/types.h>
+#include <sys/stat.h>
#include <linux/stddef.h>
#include <linux/perf_event.h>
#include <linux/refcount.h>
@@ -33,6 +34,7 @@ struct nsinfo {
pid_t tgid;
pid_t nstgid;
bool need_setns;
+ bool in_pidns;
char *mntns_path;
refcount_t refcnt;
};
@@ -55,6 +57,7 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc);
void nsinfo__mountns_exit(struct nscookie *nc);
char *nsinfo__realpath(const char *path, struct nsinfo *nsi);
+int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi);
static inline void __nsinfo__zput(struct nsinfo **nsip)
{
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 9db5097317f4..0b36285a9435 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -356,6 +356,7 @@ bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUT
cycles-ct |
cycles-t |
mem-loads |
+mem-loads-aux |
mem-stores |
topdown-[a-z-]+ |
tx-capacity-[a-z-]+ |
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index d5b6aff82f21..d57ac86ce7ca 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -89,6 +89,7 @@ static void inc_group_count(struct list_head *list,
%type <str> PE_EVENT_NAME
%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%type <str> PE_DRV_CFG_TERM
+%type <str> event_pmu_name
%destructor { free ($$); } <str>
%type <term> event_term
%destructor { parse_events_term__delete ($$); } <term>
@@ -272,8 +273,11 @@ event_def: event_pmu |
event_legacy_raw sep_dc |
event_bpf_file
+event_pmu_name:
+PE_NAME | PE_PMU_EVENT_PRE
+
event_pmu:
-PE_NAME opt_pmu_config
+event_pmu_name opt_pmu_config
{
struct parse_events_state *parse_state = _parse_state;
struct parse_events_error *error = parse_state->error;
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 3840d02f0f7b..829af17a0867 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -98,6 +98,11 @@ static void perf_probe_text_poke(struct evsel *evsel)
evsel->core.attr.text_poke = 1;
}
+static void perf_probe_build_id(struct evsel *evsel)
+{
+ evsel->core.attr.build_id = 1;
+}
+
bool perf_can_sample_identifier(void)
{
return perf_probe_api(perf_probe_sample_identifier);
@@ -172,3 +177,8 @@ bool perf_can_aux_sample(void)
return true;
}
+
+bool perf_can_record_build_id(void)
+{
+ return perf_probe_api(perf_probe_build_id);
+}
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
index d5506a983a94..f12ca55f509a 100644
--- a/tools/perf/util/perf_api_probe.h
+++ b/tools/perf/util/perf_api_probe.h
@@ -11,5 +11,6 @@ bool perf_can_record_cpu_wide(void);
bool perf_can_record_switch_events(void);
bool perf_can_record_text_poke_events(void);
bool perf_can_sample_identifier(void);
+bool perf_can_record_build_id(void);
#endif // __PERF_API_PROBE_H
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index fb0bb6684438..30481825515b 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -35,7 +35,8 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX),
- bit_name(CGROUP), bit_name(DATA_PAGE_SIZE),
+ bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), bit_name(CODE_PAGE_SIZE),
+ bit_name(WEIGHT_STRUCT),
{ .name = NULL, }
};
#undef bit_name
@@ -134,6 +135,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(bpf_event, p_unsigned);
PRINT_ATTRf(aux_output, p_unsigned);
PRINT_ATTRf(cgroup, p_unsigned);
+ PRINT_ATTRf(text_poke, p_unsigned);
+ PRINT_ATTRf(build_id, p_unsigned);
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
PRINT_ATTRf(bp_type, p_unsigned);
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index a45499126184..eeac181ebccf 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -33,6 +33,13 @@ extern const struct sample_reg sample_reg_masks[];
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
+static inline const char *perf_reg_name(int id)
+{
+ const char *reg_name = __perf_reg_name(id);
+
+ return reg_name ?: "unknown";
+}
+
#else
#define PERF_REGS_MASK 0
#define PERF_REGS_MAX 0
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 8eae2afff71a..a9cff3a50ddf 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -894,6 +894,16 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
struct debuginfo *dinfo;
int ntevs, ret = 0;
+ /* Workaround for gcc #98776 issue.
+ * Perf failed to add kretprobe event with debuginfo of vmlinux which is
+ * compiled by gcc with -fpatchable-function-entry option enabled. The
+ * same issue with kernel module. The retprobe doesn`t need debuginfo.
+ * This workaround solution use map to query the probe function address
+ * for retprobe event.
+ */
+ if (pev->point.retprobe)
+ return 0;
+
dinfo = open_debuginfo(pev->target, pev->nsi, !need_dwarf);
if (!dinfo) {
if (need_dwarf)
@@ -1074,7 +1084,7 @@ static int __show_line_range(struct line_range *lr, const char *module,
}
intlist__for_each_entry(ln, lr->line_list) {
- for (; ln->i > l; l++) {
+ for (; ln->i > (unsigned long)l; l++) {
ret = show_one_line(fp, l - lr->offset);
if (ret < 0)
goto end;
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index bbecb449ea94..52273542e6ef 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -794,6 +794,8 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note,
char *ret = NULL;
int i, args_count, err;
unsigned long long ref_ctr_offset;
+ char *arg;
+ int arg_idx = 0;
if (strbuf_init(&buf, 32) < 0)
return NULL;
@@ -818,11 +820,43 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note,
if (args == NULL)
goto error;
- for (i = 0; i < args_count; ++i) {
- if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) {
+ for (i = 0; i < args_count; ) {
+ /*
+ * FIXUP: Arm64 ELF section '.note.stapsdt' uses string
+ * format "-4@[sp, NUM]" if a probe is to access data in
+ * the stack, e.g. below is an example for the SDT
+ * Arguments:
+ *
+ * Arguments: -4@[sp, 12] -4@[sp, 8] -4@[sp, 4]
+ *
+ * Since the string introduces an extra space character
+ * in the middle of square brackets, the argument is
+ * divided into two items. Fixup for this case, if an
+ * item contains sub string "[sp,", need to concatenate
+ * the two items.
+ */
+ if (strstr(args[i], "[sp,") && (i+1) < args_count) {
+ err = asprintf(&arg, "%s %s", args[i], args[i+1]);
+ i += 2;
+ } else {
+ err = asprintf(&arg, "%s", args[i]);
+ i += 1;
+ }
+
+ /* Failed to allocate memory */
+ if (err < 0) {
argv_free(args);
goto error;
}
+
+ if (synthesize_sdt_probe_arg(&buf, arg_idx, arg) < 0) {
+ free(arg);
+ argv_free(args);
+ goto error;
+ }
+
+ free(arg);
+ arg_idx++;
}
argv_free(args);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 76dd349aa48d..1b118c9c86a6 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1187,8 +1187,10 @@ static int debuginfo__find_probe_location(struct debuginfo *dbg,
while (!dwarf_nextcu(dbg->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
/* Get the DIE(Debugging Information Entry) of this CU */
diep = dwarf_offdie(dbg->dbg, off + cuhl, &pf->cu_die);
- if (!diep)
+ if (!diep) {
+ off = noff;
continue;
+ }
/* Check if target file is included. */
if (pp->file)
@@ -1949,8 +1951,10 @@ int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr)
/* Get the DIE(Debugging Information Entry) of this CU */
diep = dwarf_offdie(dbg->dbg, off + cuhl, &lf.cu_die);
- if (!diep)
+ if (!diep) {
+ off = noff;
continue;
+ }
/* Check if target file is included. */
if (lr->file)
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index a9d9c142eb7c..845dd46e3c61 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -10,6 +10,7 @@ util/python.c
util/cap.c
util/evlist.c
util/evsel.c
+util/evsel_fprintf.c
util/perf_event_attr_fprintf.c
util/cpumap.c
util/memswap.c
@@ -35,3 +36,4 @@ util/symbol_fprintf.c
util/units.c
util/affinity.c
util/rwsem.c
+util/hashmap.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index cc5ade85a33f..278abecb5bdf 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -80,6 +80,27 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
}
/*
+ * XXX: All these evsel destructors need some better mechanism, like a linked
+ * list of destructors registered when the relevant code indeed is used instead
+ * of having more and more calls in perf_evsel__delete(). -- acme
+ *
+ * For now, add some more:
+ *
+ * Not to drag the BPF bandwagon...
+ */
+void bpf_counter__destroy(struct evsel *evsel);
+int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd);
+
+void bpf_counter__destroy(struct evsel *evsel __maybe_unused)
+{
+}
+
+int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, int cpu __maybe_unused, int fd __maybe_unused)
+{
+ return 0;
+}
+
+/*
* Support debug printing even though util/debug.c is not linked. That means
* implementing 'verbose' and 'eprintf'.
*/
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index e70c9dd04567..f99852d54b14 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -15,6 +15,8 @@
#include "record.h"
#include "../perf-sys.h"
#include "topdown.h"
+#include "map_symbol.h"
+#include "mem-events.h"
/*
* evsel__config_leader_sampling() uses special rules for leader sampling.
@@ -25,7 +27,8 @@ static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evl
{
struct evsel *leader = evsel->leader;
- if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader)) {
+ if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader) ||
+ is_mem_loads_aux_event(leader)) {
evlist__for_each_entry(evlist, evsel) {
if (evsel->leader == leader && evsel != evsel->leader)
return evsel;
@@ -201,10 +204,10 @@ static int record_opts__config_freq(struct record_opts *opts)
* Default frequency is over current maximum.
*/
if (max_rate < opts->freq) {
- pr_warning("Lowering default frequency rate to %u.\n"
+ pr_warning("Lowering default frequency rate from %u to %u.\n"
"Please consider tweaking "
"/proc/sys/kernel/perf_event_max_sample_rate.\n",
- max_rate);
+ opts->freq, max_rate);
opts->freq = max_rate;
}
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 694b351dcd27..68f471d9a88b 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -23,6 +23,7 @@ struct record_opts {
bool sample_address;
bool sample_phys_addr;
bool sample_data_page_size;
+ bool sample_code_page_size;
bool sample_weight;
bool sample_time;
bool sample_time_set;
@@ -50,6 +51,7 @@ struct record_opts {
bool no_bpf_event;
bool kcore;
bool text_poke;
+ bool build_id;
unsigned int freq;
unsigned int mmap_pages;
unsigned int auxtrace_mmap_pages;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 25adbcce0281..859832a82496 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -593,10 +593,13 @@ static void perf_event__mmap2_swap(union perf_event *event,
event->mmap2.start = bswap_64(event->mmap2.start);
event->mmap2.len = bswap_64(event->mmap2.len);
event->mmap2.pgoff = bswap_64(event->mmap2.pgoff);
- event->mmap2.maj = bswap_32(event->mmap2.maj);
- event->mmap2.min = bswap_32(event->mmap2.min);
- event->mmap2.ino = bswap_64(event->mmap2.ino);
- event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation);
+
+ if (!(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
+ event->mmap2.maj = bswap_32(event->mmap2.maj);
+ event->mmap2.min = bswap_32(event->mmap2.min);
+ event->mmap2.ino = bswap_64(event->mmap2.ino);
+ event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation);
+ }
if (sample_id_all) {
void *data = &event->mmap2.filename;
@@ -1297,8 +1300,12 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
if (sample_type & PERF_SAMPLE_STACK_USER)
stack_user__printf(&sample->user_stack);
- if (sample_type & PERF_SAMPLE_WEIGHT)
- printf("... weight: %" PRIu64 "\n", sample->weight);
+ if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+ printf("... weight: %" PRIu64 "", sample->weight);
+ if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+ printf(",0x%"PRIx16"", sample->ins_lat);
+ printf("\n");
+ }
if (sample_type & PERF_SAMPLE_DATA_SRC)
printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
@@ -1309,6 +1316,9 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str));
+ if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+ printf(" .. code page size: %s\n", get_page_size_name(sample->code_page_size, str));
+
if (sample_type & PERF_SAMPLE_TRANSACTION)
printf("... transaction: %" PRIx64 "\n", sample->transaction);
@@ -1346,8 +1356,6 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
union perf_event *event,
struct perf_sample *sample)
{
- struct machine *machine;
-
if (perf_guest &&
((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
(sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) {
@@ -1359,10 +1367,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
else
pid = sample->pid;
- machine = machines__find(machines, pid);
- if (!machine)
- machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
- return machine;
+ return machines__find_guest(machines, pid);
}
return &machines->host;
@@ -1784,32 +1789,13 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
return machine__findnew_thread(&session->machines.host, -1, pid);
}
-/*
- * Threads are identified by pid and tid, and the idle task has pid == tid == 0.
- * So here a single thread is created for that, but actually there is a separate
- * idle task per cpu, so there should be one 'struct thread' per cpu, but there
- * is only 1. That causes problems for some tools, requiring workarounds. For
- * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu().
- */
int perf_session__register_idle_thread(struct perf_session *session)
{
- struct thread *thread;
- int err = 0;
-
- thread = machine__findnew_thread(&session->machines.host, 0, 0);
- if (thread == NULL || thread__set_comm(thread, "swapper", 0)) {
- pr_err("problem inserting idle task.\n");
- err = -1;
- }
+ struct thread *thread = machine__idle_thread(&session->machines.host);
- if (thread == NULL || thread__set_namespaces(thread, 0, NULL)) {
- pr_err("problem inserting idle task.\n");
- err = -1;
- }
-
- /* machine__findnew_thread() got the thread, so put it */
+ /* machine__idle_thread() got the thread, so put it */
thread__put(thread);
- return err;
+ return thread ? 0 : -1;
}
static void
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index c5e3e9a68162..483f05004e68 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -43,7 +43,7 @@ class install_lib(_install_lib):
cflags = getenv('CFLAGS', '').split()
# switch off several checks (need to be at the end of cflags list)
-cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ]
+cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls', '-DPYTHON_PERF' ]
if not cc_is_clang:
cflags += ['-Wno-cast-function-type' ]
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 80907bc32683..552b590485bf 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -36,7 +36,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault";
const char *parent_pattern = default_parent_pattern;
const char *default_sort_order = "comm,dso,symbol";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
-const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
+const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat";
const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol";
const char default_tracepoint_sort_order[] = "trace";
@@ -1365,6 +1365,49 @@ struct sort_entry sort_global_weight = {
.se_width_idx = HISTC_GLOBAL_WEIGHT,
};
+static u64 he_ins_lat(struct hist_entry *he)
+{
+ return he->stat.nr_events ? he->stat.ins_lat / he->stat.nr_events : 0;
+}
+
+static int64_t
+sort__local_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return he_ins_lat(left) - he_ins_lat(right);
+}
+
+static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width, he_ins_lat(he));
+}
+
+struct sort_entry sort_local_ins_lat = {
+ .se_header = "Local INSTR Latency",
+ .se_cmp = sort__local_ins_lat_cmp,
+ .se_snprintf = hist_entry__local_ins_lat_snprintf,
+ .se_width_idx = HISTC_LOCAL_INS_LAT,
+};
+
+static int64_t
+sort__global_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->stat.ins_lat - right->stat.ins_lat;
+}
+
+static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width, he->stat.ins_lat);
+}
+
+struct sort_entry sort_global_ins_lat = {
+ .se_header = "INSTR Latency",
+ .se_cmp = sort__global_ins_lat_cmp,
+ .se_snprintf = hist_entry__global_ins_lat_snprintf,
+ .se_width_idx = HISTC_GLOBAL_INS_LAT,
+};
+
struct sort_entry sort_mem_daddr_sym = {
.se_header = "Data Symbol",
.se_cmp = sort__daddr_cmp,
@@ -1422,6 +1465,41 @@ struct sort_entry sort_mem_dcacheline = {
};
static int64_t
+sort__blocked_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ union perf_mem_data_src data_src_l;
+ union perf_mem_data_src data_src_r;
+
+ if (left->mem_info)
+ data_src_l = left->mem_info->data_src;
+ else
+ data_src_l.mem_blk = PERF_MEM_BLK_NA;
+
+ if (right->mem_info)
+ data_src_r = right->mem_info->data_src;
+ else
+ data_src_r.mem_blk = PERF_MEM_BLK_NA;
+
+ return (int64_t)(data_src_r.mem_blk - data_src_l.mem_blk);
+}
+
+static int hist_entry__blocked_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char out[16];
+
+ perf_mem__blk_scnprintf(out, sizeof(out), he->mem_info);
+ return repsep_snprintf(bf, size, "%.*s", width, out);
+}
+
+struct sort_entry sort_mem_blocked = {
+ .se_header = "Blocked",
+ .se_cmp = sort__blocked_cmp,
+ .se_snprintf = hist_entry__blocked_snprintf,
+ .se_width_idx = HISTC_MEM_BLOCKED,
+};
+
+static int64_t
sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
{
uint64_t l = 0, r = 0;
@@ -1492,6 +1570,31 @@ struct sort_entry sort_mem_data_page_size = {
};
static int64_t
+sort__code_page_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ uint64_t l = left->code_page_size;
+ uint64_t r = right->code_page_size;
+
+ return (int64_t)(r - l);
+}
+
+static int hist_entry__code_page_size_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char str[PAGE_SIZE_NAME_LEN];
+
+ return repsep_snprintf(bf, size, "%-*s", width,
+ get_page_size_name(he->code_page_size, str));
+}
+
+struct sort_entry sort_code_page_size = {
+ .se_header = "Code Page Size",
+ .se_cmp = sort__code_page_size_cmp,
+ .se_snprintf = hist_entry__code_page_size_snprintf,
+ .se_width_idx = HISTC_CODE_PAGE_SIZE,
+};
+
+static int64_t
sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
{
if (!left->branch_info || !right->branch_info)
@@ -1735,6 +1838,9 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null),
DIM(SORT_TIME, "time", sort_time),
+ DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
+ DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
+ DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
};
#undef DIM
@@ -1770,6 +1876,7 @@ static struct sort_dimension memory_sort_dimensions[] = {
DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
DIM(SORT_MEM_DATA_PAGE_SIZE, "data_page_size", sort_mem_data_page_size),
+ DIM(SORT_MEM_BLOCKED, "blocked", sort_mem_blocked),
};
#undef DIM
@@ -3033,7 +3140,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
- if (sort__mode != SORT_MODE__MEMORY)
+ if (sort__mode != SORT_MODE__BRANCH)
return -EINVAL;
return __sort_dimension__add_output(list, sd);
@@ -3045,7 +3152,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
- if (sort__mode != SORT_MODE__BRANCH)
+ if (sort__mode != SORT_MODE__MEMORY)
return -EINVAL;
return __sort_dimension__add_output(list, sd);
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index e50f2b695bc4..63f67a3f3630 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -50,6 +50,7 @@ struct he_stat {
u64 period_guest_sys;
u64 period_guest_us;
u64 weight;
+ u64 ins_lat;
u32 nr_events;
};
@@ -106,6 +107,7 @@ struct hist_entry {
u64 transaction;
s32 socket;
s32 cpu;
+ u64 code_page_size;
u8 cpumode;
u8 depth;
@@ -229,6 +231,9 @@ enum sort_type {
SORT_CGROUP_ID,
SORT_SYM_IPC_NULL,
SORT_TIME,
+ SORT_CODE_PAGE_SIZE,
+ SORT_LOCAL_INS_LAT,
+ SORT_GLOBAL_INS_LAT,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -256,6 +261,7 @@ enum sort_type {
SORT_MEM_IADDR_SYMBOL,
SORT_MEM_PHYS_DADDR,
SORT_MEM_DATA_PAGE_SIZE,
+ SORT_MEM_BLOCKED,
};
/*
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 583ae4f09c5d..7f09cdaf5b60 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -983,7 +983,7 @@ static void print_interval(struct perf_stat_config *config,
if (config->interval_clear)
puts(CONSOLE_CLEAR);
- sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep);
+ sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep);
if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) {
switch (config->aggr_mode) {
@@ -1045,7 +1045,9 @@ static void print_header(struct perf_stat_config *config,
if (!config->csv_output) {
fprintf(output, "\n");
fprintf(output, " Performance counter stats for ");
- if (_target->system_wide)
+ if (_target->bpf_str)
+ fprintf(output, "\'BPF program(s) %s", _target->bpf_str);
+ else if (_target->system_wide)
fprintf(output, "\'system wide");
else if (_target->cpu_list)
fprintf(output, "\'CPU(s) %s", _target->cpu_list);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 12eafd12a693..6ccf21a72f06 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -273,6 +273,18 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
cpu, count, &rsd);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS))
+ update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS,
+ cpu, count, &rsd);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT))
+ update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT,
+ cpu, count, &rsd);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT))
+ update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT,
+ cpu, count, &rsd);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND))
+ update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND,
+ cpu, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
cpu, count, &rsd);
@@ -1174,6 +1186,86 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
bad_spec * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
+ full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
+ double retiring = td_metric_ratio(cpu,
+ STAT_TOPDOWN_RETIRING, st,
+ &rsd);
+ double heavy_ops = td_metric_ratio(cpu,
+ STAT_TOPDOWN_HEAVY_OPS, st,
+ &rsd);
+ double light_ops = retiring - heavy_ops;
+
+ if (retiring > 0.7 && heavy_ops > 0.1)
+ color = PERF_COLOR_GREEN;
+ print_metric(config, ctxp, color, "%8.1f%%", "heavy operations",
+ heavy_ops * 100.);
+ if (retiring > 0.7 && light_ops > 0.6)
+ color = PERF_COLOR_GREEN;
+ else
+ color = NULL;
+ print_metric(config, ctxp, color, "%8.1f%%", "light operations",
+ light_ops * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
+ full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
+ double bad_spec = td_metric_ratio(cpu,
+ STAT_TOPDOWN_BAD_SPEC, st,
+ &rsd);
+ double br_mis = td_metric_ratio(cpu,
+ STAT_TOPDOWN_BR_MISPREDICT, st,
+ &rsd);
+ double m_clears = bad_spec - br_mis;
+
+ if (bad_spec > 0.1 && br_mis > 0.05)
+ color = PERF_COLOR_RED;
+ print_metric(config, ctxp, color, "%8.1f%%", "branch mispredict",
+ br_mis * 100.);
+ if (bad_spec > 0.1 && m_clears > 0.05)
+ color = PERF_COLOR_RED;
+ else
+ color = NULL;
+ print_metric(config, ctxp, color, "%8.1f%%", "machine clears",
+ m_clears * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
+ full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
+ double fe_bound = td_metric_ratio(cpu,
+ STAT_TOPDOWN_FE_BOUND, st,
+ &rsd);
+ double fetch_lat = td_metric_ratio(cpu,
+ STAT_TOPDOWN_FETCH_LAT, st,
+ &rsd);
+ double fetch_bw = fe_bound - fetch_lat;
+
+ if (fe_bound > 0.2 && fetch_lat > 0.15)
+ color = PERF_COLOR_RED;
+ print_metric(config, ctxp, color, "%8.1f%%", "fetch latency",
+ fetch_lat * 100.);
+ if (fe_bound > 0.2 && fetch_bw > 0.1)
+ color = PERF_COLOR_RED;
+ else
+ color = NULL;
+ print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth",
+ fetch_bw * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
+ full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
+ double be_bound = td_metric_ratio(cpu,
+ STAT_TOPDOWN_BE_BOUND, st,
+ &rsd);
+ double mem_bound = td_metric_ratio(cpu,
+ STAT_TOPDOWN_MEM_BOUND, st,
+ &rsd);
+ double core_bound = be_bound - mem_bound;
+
+ if (be_bound > 0.2 && mem_bound > 0.2)
+ color = PERF_COLOR_RED;
+ print_metric(config, ctxp, color, "%8.1f%%", "memory bound",
+ mem_bound * 100.);
+ if (be_bound > 0.2 && core_bound > 0.1)
+ color = PERF_COLOR_RED;
+ else
+ color = NULL;
+ print_metric(config, ctxp, color, "%8.1f%%", "Core bound",
+ core_bound * 100.);
} else if (evsel->metric_expr) {
generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 8ce1479c98f0..c400f8dde017 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -13,6 +13,7 @@
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
+#include "hashmap.h"
#include <linux/zalloc.h>
void update_stats(struct stats *stats, u64 val)
@@ -99,6 +100,10 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
ID(TOPDOWN_BAD_SPEC, topdown-bad-spec),
ID(TOPDOWN_FE_BOUND, topdown-fe-bound),
ID(TOPDOWN_BE_BOUND, topdown-be-bound),
+ ID(TOPDOWN_HEAVY_OPS, topdown-heavy-ops),
+ ID(TOPDOWN_BR_MISPREDICT, topdown-br-mispredict),
+ ID(TOPDOWN_FETCH_LAT, topdown-fetch-lat),
+ ID(TOPDOWN_MEM_BOUND, topdown-mem-bound),
ID(SMI_NUM, msr/smi/),
ID(APERF, msr/aperf/),
};
@@ -273,18 +278,29 @@ void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
}
}
-static void zero_per_pkg(struct evsel *counter)
+static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
{
- if (counter->per_pkg_mask)
- memset(counter->per_pkg_mask, 0, cpu__max_cpu());
+ uint64_t *key = (uint64_t *) __key;
+
+ return *key & 0xffffffff;
+}
+
+static bool pkg_id_equal(const void *__key1, const void *__key2,
+ void *ctx __maybe_unused)
+{
+ uint64_t *key1 = (uint64_t *) __key1;
+ uint64_t *key2 = (uint64_t *) __key2;
+
+ return *key1 == *key2;
}
static int check_per_pkg(struct evsel *counter,
struct perf_counts_values *vals, int cpu, bool *skip)
{
- unsigned long *mask = counter->per_pkg_mask;
+ struct hashmap *mask = counter->per_pkg_mask;
struct perf_cpu_map *cpus = evsel__cpus(counter);
- int s;
+ int s, d, ret = 0;
+ uint64_t *key;
*skip = false;
@@ -295,7 +311,7 @@ static int check_per_pkg(struct evsel *counter,
return 0;
if (!mask) {
- mask = zalloc(cpu__max_cpu());
+ mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
if (!mask)
return -ENOMEM;
@@ -317,8 +333,25 @@ static int check_per_pkg(struct evsel *counter,
if (s < 0)
return -1;
- *skip = test_and_set_bit(s, mask) == 1;
- return 0;
+ /*
+ * On multi-die system, die_id > 0. On no-die system, die_id = 0.
+ * We use hashmap(socket, die) to check the used socket+die pair.
+ */
+ d = cpu_map__get_die(cpus, cpu, NULL).die;
+ if (d < 0)
+ return -1;
+
+ key = malloc(sizeof(*key));
+ if (!key)
+ return -ENOMEM;
+
+ *key = (uint64_t)d << 32 | s;
+ if (hashmap__find(mask, (void *)key, NULL))
+ *skip = true;
+ else
+ ret = hashmap__add(mask, (void *)key, (void *)1);
+
+ return ret;
}
static int
@@ -418,7 +451,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
}
if (counter->per_pkg)
- zero_per_pkg(counter);
+ evsel__zero_per_pkg(counter);
ret = process_counter_maps(config, counter);
if (ret)
@@ -527,7 +560,7 @@ int create_perf_stat_counter(struct evsel *evsel,
if (leader->core.nr_members > 1)
attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
- attr->inherit = !config->no_inherit;
+ attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list);
/*
* Some events get initialized with sample_(period/type) set,
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index b5369730b4a2..d85c292148bb 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -33,6 +33,10 @@ enum perf_stat_evsel_id {
PERF_STAT_EVSEL_ID__TOPDOWN_BAD_SPEC,
PERF_STAT_EVSEL_ID__TOPDOWN_FE_BOUND,
PERF_STAT_EVSEL_ID__TOPDOWN_BE_BOUND,
+ PERF_STAT_EVSEL_ID__TOPDOWN_HEAVY_OPS,
+ PERF_STAT_EVSEL_ID__TOPDOWN_BR_MISPREDICT,
+ PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_LAT,
+ PERF_STAT_EVSEL_ID__TOPDOWN_MEM_BOUND,
PERF_STAT_EVSEL_ID__SMI_NUM,
PERF_STAT_EVSEL_ID__APERF,
PERF_STAT_EVSEL_ID__MAX,
@@ -91,6 +95,10 @@ enum stat_type {
STAT_TOPDOWN_BAD_SPEC,
STAT_TOPDOWN_FE_BOUND,
STAT_TOPDOWN_BE_BOUND,
+ STAT_TOPDOWN_HEAVY_OPS,
+ STAT_TOPDOWN_BR_MISPREDICT,
+ STAT_TOPDOWN_FETCH_LAT,
+ STAT_TOPDOWN_MEM_BOUND,
STAT_SMI_NUM,
STAT_APERF,
STAT_MAX
@@ -148,6 +156,7 @@ struct perf_stat_config {
int ctl_fd_ack;
bool ctl_fd_close;
const char *cgroup_list;
+ unsigned int topdown_level;
};
void perf_stat__set_big_num(int set);
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 52603876c548..f6d90cdd9225 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -293,3 +293,12 @@ char *strdup_esc(const char *str)
return ret;
}
+
+unsigned int hex(char c)
+{
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+ return c - 'A' + 10;
+}
diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h
index 73df616ced43..56c30fef9682 100644
--- a/tools/perf/util/string2.h
+++ b/tools/perf/util/string2.h
@@ -38,4 +38,6 @@ char *asprintf__tp_filter_pids(size_t npids, pid_t *pids);
char *strpbrk_esc(char *str, const char *stopset);
char *strdup_esc(const char *str);
+unsigned int hex(char c);
+
#endif /* PERF_STRING_H */
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index f3577f7d72fe..6dff843fd883 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -12,6 +12,7 @@
#include "maps.h"
#include "symbol.h"
#include "symsrc.h"
+#include "demangle-ocaml.h"
#include "demangle-java.h"
#include "demangle-rust.h"
#include "machine.h"
@@ -251,8 +252,12 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
return demangled;
demangled = bfd_demangle(NULL, elf_name, demangle_flags);
- if (demangled == NULL)
- demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
+ if (demangled == NULL) {
+ demangled = ocaml_demangle_sym(elf_name);
+ if (demangled == NULL) {
+ demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
+ }
+ }
else if (rust_is_mangled(demangled))
/*
* Input to Rust demangling is the BFD-demangled
@@ -1226,12 +1231,26 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
if (sym.st_shndx == SHN_ABS)
continue;
- sec = elf_getscn(runtime_ss->elf, sym.st_shndx);
+ sec = elf_getscn(syms_ss->elf, sym.st_shndx);
if (!sec)
goto out_elf_end;
gelf_getshdr(sec, &shdr);
+ /*
+ * We have to fallback to runtime when syms' section header has
+ * NOBITS set. NOBITS results in file offset (sh_offset) not
+ * being incremented. So sh_offset used below has different
+ * values for syms (invalid) and runtime (valid).
+ */
+ if (shdr.sh_type == SHT_NOBITS) {
+ sec = elf_getscn(runtime_ss->elf, sym.st_shndx);
+ if (!sec)
+ goto out_elf_end;
+
+ gelf_getshdr(sec, &shdr);
+ }
+
if (is_label && !elf_sec__filter(&shdr, secstrs))
continue;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 64a039cbba1b..77fc46ca07c0 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1561,15 +1561,14 @@ static int bfd2elf_binding(asymbol *symbol)
int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
{
int err = -1;
- long symbols_size, symbols_count;
+ long symbols_size, symbols_count, i;
asection *section;
asymbol **symbols, *sym;
struct symbol *symbol;
bfd *abfd;
- u_int i;
u64 start, len;
- abfd = bfd_openr(dso->long_name, NULL);
+ abfd = bfd_openr(debugfile, NULL);
if (!abfd)
return -1;
@@ -1586,21 +1585,6 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
if (section)
dso->text_offset = section->vma - section->filepos;
- bfd_close(abfd);
-
- abfd = bfd_openr(debugfile, NULL);
- if (!abfd)
- return -1;
-
- if (!bfd_check_format(abfd, bfd_object)) {
- pr_debug2("%s: cannot read %s bfd file.\n", __func__,
- debugfile);
- goto out_close;
- }
-
- if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
- goto out_close;
-
symbols_size = bfd_get_symtab_upper_bound(abfd);
if (symbols_size == 0) {
bfd_close(abfd);
@@ -1867,8 +1851,10 @@ int dso__load(struct dso *dso, struct map *map)
if (nsexit)
nsinfo__mountns_enter(dso->nsinfo, &nsc);
- if (bfdrc == 0)
+ if (bfdrc == 0) {
+ ret = 0;
break;
+ }
if (!is_reg || sirc < 0)
continue;
@@ -2406,6 +2392,49 @@ int setup_intlist(struct intlist **list, const char *list_str,
return 0;
}
+static int setup_addrlist(struct intlist **addr_list, struct strlist *sym_list)
+{
+ struct str_node *pos, *tmp;
+ unsigned long val;
+ char *sep;
+ const char *end;
+ int i = 0, err;
+
+ *addr_list = intlist__new(NULL);
+ if (!*addr_list)
+ return -1;
+
+ strlist__for_each_entry_safe(pos, tmp, sym_list) {
+ errno = 0;
+ val = strtoul(pos->s, &sep, 16);
+ if (errno || (sep == pos->s))
+ continue;
+
+ if (*sep != '\0') {
+ end = pos->s + strlen(pos->s) - 1;
+ while (end >= sep && isspace(*end))
+ end--;
+
+ if (end >= sep)
+ continue;
+ }
+
+ err = intlist__add(*addr_list, val);
+ if (err)
+ break;
+
+ strlist__remove(sym_list, pos);
+ i++;
+ }
+
+ if (i == 0) {
+ intlist__delete(*addr_list);
+ *addr_list = NULL;
+ }
+
+ return 0;
+}
+
static bool symbol__read_kptr_restrict(void)
{
bool value = false;
@@ -2489,6 +2518,10 @@ int symbol__init(struct perf_env *env)
symbol_conf.sym_list_str, "symbol") < 0)
goto out_free_tid_list;
+ if (symbol_conf.sym_list &&
+ setup_addrlist(&symbol_conf.addr_list, symbol_conf.sym_list) < 0)
+ goto out_free_sym_list;
+
if (setup_list(&symbol_conf.bt_stop_list,
symbol_conf.bt_stop_list_str, "symbol") < 0)
goto out_free_sym_list;
@@ -2512,6 +2545,7 @@ int symbol__init(struct perf_env *env)
out_free_sym_list:
strlist__delete(symbol_conf.sym_list);
+ intlist__delete(symbol_conf.addr_list);
out_free_tid_list:
intlist__delete(symbol_conf.tid_list);
out_free_pid_list:
@@ -2533,6 +2567,7 @@ void symbol__exit(void)
strlist__delete(symbol_conf.comm_list);
intlist__delete(symbol_conf.tid_list);
intlist__delete(symbol_conf.pid_list);
+ intlist__delete(symbol_conf.addr_list);
vmlinux_path__exit();
symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL;
symbol_conf.bt_stop_list = NULL;
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index b916afb95ec5..a70b3ec09dac 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -42,7 +42,8 @@ struct symbol_conf {
report_block,
report_individual_block,
inline_name,
- disable_add2line_warn;
+ disable_add2line_warn,
+ buildid_mmap2;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
@@ -69,11 +70,13 @@ struct symbol_conf {
*sym_to_list,
*bt_stop_list;
struct intlist *pid_list,
- *tid_list;
+ *tid_list,
+ *addr_list;
const char *symfs;
int res_sample;
int pad_output_len_dso;
int group_sort_idx;
+ int addr_range;
};
extern struct symbol_conf symbol_conf;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 2947e3f3c6d9..b698046ec2db 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -24,7 +24,6 @@
#include <linux/perf_event.h>
#include <asm/bug.h>
#include <perf/evsel.h>
-#include <internal/cpumap.h>
#include <perf/cpumap.h>
#include <internal/lib.h> // page_size
#include <internal/threadmap.h>
@@ -69,19 +68,22 @@ int perf_tool__process_synth_event(struct perf_tool *tool,
* Assumes that the first 4095 bytes of /proc/pid/stat contains
* the comm, tgid and ppid.
*/
-static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
- pid_t *tgid, pid_t *ppid)
+static int perf_event__get_comm_ids(pid_t pid, pid_t tid, char *comm, size_t len,
+ pid_t *tgid, pid_t *ppid, bool *kernel)
{
char bf[4096];
int fd;
size_t size = 0;
ssize_t n;
- char *name, *tgids, *ppids;
+ char *name, *tgids, *ppids, *vmpeak, *threads;
*tgid = -1;
*ppid = -1;
- snprintf(bf, sizeof(bf), "/proc/%d/status", pid);
+ if (pid)
+ snprintf(bf, sizeof(bf), "/proc/%d/task/%d/status", pid, tid);
+ else
+ snprintf(bf, sizeof(bf), "/proc/%d/status", tid);
fd = open(bf, O_RDONLY);
if (fd < 0) {
@@ -93,14 +95,20 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
close(fd);
if (n <= 0) {
pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n",
- pid);
+ tid);
return -1;
}
bf[n] = '\0';
name = strstr(bf, "Name:");
- tgids = strstr(bf, "Tgid:");
- ppids = strstr(bf, "PPid:");
+ tgids = strstr(name ?: bf, "Tgid:");
+ ppids = strstr(tgids ?: bf, "PPid:");
+ vmpeak = strstr(ppids ?: bf, "VmPeak:");
+
+ if (vmpeak)
+ threads = NULL;
+ else
+ threads = strstr(ppids ?: bf, "Threads:");
if (name) {
char *nl;
@@ -116,29 +124,34 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
memcpy(comm, name, size);
comm[size] = '\0';
} else {
- pr_debug("Name: string not found for pid %d\n", pid);
+ pr_debug("Name: string not found for pid %d\n", tid);
}
if (tgids) {
tgids += 5; /* strlen("Tgid:") */
*tgid = atoi(tgids);
} else {
- pr_debug("Tgid: string not found for pid %d\n", pid);
+ pr_debug("Tgid: string not found for pid %d\n", tid);
}
if (ppids) {
ppids += 5; /* strlen("PPid:") */
*ppid = atoi(ppids);
} else {
- pr_debug("PPid: string not found for pid %d\n", pid);
+ pr_debug("PPid: string not found for pid %d\n", tid);
}
+ if (!vmpeak && threads)
+ *kernel = true;
+ else
+ *kernel = false;
+
return 0;
}
-static int perf_event__prepare_comm(union perf_event *event, pid_t pid,
+static int perf_event__prepare_comm(union perf_event *event, pid_t pid, pid_t tid,
struct machine *machine,
- pid_t *tgid, pid_t *ppid)
+ pid_t *tgid, pid_t *ppid, bool *kernel)
{
size_t size;
@@ -147,9 +160,9 @@ static int perf_event__prepare_comm(union perf_event *event, pid_t pid,
memset(&event->comm, 0, sizeof(event->comm));
if (machine__is_host(machine)) {
- if (perf_event__get_comm_ids(pid, event->comm.comm,
+ if (perf_event__get_comm_ids(pid, tid, event->comm.comm,
sizeof(event->comm.comm),
- tgid, ppid) != 0) {
+ tgid, ppid, kernel) != 0) {
return -1;
}
} else {
@@ -168,7 +181,7 @@ static int perf_event__prepare_comm(union perf_event *event, pid_t pid,
event->comm.header.size = (sizeof(event->comm) -
(sizeof(event->comm.comm) - size) +
machine->id_hdr_size);
- event->comm.tid = pid;
+ event->comm.tid = tid;
return 0;
}
@@ -179,8 +192,10 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool,
struct machine *machine)
{
pid_t tgid, ppid;
+ bool kernel_thread;
- if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0)
+ if (perf_event__prepare_comm(event, 0, pid, machine, &tgid, &ppid,
+ &kernel_thread) != 0)
return -1;
if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
@@ -347,6 +362,31 @@ static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
}
}
+static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
+ bool is_kernel)
+{
+ struct build_id bid;
+ int rc;
+
+ if (is_kernel)
+ rc = sysfs__read_build_id("/sys/kernel/notes", &bid);
+ else
+ rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1;
+
+ if (rc == 0) {
+ memcpy(event->build_id, bid.data, sizeof(bid.data));
+ event->build_id_size = (u8) bid.size;
+ event->header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID;
+ event->__reserved_1 = 0;
+ event->__reserved_2 = 0;
+ } else {
+ if (event->filename[0] == '/') {
+ pr_debug2("Failed to read build ID for %s\n",
+ event->filename);
+ }
+ }
+}
+
int perf_event__synthesize_mmap_events(struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid,
@@ -453,6 +493,9 @@ out:
event->mmap2.pid = tgid;
event->mmap2.tid = pid;
+ if (symbol_conf.buildid_mmap2)
+ perf_record_mmap2__read_build_id(&event->mmap2, false);
+
if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
rc = -1;
break;
@@ -596,16 +639,17 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t
int rc = 0;
struct map *pos;
struct maps *maps = machine__kernel_maps(machine);
- union perf_event *event = zalloc((sizeof(event->mmap) +
- machine->id_hdr_size));
+ union perf_event *event;
+ size_t size = symbol_conf.buildid_mmap2 ?
+ sizeof(event->mmap2) : sizeof(event->mmap);
+
+ event = zalloc(size + machine->id_hdr_size);
if (event == NULL) {
pr_debug("Not enough memory synthesizing mmap event "
"for kernel modules\n");
return -1;
}
- event->header.type = PERF_RECORD_MMAP;
-
/*
* kernel uses 0 for user space maps, see kernel/perf_event.c
* __perf_event_mmap
@@ -616,23 +660,39 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t
event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
maps__for_each_entry(maps, pos) {
- size_t size;
-
if (!__map__is_kmodule(pos))
continue;
- size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
- event->mmap.header.type = PERF_RECORD_MMAP;
- event->mmap.header.size = (sizeof(event->mmap) -
- (sizeof(event->mmap.filename) - size));
- memset(event->mmap.filename + size, 0, machine->id_hdr_size);
- event->mmap.header.size += machine->id_hdr_size;
- event->mmap.start = pos->start;
- event->mmap.len = pos->end - pos->start;
- event->mmap.pid = machine->pid;
+ if (symbol_conf.buildid_mmap2) {
+ size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
+ event->mmap2.header.type = PERF_RECORD_MMAP2;
+ event->mmap2.header.size = (sizeof(event->mmap2) -
+ (sizeof(event->mmap2.filename) - size));
+ memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+ event->mmap2.header.size += machine->id_hdr_size;
+ event->mmap2.start = pos->start;
+ event->mmap2.len = pos->end - pos->start;
+ event->mmap2.pid = machine->pid;
+
+ memcpy(event->mmap2.filename, pos->dso->long_name,
+ pos->dso->long_name_len + 1);
+
+ perf_record_mmap2__read_build_id(&event->mmap2, false);
+ } else {
+ size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
+ event->mmap.header.type = PERF_RECORD_MMAP;
+ event->mmap.header.size = (sizeof(event->mmap) -
+ (sizeof(event->mmap.filename) - size));
+ memset(event->mmap.filename + size, 0, machine->id_hdr_size);
+ event->mmap.header.size += machine->id_hdr_size;
+ event->mmap.start = pos->start;
+ event->mmap.len = pos->end - pos->start;
+ event->mmap.pid = machine->pid;
+
+ memcpy(event->mmap.filename, pos->dso->long_name,
+ pos->dso->long_name_len + 1);
+ }
- memcpy(event->mmap.filename, pos->dso->long_name,
- pos->dso->long_name_len + 1);
if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
rc = -1;
break;
@@ -643,6 +703,11 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t
return rc;
}
+static int filter_task(const struct dirent *dirent)
+{
+ return isdigit(dirent->d_name[0]);
+}
+
static int __event__synthesize_thread(union perf_event *comm_event,
union perf_event *mmap_event,
union perf_event *fork_event,
@@ -651,10 +716,10 @@ static int __event__synthesize_thread(union perf_event *comm_event,
struct perf_tool *tool, struct machine *machine, bool mmap_data)
{
char filename[PATH_MAX];
- DIR *tasks;
- struct dirent *dirent;
+ struct dirent **dirent;
pid_t tgid, ppid;
int rc = 0;
+ int i, n;
/* special case: only send one comm event using passed in pid */
if (!full) {
@@ -686,23 +751,22 @@ static int __event__synthesize_thread(union perf_event *comm_event,
snprintf(filename, sizeof(filename), "%s/proc/%d/task",
machine->root_dir, pid);
- tasks = opendir(filename);
- if (tasks == NULL) {
- pr_debug("couldn't open %s\n", filename);
- return 0;
- }
+ n = scandir(filename, &dirent, filter_task, alphasort);
+ if (n < 0)
+ return n;
- while ((dirent = readdir(tasks)) != NULL) {
+ for (i = 0; i < n; i++) {
char *end;
pid_t _pid;
+ bool kernel_thread;
- _pid = strtol(dirent->d_name, &end, 10);
+ _pid = strtol(dirent[i]->d_name, &end, 10);
if (*end)
continue;
rc = -1;
- if (perf_event__prepare_comm(comm_event, _pid, machine,
- &tgid, &ppid) != 0)
+ if (perf_event__prepare_comm(comm_event, pid, _pid, machine,
+ &tgid, &ppid, &kernel_thread) != 0)
break;
if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
@@ -720,7 +784,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
break;
rc = 0;
- if (_pid == pid) {
+ if (_pid == pid && !kernel_thread) {
/* process the parent's maps too */
rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
process, machine, mmap_data);
@@ -729,7 +793,10 @@ static int __event__synthesize_thread(union perf_event *comm_event,
}
}
- closedir(tasks);
+ for (i = 0; i < n; i++)
+ zfree(&dirent[i]);
+ free(dirent);
+
return rc;
}
@@ -914,7 +981,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
return 0;
snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
- n = scandir(proc_path, &dirent, 0, alphasort);
+ n = scandir(proc_path, &dirent, filter_task, alphasort);
if (n < 0)
return err;
@@ -991,11 +1058,12 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine)
{
- size_t size;
+ union perf_event *event;
+ size_t size = symbol_conf.buildid_mmap2 ?
+ sizeof(event->mmap2) : sizeof(event->mmap);
struct map *map = machine__kernel_map(machine);
struct kmap *kmap;
int err;
- union perf_event *event;
if (map == NULL)
return -1;
@@ -1009,7 +1077,7 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
* available use this, and after it is use this as a fallback for older
* kernels.
*/
- event = zalloc((sizeof(event->mmap) + machine->id_hdr_size));
+ event = zalloc(size + machine->id_hdr_size);
if (event == NULL) {
pr_debug("Not enough memory synthesizing mmap event "
"for kernel modules\n");
@@ -1026,16 +1094,31 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
}
- size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
- "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
- size = PERF_ALIGN(size, sizeof(u64));
- event->mmap.header.type = PERF_RECORD_MMAP;
- event->mmap.header.size = (sizeof(event->mmap) -
- (sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
- event->mmap.pgoff = kmap->ref_reloc_sym->addr;
- event->mmap.start = map->start;
- event->mmap.len = map->end - event->mmap.start;
- event->mmap.pid = machine->pid;
+ if (symbol_conf.buildid_mmap2) {
+ size = snprintf(event->mmap2.filename, sizeof(event->mmap2.filename),
+ "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
+ size = PERF_ALIGN(size, sizeof(u64));
+ event->mmap2.header.type = PERF_RECORD_MMAP2;
+ event->mmap2.header.size = (sizeof(event->mmap2) -
+ (sizeof(event->mmap2.filename) - size) + machine->id_hdr_size);
+ event->mmap2.pgoff = kmap->ref_reloc_sym->addr;
+ event->mmap2.start = map->start;
+ event->mmap2.len = map->end - event->mmap.start;
+ event->mmap2.pid = machine->pid;
+
+ perf_record_mmap2__read_build_id(&event->mmap2, true);
+ } else {
+ size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
+ "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
+ size = PERF_ALIGN(size, sizeof(u64));
+ event->mmap.header.type = PERF_RECORD_MMAP;
+ event->mmap.header.size = (sizeof(event->mmap) -
+ (sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
+ event->mmap.pgoff = kmap->ref_reloc_sym->addr;
+ event->mmap.start = map->start;
+ event->mmap.len = map->end - event->mmap.start;
+ event->mmap.pid = machine->pid;
+ }
err = perf_tool__process_synth_event(tool, event, machine, process);
free(event);
@@ -1384,7 +1467,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
}
}
- if (type & PERF_SAMPLE_WEIGHT)
+ if (type & PERF_SAMPLE_WEIGHT_TYPE)
result += sizeof(u64);
if (type & PERF_SAMPLE_DATA_SRC)
@@ -1412,6 +1495,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
if (type & PERF_SAMPLE_DATA_PAGE_SIZE)
result += sizeof(u64);
+ if (type & PERF_SAMPLE_CODE_PAGE_SIZE)
+ result += sizeof(u64);
+
if (type & PERF_SAMPLE_AUX) {
result += sizeof(u64);
result += sample->aux_sample.size;
@@ -1420,6 +1506,12 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
return result;
}
+void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data,
+ __u64 *array, u64 type __maybe_unused)
+{
+ *array = data->weight;
+}
+
int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format,
const struct perf_sample *sample)
{
@@ -1555,8 +1647,8 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
}
}
- if (type & PERF_SAMPLE_WEIGHT) {
- *array = sample->weight;
+ if (type & PERF_SAMPLE_WEIGHT_TYPE) {
+ arch_perf_synthesize_sample_weight(sample, array, type);
array++;
}
@@ -1596,6 +1688,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
array++;
}
+ if (type & PERF_SAMPLE_CODE_PAGE_SIZE) {
+ *array = sample->code_page_size;
+ array++;
+ }
+
if (type & PERF_SAMPLE_AUX) {
sz = sample->aux_sample.size;
*array++ = sz;
diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c
index a3db13dea937..0f383418e3df 100644
--- a/tools/perf/util/target.c
+++ b/tools/perf/util/target.c
@@ -56,6 +56,34 @@ enum target_errno target__validate(struct target *target)
ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM;
}
+ /* BPF and CPU are mutually exclusive */
+ if (target->bpf_str && target->cpu_list) {
+ target->cpu_list = NULL;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__BPF_OVERRIDE_CPU;
+ }
+
+ /* BPF and PID/TID are mutually exclusive */
+ if (target->bpf_str && target->tid) {
+ target->tid = NULL;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__BPF_OVERRIDE_PID;
+ }
+
+ /* BPF and UID are mutually exclusive */
+ if (target->bpf_str && target->uid_str) {
+ target->uid_str = NULL;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__BPF_OVERRIDE_UID;
+ }
+
+ /* BPF and THREADS are mutually exclusive */
+ if (target->bpf_str && target->per_thread) {
+ target->per_thread = false;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__BPF_OVERRIDE_THREAD;
+ }
+
/* THREAD and SYSTEM/CPU are mutually exclusive */
if (target->per_thread && (target->system_wide || target->cpu_list)) {
target->per_thread = false;
@@ -109,6 +137,10 @@ static const char *target__error_str[] = {
"PID/TID switch overriding SYSTEM",
"UID switch overriding SYSTEM",
"SYSTEM/CPU switch overriding PER-THREAD",
+ "BPF switch overriding CPU",
+ "BPF switch overriding PID/TID",
+ "BPF switch overriding UID",
+ "BPF switch overriding THREAD",
"Invalid User: %s",
"Problems obtaining information for user %s",
};
@@ -134,7 +166,7 @@ int target__strerror(struct target *target, int errnum,
switch (errnum) {
case TARGET_ERRNO__PID_OVERRIDE_CPU ...
- TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD:
+ TARGET_ERRNO__BPF_OVERRIDE_THREAD:
snprintf(buf, buflen, "%s", msg);
break;
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 6ef01a83b24e..f132c6c2eef8 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -10,6 +10,7 @@ struct target {
const char *tid;
const char *cpu_list;
const char *uid_str;
+ const char *bpf_str;
uid_t uid;
bool system_wide;
bool uses_mmap;
@@ -36,6 +37,10 @@ enum target_errno {
TARGET_ERRNO__PID_OVERRIDE_SYSTEM,
TARGET_ERRNO__UID_OVERRIDE_SYSTEM,
TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD,
+ TARGET_ERRNO__BPF_OVERRIDE_CPU,
+ TARGET_ERRNO__BPF_OVERRIDE_PID,
+ TARGET_ERRNO__BPF_OVERRIDE_UID,
+ TARGET_ERRNO__BPF_OVERRIDE_THREAD,
/* for target__parse_uid() */
TARGET_ERRNO__INVALID_UID,
@@ -59,6 +64,11 @@ static inline bool target__has_cpu(struct target *target)
return target->system_wide || target->cpu_list;
}
+static inline bool target__has_bpf(struct target *target)
+{
+ return target->bpf_str;
+}
+
static inline bool target__none(struct target *target)
{
return !target__has_task(target) && !target__has_cpu(target);
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 0e5c4786f296..a65f65d0857e 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -152,7 +152,7 @@ static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
return false;
}
-#define for_each_event(dir, dent, tps) \
+#define for_each_event_tps(dir, dent, tps) \
while ((dent = readdir(dir))) \
if (dent->d_type == DT_DIR && \
(strcmp(dent->d_name, ".")) && \
@@ -174,7 +174,7 @@ static int copy_event_system(const char *sys, struct tracepoint_path *tps)
return -errno;
}
- for_each_event(dir, dent, tps) {
+ for_each_event_tps(dir, dent, tps) {
if (!name_in_tp_list(dent->d_name, tps))
continue;
@@ -196,7 +196,7 @@ static int copy_event_system(const char *sys, struct tracepoint_path *tps)
}
rewinddir(dir);
- for_each_event(dir, dent, tps) {
+ for_each_event_tps(dir, dent, tps) {
if (!name_in_tp_list(dent->d_name, tps))
continue;
@@ -274,7 +274,7 @@ static int record_event_files(struct tracepoint_path *tps)
goto out;
}
- for_each_event(dir, dent, tps) {
+ for_each_event_tps(dir, dent, tps) {
if (strcmp(dent->d_name, "ftrace") == 0 ||
!system_in_tp_list(dent->d_name, tps))
continue;
@@ -289,7 +289,7 @@ static int record_event_files(struct tracepoint_path *tps)
}
rewinddir(dir);
- for_each_event(dir, dent, tps) {
+ for_each_event_tps(dir, dent, tps) {
if (strcmp(dent->d_name, "ftrace") == 0 ||
!system_in_tp_list(dent->d_name, tps))
continue;
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index f507dff713c9..8a01af783310 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -361,6 +361,7 @@ static int read_saved_cmdline(struct tep_handle *pevent)
pr_debug("error reading saved cmdlines\n");
goto out;
}
+ buf[ret] = '\0';
parse_saved_cmdline(pevent, buf, size);
ret = 0;
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 0ada907c60d4..a74b517f7497 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -60,10 +60,8 @@ static int __report_module(struct addr_location *al, u64 ip,
mod = dwfl_addrmodule(ui->dwfl, ip);
if (mod) {
Dwarf_Addr s;
- void **userdatap;
- dwfl_module_info(mod, &userdatap, &s, NULL, NULL, NULL, NULL, NULL);
- *userdatap = dso;
+ dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
if (s != al->map->start - al->map->pgoff)
mod = 0;
}
@@ -79,6 +77,13 @@ static int __report_module(struct addr_location *al, u64 ip,
al->map->start - al->map->pgoff, false);
}
+ if (mod) {
+ void **userdatap;
+
+ dwfl_module_info(mod, &userdatap, NULL, NULL, NULL, NULL, NULL, NULL);
+ *userdatap = dso;
+ }
+
return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
}
diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c
deleted file mode 100644
index 86889ebc3514..000000000000
--- a/tools/perf/util/xyarray.c
+++ /dev/null
@@ -1,33 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include "xyarray.h"
-#include <stdlib.h>
-#include <string.h>
-#include <linux/zalloc.h>
-
-struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
-{
- size_t row_size = ylen * entry_size;
- struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size);
-
- if (xy != NULL) {
- xy->entry_size = entry_size;
- xy->row_size = row_size;
- xy->entries = xlen * ylen;
- xy->max_x = xlen;
- xy->max_y = ylen;
- }
-
- return xy;
-}
-
-void xyarray__reset(struct xyarray *xy)
-{
- size_t n = xy->entries * xy->entry_size;
-
- memset(xy->contents, 0, n);
-}
-
-void xyarray__delete(struct xyarray *xy)
-{
- free(xy);
-}
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 09cb3a6672f3..582feb88eca3 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.7";
+static const char *version_str = "v1.8";
static const int supported_api_ver = 1;
static struct isst_if_platform_info isst_platform_info;
static char *progname;
@@ -2304,6 +2304,102 @@ static void get_clos_assoc(int arg)
isst_ctdp_display_information_end(outf);
}
+static void set_turbo_mode_for_cpu(int cpu, int status)
+{
+ int base_freq;
+
+ if (status) {
+ base_freq = get_cpufreq_base_freq(cpu);
+ set_cpufreq_scaling_min_max(cpu, 1, base_freq);
+ } else {
+ set_scaling_max_to_cpuinfo_max(cpu);
+ }
+
+ if (status) {
+ isst_display_result(cpu, outf, "turbo-mode", "enable", 0);
+ } else {
+ isst_display_result(cpu, outf, "turbo-mode", "disable", 0);
+ }
+}
+
+static void set_turbo_mode(int arg)
+{
+ int i, enable = arg;
+
+ if (cmd_help) {
+ if (enable)
+ fprintf(stderr, "Set turbo mode enable\n");
+ else
+ fprintf(stderr, "Set turbo mode disable\n");
+ exit(0);
+ }
+
+ isst_ctdp_display_information_start(outf);
+
+ for (i = 0; i < topo_max_cpus; ++i) {
+ int online;
+
+ if (i)
+ online = parse_int_file(
+ 1, "/sys/devices/system/cpu/cpu%d/online", i);
+ else
+ online =
+ 1; /* online entry for CPU 0 needs some special configs */
+
+ if (online)
+ set_turbo_mode_for_cpu(i, enable);
+
+ }
+ isst_ctdp_display_information_end(outf);
+}
+
+static void get_set_trl(int cpu, void *arg1, void *arg2, void *arg3,
+ void *arg4)
+{
+ unsigned long long trl;
+ int set = *(int *)arg4;
+ int ret;
+
+ if (set && !fact_trl) {
+ isst_display_error_info_message(1, "Invalid TRL. Specify with [-t|--trl]", 0, 0);
+ exit(0);
+ }
+
+ if (set) {
+ ret = isst_set_trl(cpu, fact_trl);
+ isst_display_result(cpu, outf, "turbo-mode", "set-trl", ret);
+ return;
+ }
+
+ ret = isst_get_trl(cpu, &trl);
+ if (ret)
+ isst_display_result(cpu, outf, "turbo-mode", "get-trl", ret);
+ else
+ isst_trl_display_information(cpu, outf, trl);
+}
+
+static void process_trl(int arg)
+{
+ if (cmd_help) {
+ if (arg) {
+ fprintf(stderr, "Set TRL (turbo ratio limits)\n");
+ fprintf(stderr, "\t t|--trl: Specify turbo ratio limit for setting TRL\n");
+ } else {
+ fprintf(stderr, "Get TRL (turbo ratio limits)\n");
+ }
+ exit(0);
+ }
+
+ isst_ctdp_display_information_start(outf);
+ if (max_target_cpus)
+ for_each_online_target_cpu_in_set(get_set_trl, NULL,
+ NULL, NULL, &arg);
+ else
+ for_each_online_package_in_set(get_set_trl, NULL,
+ NULL, NULL, &arg);
+ isst_ctdp_display_information_end(outf);
+}
+
static struct process_cmd_struct clx_n_cmds[] = {
{ "perf-profile", "info", dump_isst_config, 0 },
{ "base-freq", "info", dump_pbf_config, 0 },
@@ -2334,6 +2430,10 @@ static struct process_cmd_struct isst_cmds[] = {
{ "core-power", "get-config", dump_clos_config, 0 },
{ "core-power", "assoc", set_clos_assoc, 0 },
{ "core-power", "get-assoc", get_clos_assoc, 0 },
+ { "turbo-mode", "enable", set_turbo_mode, 0 },
+ { "turbo-mode", "disable", set_turbo_mode, 1 },
+ { "turbo-mode", "get-trl", process_trl, 0 },
+ { "turbo-mode", "set-trl", process_trl, 1 },
{ NULL, NULL, NULL }
};
@@ -2549,6 +2649,16 @@ static void fact_help(void)
printf("\tcommand : disable\n");
}
+static void turbo_mode_help(void)
+{
+ printf("turbo-mode:\tEnables users to enable/disable turbo mode by adjusting frequency settings. Also allows to get and set turbo ratio limits (TRL).\n");
+ printf("\tcommand : enable\n");
+ printf("\tcommand : disable\n");
+ printf("\tcommand : get-trl\n");
+ printf("\tcommand : set-trl\n");
+}
+
+
static void core_power_help(void)
{
printf("core-power:\tInterface that allows user to define per core/tile\n\
@@ -2573,6 +2683,7 @@ static struct process_cmd_help_struct isst_help_cmds[] = {
{ "base-freq", pbf_help },
{ "turbo-freq", fact_help },
{ "core-power", core_power_help },
+ { "turbo-mode", turbo_mode_help },
{ NULL, NULL }
};
@@ -2636,7 +2747,7 @@ static void usage(void)
if (is_clx_n_platform())
printf("\nFEATURE : [perf-profile|base-freq]\n");
else
- printf("\nFEATURE : [perf-profile|base-freq|turbo-freq|core-power]\n");
+ printf("\nFEATURE : [perf-profile|base-freq|turbo-freq|core-power|turbo-mode]\n");
printf("\nFor help on each feature, use -h|--help\n");
printf("\tFor example: intel-speed-select perf-profile -h\n");
diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index 8afd23407522..6a26d5769984 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -665,6 +665,17 @@ int isst_get_fact_info(int cpu, int level, int fact_bucket, struct isst_fact_inf
return 0;
}
+int isst_get_trl(int cpu, unsigned long long *trl)
+{
+ int ret;
+
+ ret = isst_send_msr_command(cpu, 0x1AD, 0, trl);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
int isst_set_trl(int cpu, unsigned long long trl)
{
int ret;
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index e105fece47b6..8e54ce47648e 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -763,3 +763,21 @@ void isst_display_error_info_message(int error, char *msg, int arg_valid, int ar
if (!start)
format_and_print(outf, 0, NULL, NULL);
}
+
+void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl)
+{
+ char header[256];
+ char value[256];
+ int level;
+
+ level = print_package_info(cpu, outf);
+
+ snprintf(header, sizeof(header), "get-trl");
+ format_and_print(outf, level + 1, header, NULL);
+
+ snprintf(header, sizeof(header), "trl");
+ snprintf(value, sizeof(value), "0x%llx", trl);
+ format_and_print(outf, level + 2, header, value);
+
+ format_and_print(outf, level, NULL, NULL);
+}
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 60db0bb084d5..0cac6c54be87 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -228,6 +228,7 @@ extern void isst_fact_display_information(int cpu, FILE *outf, int level,
int fact_bucket, int fact_avx,
struct isst_fact_info *fact_info);
extern int isst_set_trl(int cpu, unsigned long long trl);
+extern int isst_get_trl(int cpu, unsigned long long *trl);
extern int isst_set_trl_from_current_tdp(int cpu, unsigned long long trl);
extern int isst_get_config_tdp_lock_status(int cpu);
@@ -256,4 +257,5 @@ extern int get_cpufreq_base_freq(int cpu);
extern int isst_read_pm_config(int cpu, int *cp_state, int *cp_cap);
extern void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg);
extern int is_skx_based_platform(void);
+extern void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl);
#endif
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 4255e71f72b7..a402f32a145c 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -134,6 +134,7 @@ ifneq ($(silent),1)
$(MAKE) $(PRINT_DIR) -C $$subdir
QUIET_FLEX = @echo ' FLEX '$@;
QUIET_BISON = @echo ' BISON '$@;
+ QUIET_GENSKEL = @echo ' GEN-SKEL '$@;
descend = \
+@echo ' DESCEND '$(1); \
diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config
index a7f0603d33f6..690870043ac0 100644
--- a/tools/testing/kunit/configs/broken_on_uml.config
+++ b/tools/testing/kunit/configs/broken_on_uml.config
@@ -40,3 +40,5 @@
# CONFIG_RESET_BRCMSTB_RESCAL is not set
# CONFIG_RESET_INTEL_GW is not set
# CONFIG_ADI_AXI_ADC is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_PAGE_POISONING is not set
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index e808a47c839b..d5144fcb03ac 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -28,12 +28,12 @@ KunitBuildRequest = namedtuple('KunitBuildRequest',
['jobs', 'build_dir', 'alltests',
'make_options'])
KunitExecRequest = namedtuple('KunitExecRequest',
- ['timeout', 'build_dir', 'alltests'])
+ ['timeout', 'build_dir', 'alltests', 'filter_glob'])
KunitParseRequest = namedtuple('KunitParseRequest',
['raw_output', 'input_data', 'build_dir', 'json'])
KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs',
- 'build_dir', 'alltests', 'json',
- 'make_options'])
+ 'build_dir', 'alltests', 'filter_glob',
+ 'json', 'make_options'])
KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
@@ -93,6 +93,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree,
test_start = time.time()
result = linux.run_kernel(
timeout=None if request.alltests else request.timeout,
+ filter_glob=request.filter_glob,
build_dir=request.build_dir)
test_end = time.time()
@@ -149,7 +150,7 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree,
return build_result
exec_request = KunitExecRequest(request.timeout, request.build_dir,
- request.alltests)
+ request.alltests, request.filter_glob)
exec_result = exec_tests(linux, exec_request)
if exec_result.status != KunitStatus.SUCCESS:
return exec_result
@@ -182,6 +183,9 @@ def add_common_opts(parser) -> None:
parser.add_argument('--alltests',
help='Run all KUnit tests through allyesconfig',
action='store_true')
+ parser.add_argument('--kunitconfig',
+ help='Path to Kconfig fragment that enables KUnit tests',
+ metavar='kunitconfig')
def add_build_opts(parser) -> None:
parser.add_argument('--jobs',
@@ -197,6 +201,14 @@ def add_exec_opts(parser) -> None:
type=int,
default=300,
metavar='timeout')
+ parser.add_argument('filter_glob',
+ help='maximum number of seconds to allow for all tests '
+ 'to run. This does not include time taken to build the '
+ 'tests.',
+ type=str,
+ nargs='?',
+ default='',
+ metavar='filter_glob')
def add_parse_opts(parser) -> None:
parser.add_argument('--raw_output', help='don\'t format output from kernel',
@@ -256,13 +268,14 @@ def main(argv, linux=None):
os.mkdir(cli_args.build_dir)
if not linux:
- linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir)
+ linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig)
request = KunitRequest(cli_args.raw_output,
cli_args.timeout,
cli_args.jobs,
cli_args.build_dir,
cli_args.alltests,
+ cli_args.filter_glob,
cli_args.json,
cli_args.make_options)
result = run_tests(linux, request)
@@ -274,7 +287,7 @@ def main(argv, linux=None):
os.mkdir(cli_args.build_dir)
if not linux:
- linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir)
+ linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig)
request = KunitConfigRequest(cli_args.build_dir,
cli_args.make_options)
@@ -286,7 +299,7 @@ def main(argv, linux=None):
sys.exit(1)
elif cli_args.subcommand == 'build':
if not linux:
- linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir)
+ linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig)
request = KunitBuildRequest(cli_args.jobs,
cli_args.build_dir,
@@ -304,7 +317,8 @@ def main(argv, linux=None):
exec_request = KunitExecRequest(cli_args.timeout,
cli_args.build_dir,
- cli_args.alltests)
+ cli_args.alltests,
+ cli_args.filter_glob)
exec_result = exec_tests(linux, exec_request)
parse_request = KunitParseRequest(cli_args.raw_output,
exec_result.result,
diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
index bdd60230764b..1e2683dcc0e7 100644
--- a/tools/testing/kunit/kunit_config.py
+++ b/tools/testing/kunit/kunit_config.py
@@ -13,7 +13,7 @@ from typing import List, Set
CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$'
CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$'
-KconfigEntryBase = collections.namedtuple('KconfigEntry', ['name', 'value'])
+KconfigEntryBase = collections.namedtuple('KconfigEntryBase', ['name', 'value'])
class KconfigEntry(KconfigEntryBase):
@@ -41,15 +41,14 @@ class Kconfig(object):
self._entries.append(entry)
def is_subset_of(self, other: 'Kconfig') -> bool:
+ other_dict = {e.name: e.value for e in other.entries()}
for a in self.entries():
- found = False
- for b in other.entries():
- if a.name != b.name:
+ b = other_dict.get(a.name)
+ if b is None:
+ if a.value == 'n':
continue
- if a.value != b.value:
- return False
- found = True
- if a.value != 'n' and found == False:
+ return False
+ elif a.value != b:
return False
return True
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 2076a5a2d060..f309a33256cd 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -123,7 +123,7 @@ def get_outfile_path(build_dir) -> str:
class LinuxSourceTree(object):
"""Represents a Linux kernel source tree with KUnit tests."""
- def __init__(self, build_dir: str, load_config=True, defconfig=DEFAULT_KUNITCONFIG_PATH) -> None:
+ def __init__(self, build_dir: str, load_config=True, kunitconfig_path='') -> None:
signal.signal(signal.SIGINT, self.signal_handler)
self._ops = LinuxSourceTreeOperations()
@@ -131,9 +131,13 @@ class LinuxSourceTree(object):
if not load_config:
return
- kunitconfig_path = get_kunitconfig_path(build_dir)
- if not os.path.exists(kunitconfig_path):
- shutil.copyfile(defconfig, kunitconfig_path)
+ if kunitconfig_path:
+ if not os.path.exists(kunitconfig_path):
+ raise ConfigError(f'Specified kunitconfig ({kunitconfig_path}) does not exist')
+ else:
+ kunitconfig_path = get_kunitconfig_path(build_dir)
+ if not os.path.exists(kunitconfig_path):
+ shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, kunitconfig_path)
self._kconfig = kunit_config.Kconfig()
self._kconfig.read_from_file(kunitconfig_path)
@@ -199,8 +203,12 @@ class LinuxSourceTree(object):
return False
return self.validate_config(build_dir)
- def run_kernel(self, args=[], build_dir='', timeout=None) -> Iterator[str]:
+ def run_kernel(self, args=None, build_dir='', filter_glob='', timeout=None) -> Iterator[str]:
+ if not args:
+ args = []
args.extend(['mem=1G', 'console=tty'])
+ if filter_glob:
+ args.append('kunit.filter_glob='+filter_glob)
self._ops.linux_bin(args, timeout, build_dir)
outfile = get_outfile_path(build_dir)
subprocess.call(['stty', 'sane'])
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index b593f4448e83..1ad3049e9069 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -12,6 +12,7 @@ from unittest import mock
import tempfile, shutil # Handling test_tmpdir
import json
+import signal
import os
import kunit_config
@@ -21,16 +22,18 @@ import kunit_json
import kunit
test_tmpdir = ''
+abs_test_data_dir = ''
def setUpModule():
- global test_tmpdir
+ global test_tmpdir, abs_test_data_dir
test_tmpdir = tempfile.mkdtemp()
+ abs_test_data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'test_data'))
def tearDownModule():
shutil.rmtree(test_tmpdir)
-def get_absolute_path(path):
- return os.path.join(os.path.dirname(__file__), path)
+def test_data_path(path):
+ return os.path.join(abs_test_data_dir, path)
class KconfigTest(unittest.TestCase):
@@ -46,8 +49,7 @@ class KconfigTest(unittest.TestCase):
def test_read_from_file(self):
kconfig = kunit_config.Kconfig()
- kconfig_path = get_absolute_path(
- 'test_data/test_read_from_file.kconfig')
+ kconfig_path = test_data_path('test_read_from_file.kconfig')
kconfig.read_from_file(kconfig_path)
@@ -98,21 +100,18 @@ class KUnitParserTest(unittest.TestCase):
str(needle) + '" not found in "' + str(haystack) + '"!')
def test_output_isolated_correctly(self):
- log_path = get_absolute_path(
- 'test_data/test_output_isolated_correctly.log')
- file = open(log_path)
- result = kunit_parser.isolate_kunit_output(file.readlines())
+ log_path = test_data_path('test_output_isolated_correctly.log')
+ with open(log_path) as file:
+ result = kunit_parser.isolate_kunit_output(file.readlines())
self.assertContains('TAP version 14', result)
self.assertContains(' # Subtest: example', result)
self.assertContains(' 1..2', result)
self.assertContains(' ok 1 - example_simple_test', result)
self.assertContains(' ok 2 - example_mock_test', result)
self.assertContains('ok 1 - example', result)
- file.close()
def test_output_with_prefix_isolated_correctly(self):
- log_path = get_absolute_path(
- 'test_data/test_pound_sign.log')
+ log_path = test_data_path('test_pound_sign.log')
with open(log_path) as file:
result = kunit_parser.isolate_kunit_output(file.readlines())
self.assertContains('TAP version 14', result)
@@ -141,61 +140,51 @@ class KUnitParserTest(unittest.TestCase):
self.assertContains('ok 3 - string-stream-test', result)
def test_parse_successful_test_log(self):
- all_passed_log = get_absolute_path(
- 'test_data/test_is_test_passed-all_passed.log')
- file = open(all_passed_log)
- result = kunit_parser.parse_run_tests(file.readlines())
+ all_passed_log = test_data_path('test_is_test_passed-all_passed.log')
+ with open(all_passed_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines())
self.assertEqual(
kunit_parser.TestStatus.SUCCESS,
result.status)
- file.close()
def test_parse_failed_test_log(self):
- failed_log = get_absolute_path(
- 'test_data/test_is_test_passed-failure.log')
- file = open(failed_log)
- result = kunit_parser.parse_run_tests(file.readlines())
+ failed_log = test_data_path('test_is_test_passed-failure.log')
+ with open(failed_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines())
self.assertEqual(
kunit_parser.TestStatus.FAILURE,
result.status)
- file.close()
def test_no_tests(self):
- empty_log = get_absolute_path(
- 'test_data/test_is_test_passed-no_tests_run.log')
- file = open(empty_log)
- result = kunit_parser.parse_run_tests(
- kunit_parser.isolate_kunit_output(file.readlines()))
+ empty_log = test_data_path('test_is_test_passed-no_tests_run.log')
+ with open(empty_log) as file:
+ result = kunit_parser.parse_run_tests(
+ kunit_parser.isolate_kunit_output(file.readlines()))
self.assertEqual(0, len(result.suites))
self.assertEqual(
kunit_parser.TestStatus.NO_TESTS,
result.status)
- file.close()
def test_no_kunit_output(self):
- crash_log = get_absolute_path(
- 'test_data/test_insufficient_memory.log')
- file = open(crash_log)
+ crash_log = test_data_path('test_insufficient_memory.log')
print_mock = mock.patch('builtins.print').start()
- result = kunit_parser.parse_run_tests(
- kunit_parser.isolate_kunit_output(file.readlines()))
+ with open(crash_log) as file:
+ result = kunit_parser.parse_run_tests(
+ kunit_parser.isolate_kunit_output(file.readlines()))
print_mock.assert_any_call(StrContains('no tests run!'))
print_mock.stop()
file.close()
def test_crashed_test(self):
- crashed_log = get_absolute_path(
- 'test_data/test_is_test_passed-crash.log')
- file = open(crashed_log)
- result = kunit_parser.parse_run_tests(file.readlines())
+ crashed_log = test_data_path('test_is_test_passed-crash.log')
+ with open(crashed_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines())
self.assertEqual(
kunit_parser.TestStatus.TEST_CRASHED,
result.status)
- file.close()
def test_ignores_prefix_printk_time(self):
- prefix_log = get_absolute_path(
- 'test_data/test_config_printk_time.log')
+ prefix_log = test_data_path('test_config_printk_time.log')
with open(prefix_log) as file:
result = kunit_parser.parse_run_tests(file.readlines())
self.assertEqual(
@@ -204,8 +193,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual('kunit-resource-test', result.suites[0].name)
def test_ignores_multiple_prefixes(self):
- prefix_log = get_absolute_path(
- 'test_data/test_multiple_prefixes.log')
+ prefix_log = test_data_path('test_multiple_prefixes.log')
with open(prefix_log) as file:
result = kunit_parser.parse_run_tests(file.readlines())
self.assertEqual(
@@ -214,8 +202,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual('kunit-resource-test', result.suites[0].name)
def test_prefix_mixed_kernel_output(self):
- mixed_prefix_log = get_absolute_path(
- 'test_data/test_interrupted_tap_output.log')
+ mixed_prefix_log = test_data_path('test_interrupted_tap_output.log')
with open(mixed_prefix_log) as file:
result = kunit_parser.parse_run_tests(file.readlines())
self.assertEqual(
@@ -224,7 +211,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual('kunit-resource-test', result.suites[0].name)
def test_prefix_poundsign(self):
- pound_log = get_absolute_path('test_data/test_pound_sign.log')
+ pound_log = test_data_path('test_pound_sign.log')
with open(pound_log) as file:
result = kunit_parser.parse_run_tests(file.readlines())
self.assertEqual(
@@ -233,7 +220,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual('kunit-resource-test', result.suites[0].name)
def test_kernel_panic_end(self):
- panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log')
+ panic_log = test_data_path('test_kernel_panic_interrupt.log')
with open(panic_log) as file:
result = kunit_parser.parse_run_tests(file.readlines())
self.assertEqual(
@@ -242,7 +229,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual('kunit-resource-test', result.suites[0].name)
def test_pound_no_prefix(self):
- pound_log = get_absolute_path('test_data/test_pound_no_prefix.log')
+ pound_log = test_data_path('test_pound_no_prefix.log')
with open(pound_log) as file:
result = kunit_parser.parse_run_tests(file.readlines())
self.assertEqual(
@@ -250,10 +237,27 @@ class KUnitParserTest(unittest.TestCase):
result.status)
self.assertEqual('kunit-resource-test', result.suites[0].name)
+class LinuxSourceTreeTest(unittest.TestCase):
+
+ def setUp(self):
+ mock.patch.object(signal, 'signal').start()
+ self.addCleanup(mock.patch.stopall)
+
+ def test_invalid_kunitconfig(self):
+ with self.assertRaisesRegex(kunit_kernel.ConfigError, 'nonexistent.* does not exist'):
+ kunit_kernel.LinuxSourceTree('', kunitconfig_path='/nonexistent_file')
+
+ def test_valid_kunitconfig(self):
+ with tempfile.NamedTemporaryFile('wt') as kunitconfig:
+ tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name)
+
+ # TODO: add more test cases.
+
+
class KUnitJsonTest(unittest.TestCase):
def _json_for(self, log_file):
- with(open(get_absolute_path(log_file))) as file:
+ with open(test_data_path(log_file)) as file:
test_result = kunit_parser.parse_run_tests(file)
json_obj = kunit_json.get_json_result(
test_result=test_result,
@@ -263,22 +267,19 @@ class KUnitJsonTest(unittest.TestCase):
return json.loads(json_obj)
def test_failed_test_json(self):
- result = self._json_for(
- 'test_data/test_is_test_passed-failure.log')
+ result = self._json_for('test_is_test_passed-failure.log')
self.assertEqual(
{'name': 'example_simple_test', 'status': 'FAIL'},
result["sub_groups"][1]["test_cases"][0])
def test_crashed_test_json(self):
- result = self._json_for(
- 'test_data/test_is_test_passed-crash.log')
+ result = self._json_for('test_is_test_passed-crash.log')
self.assertEqual(
{'name': 'example_simple_test', 'status': 'ERROR'},
result["sub_groups"][1]["test_cases"][0])
def test_no_tests_json(self):
- result = self._json_for(
- 'test_data/test_is_test_passed-no_tests_run.log')
+ result = self._json_for('test_is_test_passed-no_tests_run.log')
self.assertEqual(0, len(result['sub_groups']))
class StrContains(str):
@@ -287,106 +288,104 @@ class StrContains(str):
class KUnitMainTest(unittest.TestCase):
def setUp(self):
- path = get_absolute_path('test_data/test_is_test_passed-all_passed.log')
- file = open(path)
- all_passed_log = file.readlines()
- self.print_patch = mock.patch('builtins.print')
- self.print_mock = self.print_patch.start()
+ path = test_data_path('test_is_test_passed-all_passed.log')
+ with open(path) as file:
+ all_passed_log = file.readlines()
+
+ self.print_mock = mock.patch('builtins.print').start()
+ self.addCleanup(mock.patch.stopall)
+
self.linux_source_mock = mock.Mock()
self.linux_source_mock.build_reconfig = mock.Mock(return_value=True)
self.linux_source_mock.build_um_kernel = mock.Mock(return_value=True)
self.linux_source_mock.run_kernel = mock.Mock(return_value=all_passed_log)
- def tearDown(self):
- self.print_patch.stop()
- pass
-
def test_config_passes_args_pass(self):
kunit.main(['config', '--build_dir=.kunit'], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 1
- assert self.linux_source_mock.run_kernel.call_count == 0
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0)
def test_build_passes_args_pass(self):
kunit.main(['build'], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 0
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0)
self.linux_source_mock.build_um_kernel.assert_called_once_with(False, 8, '.kunit', None)
- assert self.linux_source_mock.run_kernel.call_count == 0
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0)
def test_exec_passes_args_pass(self):
kunit.main(['exec'], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 0
- assert self.linux_source_mock.run_kernel.call_count == 1
- self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=300)
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ build_dir='.kunit', filter_glob='', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_passes_args_pass(self):
kunit.main(['run'], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 1
- assert self.linux_source_mock.run_kernel.call_count == 1
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir='.kunit', timeout=300)
+ build_dir='.kunit', filter_glob='', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_exec_passes_args_fail(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
with self.assertRaises(SystemExit) as e:
kunit.main(['exec'], self.linux_source_mock)
- assert type(e.exception) == SystemExit
- assert e.exception.code == 1
+ self.assertEqual(e.exception.code, 1)
def test_run_passes_args_fail(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
with self.assertRaises(SystemExit) as e:
kunit.main(['run'], self.linux_source_mock)
- assert type(e.exception) == SystemExit
- assert e.exception.code == 1
- assert self.linux_source_mock.build_reconfig.call_count == 1
- assert self.linux_source_mock.run_kernel.call_count == 1
+ self.assertEqual(e.exception.code, 1)
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
self.print_mock.assert_any_call(StrContains(' 0 tests run'))
def test_exec_raw_output(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
kunit.main(['exec', '--raw_output'], self.linux_source_mock)
- assert self.linux_source_mock.run_kernel.call_count == 1
- for kall in self.print_mock.call_args_list:
- assert kall != mock.call(StrContains('Testing complete.'))
- assert kall != mock.call(StrContains(' 0 tests run'))
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
+ for call in self.print_mock.call_args_list:
+ self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
+ self.assertNotEqual(call, mock.call(StrContains(' 0 tests run')))
def test_run_raw_output(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
kunit.main(['run', '--raw_output'], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 1
- assert self.linux_source_mock.run_kernel.call_count == 1
- for kall in self.print_mock.call_args_list:
- assert kall != mock.call(StrContains('Testing complete.'))
- assert kall != mock.call(StrContains(' 0 tests run'))
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
+ for call in self.print_mock.call_args_list:
+ self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
+ self.assertNotEqual(call, mock.call(StrContains(' 0 tests run')))
def test_exec_timeout(self):
timeout = 3453
kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock)
- self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=timeout)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ build_dir='.kunit', filter_glob='', timeout=timeout)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_timeout(self):
timeout = 3453
kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 1
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir='.kunit', timeout=timeout)
+ build_dir='.kunit', filter_glob='', timeout=timeout)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_builddir(self):
build_dir = '.kunit'
kunit.main(['run', '--build_dir=.kunit'], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 1
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir=build_dir, timeout=300)
+ build_dir=build_dir, filter_glob='', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_config_builddir(self):
build_dir = '.kunit'
kunit.main(['config', '--build_dir', build_dir], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 1
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
def test_build_builddir(self):
build_dir = '.kunit'
@@ -396,8 +395,23 @@ class KUnitMainTest(unittest.TestCase):
def test_exec_builddir(self):
build_dir = '.kunit'
kunit.main(['exec', '--build_dir', build_dir], self.linux_source_mock)
- self.linux_source_mock.run_kernel.assert_called_once_with(build_dir=build_dir, timeout=300)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ build_dir=build_dir, filter_glob='', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
+ @mock.patch.object(kunit_kernel, 'LinuxSourceTree')
+ def test_run_kunitconfig(self, mock_linux_init):
+ mock_linux_init.return_value = self.linux_source_mock
+ kunit.main(['run', '--kunitconfig=mykunitconfig'])
+ # Just verify that we parsed and initialized it correctly here.
+ mock_linux_init.assert_called_once_with('.kunit', kunitconfig_path='mykunitconfig')
+
+ @mock.patch.object(kunit_kernel, 'LinuxSourceTree')
+ def test_config_kunitconfig(self, mock_linux_init):
+ mock_linux_init.return_value = self.linux_source_mock
+ kunit.main(['config', '--kunitconfig=mykunitconfig'])
+ # Just verify that we parsed and initialized it correctly here.
+ mock_linux_init.assert_called_once_with('.kunit', kunitconfig_path='mykunitconfig')
+
if __name__ == '__main__':
unittest.main()
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index 71c960dcd8a4..652254754b4c 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -55,7 +55,6 @@ int main(void)
struct test *test, tests[] = {
{ -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 },
{ 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 },
- { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 },
{ 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 },
{ 0, 1, pfn(0), NULL, 1, sgmax, 1 },
{ 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 },
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c42aacec5038..6c575cf34a71 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -33,6 +33,7 @@ TARGETS += memfd
TARGETS += memory-hotplug
TARGETS += mincore
TARGETS += mount
+TARGETS += mount_setattr
TARGETS += mqueue
TARGETS += nci
TARGETS += net
@@ -126,15 +127,6 @@ ARCH ?= $(SUBARCH)
export KSFT_KHDR_INSTALL_DONE := 1
export BUILD
-# build and run gpio when output directory is the src dir.
-# gpio has dependency on tools/gpio and builds tools/gpio
-# objects in the src directory in all cases making the src
-# repo dirty even when objects are relocated.
-ifneq (1,$(DEFAULT_INSTALL_HDR_PATH))
- TMP := $(filter-out gpio, $(TARGETS))
- TARGETS := $(TMP)
-endif
-
# set default goal to all, so make without a target runs all, even when
# all isn't the first target in the file.
.DEFAULT_GOAL := all
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index b2282be6f938..612d3899614a 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -332,5 +332,5 @@ int main(void)
ksft_print_cnts();
- return 0;
+ return ret;
}
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
index 9210691aa998..e3e08d9c7020 100644
--- a/tools/testing/selftests/arm64/fp/sve-test.S
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -284,16 +284,28 @@ endfunction
// Set up test pattern in the FFR
// x0: pid
// x2: generation
+//
+// We need to generate a canonical FFR value, which consists of a number of
+// low "1" bits, followed by a number of zeros. This gives us 17 unique values
+// per 16 bits of FFR, so we create a 4 bit signature out of the PID and
+// generation, and use that as the initial number of ones in the pattern.
+// We fill the upper lanes of FFR with zeros.
// Beware: corrupts P0.
function setup_ffr
mov x4, x30
- bl pattern
+ and w0, w0, #0x3
+ bfi w0, w2, #2, #2
+ mov w1, #1
+ lsl w1, w1, w0
+ sub w1, w1, #1
+
ldr x0, =ffrref
- ldr x1, =scratch
- rdvl x2, #1
- lsr x2, x2, #3
- bl memcpy
+ strh w1, [x0], 2
+ rdvl x1, #1
+ lsr x1, x1, #3
+ sub x1, x1, #2
+ bl memclr
mov x0, #0
ldr x1, =ffrref
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
index c9fa141ebdcc..75fc482d63b6 100644
--- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -81,7 +81,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode,
last_index = 0;
/* Set some value in tagged memory and make the buffer underflow */
for (j = sizes[i] - 1; (j >= -underflow_range) &&
- (cur_mte_cxt.fault_valid == false); j--) {
+ (!cur_mte_cxt.fault_valid); j--) {
ptr[j] = '1';
last_index = j;
}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index c0c48fdb9ac1..4866f6a21901 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
+bpf-helpers*
+bpf-syscall*
test_verifier
test_maps
test_lru_map
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 044bfdcf5b74..6448c626498f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -21,7 +21,7 @@ endif
BPF_GCC ?= $(shell command -v bpf-gcc;)
SAN_CFLAGS ?=
-CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS) \
+CFLAGS += -g -Og -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \
-Dbpf_prog_load=bpf_prog_test_load \
@@ -68,6 +68,7 @@ TEST_PROGS := test_kmod.sh \
test_bpftool_build.sh \
test_bpftool.sh \
test_bpftool_metadata.sh \
+ test_doc_build.sh \
test_xsk.sh
TEST_PROGS_EXTENDED := with_addr.sh \
@@ -103,6 +104,7 @@ override define CLEAN
$(call msg,CLEAN)
$(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
$(Q)$(MAKE) -C bpf_testmod clean
+ $(Q)$(MAKE) docs-clean
endef
include ../lib.mk
@@ -180,6 +182,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
cp $(SCRATCH_DIR)/runqslower $@
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
+$(TEST_GEN_FILES): docs
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@@ -198,18 +201,25 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
CC=$(HOSTCC) LD=$(HOSTLD) \
+ EXTRA_CFLAGS='-g -Og' \
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
- $(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation
- $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
- -C $(BPFTOOLDIR)/Documentation \
- OUTPUT=$(BUILD_DIR)/bpftool/Documentation/ \
- prefix= DESTDIR=$(SCRATCH_DIR)/ install
+
+docs:
+ $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
+ -f Makefile.docs \
+ prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
+
+docs-clean:
+ $(Q)$(MAKE) $(submake_extras) \
+ -f Makefile.docs \
+ prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
| $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+ EXTRA_CFLAGS='-g -Og' \
DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
@@ -217,11 +227,12 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
| $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \
- OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
+ EXTRA_CFLAGS='-g -Og' \
+ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif
-$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
ifeq ($(VMLINUX_H),)
$(call msg,GEN,,$@)
$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
@@ -292,6 +303,10 @@ endef
SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
+LINKED_SKELS := test_static_linked.skel.h
+
+test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
+
# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
# Parameters:
@@ -312,6 +327,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(filter-out $(SKEL_BLACKLIST), \
$$(TRUNNER_BPF_SRCS)))
+TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
@@ -344,11 +360,22 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS))
-$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \
- $(TRUNNER_OUTPUT)/%.o \
- | $(BPFTOOL) $(TRUNNER_OUTPUT)
+$(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
+ $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
+
+$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o))
+ $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps))
+ $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked3.o) $$(@:.skel.h=.linked2.o)
+ $(Q)diff $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
- $(Q)$$(BPFTOOL) gen skeleton $$< > $$@
+ $(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@
endif
# ensure we set up tests.h header generation rule just once
@@ -370,6 +397,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_OBJS) \
$(TRUNNER_BPF_SKELS) \
+ $(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
$(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
@@ -382,11 +410,12 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
-# only copy extra resources if in flavored build
+# non-flavored in-srctree builds receive special treatment, in particular, we
+# do not need to copy extra resources (see e.g. test_btf_dump_case())
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
-ifneq ($2,)
+ifneq ($2:$(OUTPUT),:$(shell pwd))
$$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
- $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/
+ $(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
endif
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
@@ -476,3 +505,5 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature \
$(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
+
+.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
new file mode 100644
index 000000000000..ccf260021e83
--- /dev/null
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+include ../../../scripts/Makefile.include
+include ../../../scripts/utilities.mak
+
+INSTALL ?= install
+RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
+
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
+prefix ?= /usr/local
+mandir ?= $(prefix)/man
+man2dir = $(mandir)/man2
+man7dir = $(mandir)/man7
+
+SYSCALL_RST = bpf-syscall.rst
+MAN2_RST = $(SYSCALL_RST)
+
+HELPERS_RST = bpf-helpers.rst
+MAN7_RST = $(HELPERS_RST)
+
+_DOC_MAN2 = $(patsubst %.rst,%.2,$(MAN2_RST))
+DOC_MAN2 = $(addprefix $(OUTPUT),$(_DOC_MAN2))
+
+_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
+DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
+
+DOCTARGETS := helpers syscall
+
+docs: $(DOCTARGETS)
+syscall: man2
+helpers: man7
+man2: $(DOC_MAN2)
+man7: $(DOC_MAN7)
+
+RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+
+# Configure make rules for the man page bpf-$1.$2.
+# $1 - target for scripts/bpf_doc.py
+# $2 - man page section to generate the troff file
+define DOCS_RULES =
+$(OUTPUT)bpf-$1.rst: ../../../../include/uapi/linux/bpf.h
+ $$(QUIET_GEN)../../../../scripts/bpf_doc.py $1 \
+ --filename $$< > $$@
+
+$(OUTPUT)%.$2: $(OUTPUT)%.rst
+ifndef RST2MAN_DEP
+ $$(error "rst2man not found, but required to generate man pages")
+endif
+ $$(QUIET_GEN)rst2man $$< > $$@
+
+docs-clean-$1:
+ $$(call QUIET_CLEAN, eBPF_$1-manpage)
+ $(Q)$(RM) $$(DOC_MAN$2) $(OUTPUT)bpf-$1.rst
+
+docs-install-$1: docs
+ $$(call QUIET_INSTALL, eBPF_$1-manpage)
+ $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$$(man$2dir)
+ $(Q)$(INSTALL) -m 644 $$(DOC_MAN$2) $(DESTDIR)$$(man$2dir)
+
+docs-uninstall-$1:
+ $$(call QUIET_UNINST, eBPF_$1-manpage)
+ $(Q)$(RM) $$(addprefix $(DESTDIR)$$(man$2dir)/,$$(_DOC_MAN$2))
+ $(Q)$(RMDIR) $(DESTDIR)$$(man$2dir)
+
+.PHONY: $1 docs-clean-$1 docs-install-$1 docs-uninstall-$1
+endef
+
+# Create the make targets to generate manual pages by name and section
+$(eval $(call DOCS_RULES,helpers,7))
+$(eval $(call DOCS_RULES,syscall,2))
+
+docs-clean: $(foreach doctarget,$(DOCTARGETS), docs-clean-$(doctarget))
+docs-install: $(foreach doctarget,$(DOCTARGETS), docs-install-$(doctarget))
+docs-uninstall: $(foreach doctarget,$(DOCTARGETS), docs-uninstall-$(doctarget))
+
+.PHONY: docs docs-clean docs-install docs-uninstall man2 man7
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index fd148b8410fa..65fe318d1e71 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -111,6 +111,45 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
__ https://reviews.llvm.org/D78466
+bpf_verif_scale/loop6.o test failure with Clang 12
+==================================================
+
+With Clang 12, the following bpf_verif_scale test failed:
+ * ``bpf_verif_scale/loop6.o``
+
+The verifier output looks like
+
+.. code-block:: c
+
+ R1 type=ctx expected=fp
+ The sequence of 8193 jumps is too complex.
+
+The reason is compiler generating the following code
+
+.. code-block:: c
+
+ ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+ 14: 16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
+ 15: bc 51 00 00 00 00 00 00 w1 = w5
+ 16: 04 01 00 00 ff ff ff ff w1 += -1
+ 17: 67 05 00 00 20 00 00 00 r5 <<= 32
+ 18: 77 05 00 00 20 00 00 00 r5 >>= 32
+ 19: a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
+ 20: b7 05 00 00 06 00 00 00 r5 = 6
+ 00000000000000a8 <LBB0_4>:
+ 21: b7 02 00 00 00 00 00 00 r2 = 0
+ 22: b7 01 00 00 00 00 00 00 r1 = 0
+ ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+ 23: 7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
+ 24: 7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
+
+Note that insn #15 has w1 = w5 and w1 is refined later but
+r5(w5) is eventually saved on stack at insn #24 for later use.
+This cause later verifier failure. The bug has been `fixed`__ in
+Clang 13.
+
+__ https://reviews.llvm.org/D97479
+
BPF CO-RE-based tests and Clang version
=======================================
@@ -131,3 +170,26 @@ failures:
.. _2: https://reviews.llvm.org/D85174
.. _3: https://reviews.llvm.org/D83878
.. _4: https://reviews.llvm.org/D83242
+
+Floating-point tests and Clang version
+======================================
+
+Certain selftests, e.g. core_reloc, require support for the floating-point
+types, which was introduced in `Clang 13`__. The older Clang versions will
+either crash when compiling these tests, or generate an incorrect BTF.
+
+__ https://reviews.llvm.org/D83289
+
+Kernel function call test and Clang version
+===========================================
+
+Some selftests (e.g. kfunc_call and bpf_tcp_ca) require a LLVM support
+to generate extern function in BTF. It was introduced in `Clang 13`__.
+
+Without it, the error from compiling bpf selftests looks like:
+
+.. code-block:: console
+
+ libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2
+
+__ https://reviews.llvm.org/D93563
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 91f0fac632f4..029589c008c9 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -187,16 +187,6 @@ struct tcp_congestion_ops {
typeof(y) __y = (y); \
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
-static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked)
-{
- __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
-
- acked -= cwnd - tp->snd_cwnd;
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
-
- return acked;
-}
-
static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
return tp->snd_cwnd < tp->snd_ssthresh;
@@ -213,22 +203,7 @@ static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
}
-static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked)
-{
- /* If credits accumulated at a higher w, apply them gently now. */
- if (tp->snd_cwnd_cnt >= w) {
- tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd++;
- }
-
- tp->snd_cwnd_cnt += acked;
- if (tp->snd_cwnd_cnt >= w) {
- __u32 delta = tp->snd_cwnd_cnt / w;
-
- tp->snd_cwnd_cnt -= delta * w;
- tp->snd_cwnd += delta;
- }
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
-}
+extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
+extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
#endif
diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
index 48f90490f922..b692e6ead9b5 100644
--- a/tools/testing/selftests/bpf/btf_helpers.c
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -23,6 +23,7 @@ static const char * const btf_kind_str_mapping[] = {
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
[BTF_KIND_VAR] = "VAR",
[BTF_KIND_DATASEC] = "DATASEC",
+ [BTF_KIND_FLOAT] = "FLOAT",
};
static const char *btf_kind_str(__u16 kind)
@@ -173,6 +174,9 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
}
break;
}
+ case BTF_KIND_FLOAT:
+ fprintf(out, " size=%u", t->size);
+ break;
default:
break;
}
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index b8d6aef99db4..99628e1a1e58 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -57,6 +57,10 @@ int main(int argc, char **argv)
__u32 key = 0, pid;
int exit_code = 1;
char buf[256];
+ const struct timespec req = {
+ .tv_sec = 1,
+ .tv_nsec = 0,
+ };
cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
@@ -115,7 +119,7 @@ int main(int argc, char **argv)
goto close_pmu;
/* trigger some syscalls */
- sleep(1);
+ syscall(__NR_nanosleep, &req, NULL);
err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
index f0a64d8ac59a..e42ea1195d18 100644
--- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
@@ -55,7 +55,6 @@ void test_array_map_batch_ops(void)
int map_fd, *keys, *values, *visited;
__u32 count, total, total_success;
const __u32 max_entries = 10;
- bool nospace_err;
__u64 batch = 0;
int err, step;
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
@@ -90,7 +89,6 @@ void test_array_map_batch_ops(void)
* elements each.
*/
count = step;
- nospace_err = false;
while (true) {
err = bpf_map_lookup_batch(map_fd,
total ? &batch : NULL, &batch,
@@ -107,9 +105,6 @@ void test_array_map_batch_ops(void)
}
- if (nospace_err == true)
- continue;
-
CHECK(total != max_entries, "lookup with steps",
"total = %u, max_entries = %u\n", total, max_entries);
diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
new file mode 100644
index 000000000000..2e986e5e4cac
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+struct test_lpm_key {
+ __u32 prefix;
+ struct in_addr ipv4;
+};
+
+static void map_batch_update(int map_fd, __u32 max_entries,
+ struct test_lpm_key *keys, int *values)
+{
+ __u32 i;
+ int err;
+ char buff[16] = { 0 };
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ for (i = 0; i < max_entries; i++) {
+ keys[i].prefix = 32;
+ snprintf(buff, 16, "192.168.1.%d", i + 1);
+ inet_pton(AF_INET, buff, &keys[i].ipv4);
+ values[i] = i + 1;
+ }
+
+ err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
+ CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
+}
+
+static void map_batch_verify(int *visited, __u32 max_entries,
+ struct test_lpm_key *keys, int *values)
+{
+ char buff[16] = { 0 };
+ int lower_byte = 0;
+ __u32 i;
+
+ memset(visited, 0, max_entries * sizeof(*visited));
+ for (i = 0; i < max_entries; i++) {
+ inet_ntop(AF_INET, &keys[i].ipv4, buff, 32);
+ CHECK(sscanf(buff, "192.168.1.%d", &lower_byte) == EOF,
+ "sscanf()", "error: i %d\n", i);
+ CHECK(lower_byte != values[i], "key/value checking",
+ "error: i %d key %s value %d\n", i, buff, values[i]);
+ visited[i] = 1;
+ }
+ for (i = 0; i < max_entries; i++) {
+ CHECK(visited[i] != 1, "visited checking",
+ "error: keys array at index %d missing\n", i);
+ }
+}
+
+void test_lpm_trie_map_batch_ops(void)
+{
+ struct bpf_create_map_attr xattr = {
+ .name = "lpm_trie_map",
+ .map_type = BPF_MAP_TYPE_LPM_TRIE,
+ .key_size = sizeof(struct test_lpm_key),
+ .value_size = sizeof(int),
+ .map_flags = BPF_F_NO_PREALLOC,
+ };
+ struct test_lpm_key *keys, key;
+ int map_fd, *values, *visited;
+ __u32 step, count, total, total_success;
+ const __u32 max_entries = 10;
+ __u64 batch = 0;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ xattr.max_entries = max_entries;
+ map_fd = bpf_create_map_xattr(&xattr);
+ CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n",
+ strerror(errno));
+
+ keys = malloc(max_entries * sizeof(struct test_lpm_key));
+ values = malloc(max_entries * sizeof(int));
+ visited = malloc(max_entries * sizeof(int));
+ CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
+ strerror(errno));
+
+ total_success = 0;
+ for (step = 1; step < max_entries; step++) {
+ map_batch_update(map_fd, max_entries, keys, values);
+ map_batch_verify(visited, max_entries, keys, values);
+ memset(keys, 0, max_entries * sizeof(*keys));
+ memset(values, 0, max_entries * sizeof(*values));
+ batch = 0;
+ total = 0;
+ /* iteratively lookup/delete elements with 'step'
+ * elements each.
+ */
+ count = step;
+ while (true) {
+ err = bpf_map_lookup_batch(map_fd,
+ total ? &batch : NULL, &batch,
+ keys + total, values + total, &count, &opts);
+
+ CHECK((err && errno != ENOENT), "lookup with steps",
+ "error: %s\n", strerror(errno));
+
+ total += count;
+ if (err)
+ break;
+ }
+
+ CHECK(total != max_entries, "lookup with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ map_batch_verify(visited, max_entries, keys, values);
+
+ total = 0;
+ count = step;
+ while (total < max_entries) {
+ if (max_entries - total < step)
+ count = max_entries - total;
+ err = bpf_map_delete_batch(map_fd, keys + total, &count,
+ &opts);
+ CHECK((err && errno != ENOENT), "delete batch",
+ "error: %s\n", strerror(errno));
+ total += count;
+ if (err)
+ break;
+ }
+ CHECK(total != max_entries, "delete with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ /* check map is empty, errono == ENOENT */
+ err = bpf_map_get_next_key(map_fd, NULL, &key);
+ CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
+ "error: %s\n", strerror(errno));
+
+ total_success++;
+ }
+
+ CHECK(total_success == 0, "check total_success",
+ "unexpected failure\n");
+
+ printf("%s:PASS\n", __func__);
+
+ free(keys);
+ free(values);
+ free(visited);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index a0ee87c8e1ea..9dc4e3dfbcf3 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -2,6 +2,44 @@
#include <test_progs.h>
#include "test_attach_probe.skel.h"
+#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
+
+#define OP_RT_RA_MASK 0xffff0000UL
+#define LIS_R2 0x3c400000UL
+#define ADDIS_R2_R12 0x3c4c0000UL
+#define ADDI_R2_R2 0x38420000UL
+
+static ssize_t get_offset(ssize_t addr, ssize_t base)
+{
+ u32 *insn = (u32 *) addr;
+
+ /*
+ * A PPC64 ABIv2 function may have a local and a global entry
+ * point. We need to use the local entry point when patching
+ * functions, so identify and step over the global entry point
+ * sequence.
+ *
+ * The global entry point sequence is always of the form:
+ *
+ * addis r2,r12,XXXX
+ * addi r2,r2,XXXX
+ *
+ * A linker optimisation may convert the addis to lis:
+ *
+ * lis r2,XXXX
+ * addi r2,r2,XXXX
+ */
+ if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+ ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+ ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+ return (ssize_t)(insn + 2) - base;
+ else
+ return addr - base;
+}
+#else
+#define get_offset(addr, base) (addr - base)
+#endif
+
ssize_t get_base_addr() {
size_t start, offset;
char buf[256];
@@ -36,7 +74,7 @@ void test_attach_probe(void)
if (CHECK(base_addr < 0, "get_base_addr",
"failed to find base addr: %zd", base_addr))
return;
- uprobe_offset = (size_t)&get_base_addr - base_addr;
+ uprobe_offset = get_offset((size_t)&get_base_addr, base_addr);
skel = test_attach_probe__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e698ee6bb6c2..3d002c245d2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -76,6 +76,7 @@ void test_bpf_verif_scale(void)
{ "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
{ "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
{ "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
+ { "loop6.o", BPF_PROG_TYPE_KPROBE },
/* partial unroll. 19k insn in a loop.
* Total program size 20.8k insn.
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 6a7ee7420701..0457ae32b270 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -1903,7 +1903,7 @@ static struct btf_raw_test raw_tests[] = {
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
- BTF_TYPE_ENC(0, 0x10000000, 4),
+ BTF_TYPE_ENC(0, 0x20000000, 4),
BTF_END_RAW,
},
.str_sec = "",
@@ -3531,6 +3531,136 @@ static struct btf_raw_test raw_tests[] = {
.max_entries = 1,
},
+{
+ .descr = "float test #1, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [2] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [3] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 8), /* [4] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 12), /* [5] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 16), /* [6] */
+ BTF_STRUCT_ENC(NAME_TBD, 5, 48), /* [7] */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 3, 32),
+ BTF_MEMBER_ENC(NAME_TBD, 4, 64),
+ BTF_MEMBER_ENC(NAME_TBD, 5, 128),
+ BTF_MEMBER_ENC(NAME_TBD, 6, 256),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0_Float16\0float\0double\0_Float80\0long_double"
+ "\0floats\0a\0b\0c\0d\0e"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 1,
+ .value_type_id = 7,
+ .max_entries = 1,
+},
+{
+ .descr = "float test #2, invalid vlen",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 1), 4),
+ /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "vlen != 0",
+},
+{
+ .descr = "float test #3, invalid kind_flag",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 1, 0), 4),
+ /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid btf_info kind_flag",
+},
+{
+ .descr = "float test #4, member does not fit",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */
+ BTF_STRUCT_ENC(NAME_TBD, 1, 2), /* [3] */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float\0floats\0x"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
+},
+{
+ .descr = "float test #5, member is not properly aligned",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */
+ BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [3] */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 8),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float\0floats\0x"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Member is not properly aligned",
+},
+{
+ .descr = "float test #6, invalid size",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 6), /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 6,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_size",
+},
+
}; /* struct btf_raw_test raw_tests[] */
static const char *get_next_str(const char *start, const char *end)
@@ -6281,11 +6411,12 @@ const struct btf_dedup_test dedup_tests[] = {
/* int[16] */
BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */
/* struct s { */
- BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [3] */
+ BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [3] */
BTF_MEMBER_ENC(NAME_NTH(3), 4, 0), /* struct s *next; */
BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */
BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */
BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */
+ BTF_MEMBER_ENC(NAME_NTH(8), 13, 672), /* float d; */
/* ptr -> [3] struct s */
BTF_PTR_ENC(3), /* [4] */
/* ptr -> [6] const int */
@@ -6296,39 +6427,43 @@ const struct btf_dedup_test dedup_tests[] = {
/* full copy of the above */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */
BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */
- BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [9] */
+ BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [9] */
BTF_MEMBER_ENC(NAME_NTH(3), 10, 0),
BTF_MEMBER_ENC(NAME_NTH(4), 11, 64),
BTF_MEMBER_ENC(NAME_NTH(5), 8, 128),
BTF_MEMBER_ENC(NAME_NTH(6), 7, 640),
+ BTF_MEMBER_ENC(NAME_NTH(8), 13, 672),
BTF_PTR_ENC(9), /* [10] */
BTF_PTR_ENC(12), /* [11] */
BTF_CONST_ENC(7), /* [12] */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [13] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"),
+ BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"),
},
.expect = {
.raw_types = {
/* int */
- BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4), /* [1] */
/* int[16] */
BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */
/* struct s { */
- BTF_STRUCT_ENC(NAME_NTH(6), 4, 84), /* [3] */
- BTF_MEMBER_ENC(NAME_NTH(5), 4, 0), /* struct s *next; */
+ BTF_STRUCT_ENC(NAME_NTH(8), 5, 88), /* [3] */
+ BTF_MEMBER_ENC(NAME_NTH(7), 4, 0), /* struct s *next; */
BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */
BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */
BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */
+ BTF_MEMBER_ENC(NAME_NTH(4), 7, 672), /* float d; */
/* ptr -> [3] struct s */
BTF_PTR_ENC(3), /* [4] */
/* ptr -> [6] const int */
BTF_PTR_ENC(6), /* [5] */
/* const -> [1] int */
BTF_CONST_ENC(1), /* [6] */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [7] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"),
+ BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"),
},
.opts = {
.dont_resolve_fwds = false,
@@ -6449,9 +6584,10 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
},
.expect = {
.raw_types = {
@@ -6474,16 +6610,17 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
},
.opts = {
.dont_resolve_fwds = false,
},
},
{
- .descr = "dedup: no int duplicates",
+ .descr = "dedup: no int/float duplicates",
.input = {
.raw_types = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8),
@@ -6498,9 +6635,15 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
/* different byte size */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+ /* all allowed sizes */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
BTF_END_RAW,
},
- BTF_STR_SEC("\0int\0some other int"),
+ BTF_STR_SEC("\0int\0some other int\0float"),
},
.expect = {
.raw_types = {
@@ -6516,9 +6659,15 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
/* different byte size */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+ /* all allowed sizes */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
BTF_END_RAW,
},
- BTF_STR_SEC("\0int\0some other int"),
+ BTF_STR_SEC("\0int\0some other int\0float"),
},
.opts = {
.dont_resolve_fwds = false,
@@ -6630,6 +6779,7 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
return base_size;
case BTF_KIND_INT:
return base_size + sizeof(__u32);
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index 36af1c138faf..b62a39315336 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -128,6 +128,8 @@ static void test_check_mtu_xdp(__u32 mtu, __u32 ifindex)
test_check_mtu_run_xdp(skel, skel->progs.xdp_use_helper, mtu);
test_check_mtu_run_xdp(skel, skel->progs.xdp_exceed_mtu, mtu);
test_check_mtu_run_xdp(skel, skel->progs.xdp_minus_delta, mtu);
+ test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len, mtu);
+ test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len_exceed, mtu);
cleanup:
test_check_mtu__destroy(skel);
@@ -187,6 +189,8 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu_da, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu);
+ test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu);
+ test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu);
cleanup:
test_check_mtu__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 06eb956ff7bb..d94dcead72e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -266,6 +266,7 @@ static int duration = 0;
.arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \
.ptr_sz = 8, /* always 8-byte pointer for BPF */ \
.enum_sz = sizeof(((type *)0)->enum_field), \
+ .float_sz = sizeof(((type *)0)->float_field), \
}
#define SIZE_CASE(name) { \
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
new file mode 100644
index 000000000000..6c4d42a2386f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include "fexit_sleep.skel.h"
+
+static int do_sleep(void *skel)
+{
+ struct fexit_sleep *fexit_skel = skel;
+ struct timespec ts1 = { .tv_nsec = 1 };
+ struct timespec ts2 = { .tv_sec = 10 };
+
+ fexit_skel->bss->pid = getpid();
+ (void)syscall(__NR_nanosleep, &ts1, NULL);
+ (void)syscall(__NR_nanosleep, &ts2, NULL);
+ return 0;
+}
+
+#define STACK_SIZE (1024 * 1024)
+static char child_stack[STACK_SIZE];
+
+void test_fexit_sleep(void)
+{
+ struct fexit_sleep *fexit_skel = NULL;
+ int wstatus, duration = 0;
+ pid_t cpid;
+ int err, fexit_cnt;
+
+ fexit_skel = fexit_sleep__open_and_load();
+ if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+ goto cleanup;
+
+ err = fexit_sleep__attach(fexit_skel);
+ if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+ goto cleanup;
+
+ cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel);
+ if (CHECK(cpid == -1, "clone", strerror(errno)))
+ goto cleanup;
+
+ /* wait until first sys_nanosleep ends and second sys_nanosleep starts */
+ while (READ_ONCE(fexit_skel->bss->fentry_cnt) != 2);
+ fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt);
+ if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt))
+ goto cleanup;
+
+ /* close progs and detach them. That will trigger two nop5->jmp5 rewrites
+ * in the trampolines to skip nanosleep_fexit prog.
+ * The nanosleep_fentry prog will get detached first.
+ * The nanosleep_fexit prog will get detached second.
+ * Detaching will trigger freeing of both progs JITed images.
+ * There will be two dying bpf_tramp_image-s, but only the initial
+ * bpf_tramp_image (with both _fentry and _fexit progs will be stuck
+ * waiting for percpu_ref_kill to confirm). The other one
+ * will be freed quickly.
+ */
+ close(bpf_program__fd(fexit_skel->progs.nanosleep_fentry));
+ close(bpf_program__fd(fexit_skel->progs.nanosleep_fexit));
+ fexit_sleep__detach(fexit_skel);
+
+ /* kill the thread to unwind sys_nanosleep stack through the trampoline */
+ kill(cpid, 9);
+
+ if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno)))
+ goto cleanup;
+ if (CHECK(WEXITSTATUS(wstatus) != 0, "exitstatus", "failed"))
+ goto cleanup;
+
+ /* The bypassed nanosleep_fexit prog shouldn't have executed.
+ * Unlike progs the maps were not freed and directly accessible.
+ */
+ fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt);
+ if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt))
+ goto cleanup;
+
+cleanup:
+ fexit_sleep__destroy(fexit_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
new file mode 100644
index 000000000000..68eb12a287d4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "for_each_hash_map_elem.skel.h"
+#include "for_each_array_map_elem.skel.h"
+
+static unsigned int duration;
+
+static void test_hash_map(void)
+{
+ int i, err, hashmap_fd, max_entries, percpu_map_fd;
+ struct for_each_hash_map_elem *skel;
+ __u64 *percpu_valbuf = NULL;
+ __u32 key, num_cpus, retval;
+ __u64 val;
+
+ skel = for_each_hash_map_elem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
+ return;
+
+ hashmap_fd = bpf_map__fd(skel->maps.hashmap);
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i + 1;
+ err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ num_cpus = bpf_num_possible_cpus();
+ percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
+ percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
+ if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
+ goto out;
+
+ key = 1;
+ for (i = 0; i < num_cpus; i++)
+ percpu_valbuf[i] = i + 1;
+ err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
+ if (!ASSERT_OK(err, "percpu_map_update"))
+ goto out;
+
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
+ 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+ &retval, &duration);
+ if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
+ err, errno, retval))
+ goto out;
+
+ ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output");
+ ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems");
+
+ key = 1;
+ err = bpf_map_lookup_elem(hashmap_fd, &key, &val);
+ ASSERT_ERR(err, "hashmap_lookup");
+
+ ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called");
+ ASSERT_LT(skel->bss->cpu, num_cpus, "num_cpus");
+ ASSERT_EQ(skel->bss->percpu_map_elems, 1, "percpu_map_elems");
+ ASSERT_EQ(skel->bss->percpu_key, 1, "percpu_key");
+ ASSERT_EQ(skel->bss->percpu_val, skel->bss->cpu + 1, "percpu_val");
+ ASSERT_EQ(skel->bss->percpu_output, 100, "percpu_output");
+out:
+ free(percpu_valbuf);
+ for_each_hash_map_elem__destroy(skel);
+}
+
+static void test_array_map(void)
+{
+ __u32 key, num_cpus, max_entries, retval;
+ int i, arraymap_fd, percpu_map_fd, err;
+ struct for_each_array_map_elem *skel;
+ __u64 *percpu_valbuf = NULL;
+ __u64 val, expected_total;
+
+ skel = for_each_array_map_elem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
+ return;
+
+ arraymap_fd = bpf_map__fd(skel->maps.arraymap);
+ expected_total = 0;
+ max_entries = bpf_map__max_entries(skel->maps.arraymap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i + 1;
+ /* skip the last iteration for expected total */
+ if (i != max_entries - 1)
+ expected_total += val;
+ err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ num_cpus = bpf_num_possible_cpus();
+ percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
+ percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
+ if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
+ goto out;
+
+ key = 0;
+ for (i = 0; i < num_cpus; i++)
+ percpu_valbuf[i] = i + 1;
+ err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
+ if (!ASSERT_OK(err, "percpu_map_update"))
+ goto out;
+
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
+ 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+ &retval, &duration);
+ if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
+ err, errno, retval))
+ goto out;
+
+ ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output");
+ ASSERT_EQ(skel->bss->cpu + 1, skel->bss->percpu_val, "percpu_val");
+
+out:
+ free(percpu_valbuf);
+ for_each_array_map_elem__destroy(skel);
+}
+
+void test_for_each(void)
+{
+ if (test__start_subtest("hash_map"))
+ test_hash_map();
+ if (test__start_subtest("array_map"))
+ test_array_map();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
new file mode 100644
index 000000000000..7fc0951ee75f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test_subprog.skel.h"
+
+static void test_main(void)
+{
+ struct kfunc_call_test *skel;
+ int prog_fd, retval, err;
+
+ skel = kfunc_call_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, (__u32 *)&retval, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run(test1)");
+ ASSERT_EQ(retval, 12, "test1-retval");
+
+ prog_fd = bpf_program__fd(skel->progs.kfunc_call_test2);
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, (__u32 *)&retval, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run(test2)");
+ ASSERT_EQ(retval, 3, "test2-retval");
+
+ kfunc_call_test__destroy(skel);
+}
+
+static void test_subprog(void)
+{
+ struct kfunc_call_test_subprog *skel;
+ int prog_fd, retval, err;
+
+ skel = kfunc_call_test_subprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, (__u32 *)&retval, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run(test1)");
+ ASSERT_EQ(retval, 10, "test1-retval");
+ ASSERT_NEQ(skel->data->active_res, -1, "active_res");
+ ASSERT_EQ(skel->data->sk_state, BPF_TCP_CLOSE, "sk_state");
+
+ kfunc_call_test_subprog__destroy(skel);
+}
+
+void test_kfunc_call(void)
+{
+ if (test__start_subtest("main"))
+ test_main();
+
+ if (test__start_subtest("subprog"))
+ test_subprog();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index 935a294f049a..131d7f7eeb42 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -2,12 +2,31 @@
#include <test_progs.h>
#include <network_helpers.h>
-void test_prog_run_xattr(void)
+#include "test_pkt_access.skel.h"
+
+static const __u32 duration;
+
+static void check_run_cnt(int prog_fd, __u64 run_cnt)
{
- const char *file = "./test_pkt_access.o";
- struct bpf_object *obj;
- char buf[10];
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
int err;
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
+ return;
+
+ CHECK(run_cnt != info.run_cnt, "run_cnt",
+ "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt);
+}
+
+void test_prog_run_xattr(void)
+{
+ struct test_pkt_access *skel;
+ int err, stats_fd = -1;
+ char buf[10] = {};
+ __u64 run_cnt = 0;
+
struct bpf_prog_test_run_attr tattr = {
.repeat = 1,
.data_in = &pkt_v4,
@@ -16,12 +35,15 @@ void test_prog_run_xattr(void)
.data_size_out = 5,
};
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj,
- &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+ if (CHECK_ATTR(stats_fd < 0, "enable_stats", "failed %d\n", errno))
return;
- memset(buf, 0, sizeof(buf));
+ skel = test_pkt_access__open_and_load();
+ if (CHECK_ATTR(!skel, "open_and_load", "failed\n"))
+ goto cleanup;
+
+ tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
err = bpf_prog_test_run_xattr(&tattr);
CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
@@ -34,8 +56,12 @@ void test_prog_run_xattr(void)
CHECK_ATTR(buf[5] != 0, "overflow",
"BPF_PROG_TEST_RUN ignored size hint\n");
+ run_cnt += tattr.repeat;
+ check_run_cnt(tattr.prog_fd, run_cnt);
+
tattr.data_out = NULL;
tattr.data_size_out = 0;
+ tattr.repeat = 2;
errno = 0;
err = bpf_prog_test_run_xattr(&tattr);
@@ -46,5 +72,12 @@ void test_prog_run_xattr(void)
err = bpf_prog_test_run_xattr(&tattr);
CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err);
- bpf_object__close(obj);
+ run_cnt += tattr.repeat;
+ check_run_cnt(tattr.prog_fd, run_cnt);
+
+cleanup:
+ if (skel)
+ test_pkt_access__destroy(skel);
+ if (stats_fd != -1)
+ close(stats_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 9ff0412e1fd3..45c82db3c58c 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -241,6 +241,48 @@ fail:
return -1;
}
+static __u64 socket_cookie(int fd)
+{
+ __u64 cookie;
+ socklen_t cookie_len = sizeof(cookie);
+
+ if (CHECK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len) < 0,
+ "getsockopt(SO_COOKIE)", "%s\n", strerror(errno)))
+ return 0;
+ return cookie;
+}
+
+static int fill_sk_lookup_ctx(struct bpf_sk_lookup *ctx, const char *local_ip, __u16 local_port,
+ const char *remote_ip, __u16 remote_port)
+{
+ void *local, *remote;
+ int err;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->local_port = local_port;
+ ctx->remote_port = htons(remote_port);
+
+ if (is_ipv6(local_ip)) {
+ ctx->family = AF_INET6;
+ local = &ctx->local_ip6[0];
+ remote = &ctx->remote_ip6[0];
+ } else {
+ ctx->family = AF_INET;
+ local = &ctx->local_ip4;
+ remote = &ctx->remote_ip4;
+ }
+
+ err = inet_pton(ctx->family, local_ip, local);
+ if (CHECK(err != 1, "inet_pton", "local_ip failed\n"))
+ return 1;
+
+ err = inet_pton(ctx->family, remote_ip, remote);
+ if (CHECK(err != 1, "inet_pton", "remote_ip failed\n"))
+ return 1;
+
+ return 0;
+}
+
static int send_byte(int fd)
{
ssize_t n;
@@ -1009,18 +1051,27 @@ static void test_drop_on_reuseport(struct test_sk_lookup *skel)
static void run_sk_assign(struct test_sk_lookup *skel,
struct bpf_program *lookup_prog,
- const char *listen_ip, const char *connect_ip)
+ const char *remote_ip, const char *local_ip)
{
- int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 };
- struct bpf_link *lookup_link;
+ int server_fds[MAX_SERVERS] = { -1 };
+ struct bpf_sk_lookup ctx;
+ __u64 server_cookie;
int i, err;
- lookup_link = attach_lookup_prog(lookup_prog);
- if (!lookup_link)
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = &ctx,
+ .ctx_size_in = sizeof(ctx),
+ .ctx_out = &ctx,
+ .ctx_size_out = sizeof(ctx),
+ );
+
+ if (fill_sk_lookup_ctx(&ctx, local_ip, EXT_PORT, remote_ip, INT_PORT))
return;
+ ctx.protocol = IPPROTO_TCP;
+
for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
- server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL);
+ server_fds[i] = make_server(SOCK_STREAM, local_ip, 0, NULL);
if (server_fds[i] < 0)
goto close_servers;
@@ -1030,23 +1081,25 @@ static void run_sk_assign(struct test_sk_lookup *skel,
goto close_servers;
}
- client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT);
- if (client_fd < 0)
+ server_cookie = socket_cookie(server_fds[SERVER_B]);
+ if (!server_cookie)
+ return;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(lookup_prog), &opts);
+ if (CHECK(err, "test_run", "failed with error %d\n", errno))
+ goto close_servers;
+
+ if (CHECK(ctx.cookie == 0, "ctx.cookie", "no socket selected\n"))
goto close_servers;
- peer_fd = accept(server_fds[SERVER_B], NULL, NULL);
- if (CHECK(peer_fd < 0, "accept", "failed\n"))
- goto close_client;
+ CHECK(ctx.cookie != server_cookie, "ctx.cookie",
+ "selected sk %llu instead of %llu\n", ctx.cookie, server_cookie);
- close(peer_fd);
-close_client:
- close(client_fd);
close_servers:
for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
if (server_fds[i] != -1)
close(server_fds[i]);
}
- bpf_link__destroy(lookup_link);
}
static void run_sk_assign_v4(struct test_sk_lookup *skel,
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index b8b48cac2ac3..ab77596b64e3 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -7,6 +7,7 @@
#include "test_skmsg_load_helpers.skel.h"
#include "test_sockmap_update.skel.h"
#include "test_sockmap_invalid_update.skel.h"
+#include "test_sockmap_skb_verdict_attach.skel.h"
#include "bpf_iter_sockmap.skel.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
@@ -281,6 +282,39 @@ out:
bpf_iter_sockmap__destroy(skel);
}
+static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
+ enum bpf_attach_type second)
+{
+ struct test_sockmap_skb_verdict_attach *skel;
+ int err, map, verdict;
+
+ skel = test_sockmap_skb_verdict_attach__open_and_load();
+ if (CHECK_FAIL(!skel)) {
+ perror("test_sockmap_skb_verdict_attach__open_and_load");
+ return;
+ }
+
+ verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ map = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(verdict, map, first, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach");
+ goto out;
+ }
+
+ err = bpf_prog_attach(verdict, map, second, 0);
+ assert(err == -1 && errno == EBUSY);
+
+ err = bpf_prog_detach2(verdict, map, first);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_detach2");
+ goto out;
+ }
+out:
+ test_sockmap_skb_verdict_attach__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -301,4 +335,10 @@ void test_sockmap_basic(void)
test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash copy"))
test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap skb_verdict attach")) {
+ test_sockmap_skb_verdict_attach(BPF_SK_SKB_VERDICT,
+ BPF_SK_SKB_STREAM_VERDICT);
+ test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
+ BPF_SK_SKB_VERDICT);
+ }
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index d7d65a700799..648d9ae898d2 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1014,8 +1014,8 @@ static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family,
int sotype)
{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
int sock_map = bpf_map__fd(inner_map);
int err;
@@ -1125,8 +1125,8 @@ static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family,
int sotype)
{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
int sock_map = bpf_map__fd(inner_map);
int err;
@@ -1603,6 +1603,141 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
+static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ struct sockaddr_storage addr;
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ socklen_t len;
+ int err, n;
+ u64 value;
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ p0 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (p0 < 0)
+ return;
+ len = sizeof(addr);
+ err = xgetsockname(p0, sockaddr(&addr), &len);
+ if (err)
+ goto close_peer0;
+
+ c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
+ if (c0 < 0)
+ goto close_peer0;
+ err = xconnect(c0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+ err = xgetsockname(c0, sockaddr(&addr), &len);
+ if (err)
+ goto close_cli0;
+ err = xconnect(p0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+
+ p1 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (p1 < 0)
+ goto close_cli0;
+ err = xgetsockname(p1, sockaddr(&addr), &len);
+ if (err)
+ goto close_cli0;
+
+ c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
+ if (c1 < 0)
+ goto close_peer1;
+ err = xconnect(c1, sockaddr(&addr), len);
+ if (err)
+ goto close_cli1;
+ err = xgetsockname(c1, sockaddr(&addr), &len);
+ if (err)
+ goto close_cli1;
+ err = xconnect(p1, sockaddr(&addr), len);
+ if (err)
+ goto close_cli1;
+
+ key = 0;
+ value = p0;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_cli1;
+
+ key = 1;
+ value = p1;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_cli1;
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close_cli1;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close_cli1;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+ n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: read", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close_cli1:
+ xclose(c1);
+close_peer1:
+ xclose(p1);
+close_cli0:
+ xclose(c0);
+close_peer0:
+ xclose(p0);
+}
+
+static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ udp_skb_redir_to_connected(skel, map, family);
+}
+
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
int family)
{
@@ -1611,6 +1746,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_redir(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
+ test_udp_redir(skel, map, family);
}
void test_sockmap_listen(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c
new file mode 100644
index 000000000000..46556976dccc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include "test_static_linked.skel.h"
+
+void test_static_linked(void)
+{
+ int err;
+ struct test_static_linked* skel;
+
+ skel = test_static_linked__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->rodata->rovar1 = 1;
+ skel->bss->static_var1 = 2;
+ skel->bss->static_var11 = 3;
+
+ skel->rodata->rovar2 = 4;
+ skel->bss->static_var2 = 5;
+ skel->bss->static_var22 = 6;
+
+ err = test_static_linked__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = test_static_linked__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->var1, 1 * 2 + 2 + 3, "var1");
+ ASSERT_EQ(skel->bss->var2, 4 * 3 + 5 + 6, "var2");
+
+cleanup:
+ test_static_linked__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
new file mode 100644
index 000000000000..035c263aab1b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#define _GNU_SOURCE /* See feature_test_macros(7) */
+#include <unistd.h>
+#include <sys/syscall.h> /* For SYS_xxx definitions */
+#include <sys/types.h>
+#include <test_progs.h>
+#include "task_local_storage.skel.h"
+#include "task_local_storage_exit_creds.skel.h"
+#include "task_ls_recursion.skel.h"
+
+static void test_sys_enter_exit(void)
+{
+ struct task_local_storage *skel;
+ int err;
+
+ skel = task_local_storage__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ skel->bss->target_pid = syscall(SYS_gettid);
+
+ err = task_local_storage__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ syscall(SYS_gettid);
+ syscall(SYS_gettid);
+
+ /* 3x syscalls: 1x attach and 2x gettid */
+ ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
+ ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
+ ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
+out:
+ task_local_storage__destroy(skel);
+}
+
+static void test_exit_creds(void)
+{
+ struct task_local_storage_exit_creds *skel;
+ int err;
+
+ skel = task_local_storage_exit_creds__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = task_local_storage_exit_creds__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* trigger at least one exit_creds() */
+ if (CHECK_FAIL(system("ls > /dev/null")))
+ goto out;
+
+ /* sync rcu to make sure exit_creds() is called for "ls" */
+ kern_sync_rcu();
+ ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
+ ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
+out:
+ task_local_storage_exit_creds__destroy(skel);
+}
+
+static void test_recursion(void)
+{
+ struct task_ls_recursion *skel;
+ int err;
+
+ skel = task_ls_recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = task_ls_recursion__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* trigger sys_enter, make sure it does not cause deadlock */
+ syscall(SYS_gettid);
+
+out:
+ task_ls_recursion__destroy(skel);
+}
+
+void test_task_local_storage(void)
+{
+ if (test__start_subtest("sys_enter_exit"))
+ test_sys_enter_exit();
+ if (test__start_subtest("exit_creds"))
+ test_exit_creds();
+ if (test__start_subtest("recursion"))
+ test_recursion();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index b54bc0c351b7..0252f61d611a 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -68,7 +68,8 @@ void test_test_ima(void)
goto close_prog;
snprintf(cmd, sizeof(cmd), "./ima_setup.sh setup %s", measured_dir);
- if (CHECK_FAIL(system(cmd)))
+ err = system(cmd);
+ if (CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno))
goto close_clean;
err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
@@ -81,7 +82,8 @@ void test_test_ima(void)
close_clean:
snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir);
- CHECK_FAIL(system(cmd));
+ err = system(cmd);
+ CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno);
close_prog:
ima__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
index 115a3b0ad984..474c6a62078a 100644
--- a/tools/testing/selftests/bpf/progs/bind4_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -57,6 +57,27 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
return 0;
}
+static __inline int bind_reuseport(struct bpf_sock_addr *ctx)
+{
+ int val = 1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || !val)
+ return 1;
+ val = 0;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || val)
+ return 1;
+
+ return 0;
+}
+
static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt)
{
int old, tmp, new = 0xeb9f;
@@ -127,6 +148,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx)
if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY))
return 0;
+ /* Set reuseport and unset */
+ if (bind_reuseport(ctx))
+ return 0;
+
ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP);
ctx->user_port = bpf_htons(SERV4_REWRITE_PORT);
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
index 4c0d348034b9..c19cfa869f30 100644
--- a/tools/testing/selftests/bpf/progs/bind6_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -63,6 +63,27 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
return 0;
}
+static __inline int bind_reuseport(struct bpf_sock_addr *ctx)
+{
+ int val = 1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || !val)
+ return 1;
+ val = 0;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || val)
+ return 1;
+
+ return 0;
+}
+
static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt)
{
int old, tmp, new = 0xeb9f;
@@ -141,6 +162,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY))
return 0;
+ /* Set reuseport and unset */
+ if (bind_reuseport(ctx))
+ return 0;
+
ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0);
ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1);
ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2);
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index 6939bfd8690f..f62df4d023f9 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -174,8 +174,8 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
* as long as it is used in one of the func ptr
* under SEC(".struct_ops").
*/
-SEC("struct_ops/bictcp_init")
-void BPF_PROG(bictcp_init, struct sock *sk)
+SEC("struct_ops/bpf_cubic_init")
+void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
struct bictcp *ca = inet_csk_ca(sk);
@@ -192,7 +192,7 @@ void BPF_PROG(bictcp_init, struct sock *sk)
* The remaining tcp-cubic functions have an easier way.
*/
SEC("no-sec-prefix-bictcp_cwnd_event")
-void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
struct bictcp *ca = inet_csk_ca(sk);
@@ -384,7 +384,7 @@ tcp_friendliness:
}
/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */
-void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -403,7 +403,7 @@ void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
-__u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
+__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -420,7 +420,7 @@ __u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
-void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state)
+void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Loss) {
bictcp_reset(inet_csk_ca(sk));
@@ -496,7 +496,7 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)
}
}
-void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
+void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -525,21 +525,21 @@ void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
hystart_update(sk, delay);
}
-__u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
+extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
- return max(tp->snd_cwnd, tp->prior_cwnd);
+__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
+{
+ return tcp_reno_undo_cwnd(sk);
}
SEC(".struct_ops")
struct tcp_congestion_ops cubic = {
- .init = (void *)bictcp_init,
- .ssthresh = (void *)bictcp_recalc_ssthresh,
- .cong_avoid = (void *)bictcp_cong_avoid,
- .set_state = (void *)bictcp_state,
- .undo_cwnd = (void *)tcp_reno_undo_cwnd,
- .cwnd_event = (void *)bictcp_cwnd_event,
- .pkts_acked = (void *)bictcp_acked,
+ .init = (void *)bpf_cubic_init,
+ .ssthresh = (void *)bpf_cubic_recalc_ssthresh,
+ .cong_avoid = (void *)bpf_cubic_cong_avoid,
+ .set_state = (void *)bpf_cubic_state,
+ .undo_cwnd = (void *)bpf_cubic_undo_cwnd,
+ .cwnd_event = (void *)bpf_cubic_cwnd_event,
+ .pkts_acked = (void *)bpf_cubic_acked,
.name = "bpf_cubic",
};
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 4dc1a967776a..fd42247da8b4 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -194,22 +194,12 @@ __u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
}
-SEC("struct_ops/tcp_reno_cong_avoid")
-void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (!tcp_is_cwnd_limited(sk))
- return;
+extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
- /* In "safe" area, increase. */
- if (tcp_in_slow_start(tp)) {
- acked = tcp_slow_start(tp, acked);
- if (!acked)
- return;
- }
- /* In dangerous area, increase slowly. */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+SEC("struct_ops/dctcp_reno_cong_avoid")
+void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+{
+ tcp_reno_cong_avoid(sk, ack, acked);
}
SEC(".struct_ops")
@@ -226,7 +216,7 @@ struct tcp_congestion_ops dctcp = {
.in_ack_event = (void *)dctcp_update_alpha,
.cwnd_event = (void *)dctcp_cwnd_event,
.ssthresh = (void *)dctcp_ssthresh,
- .cong_avoid = (void *)tcp_reno_cong_avoid,
+ .cong_avoid = (void *)dctcp_cong_avoid,
.undo_cwnd = (void *)dctcp_cwnd_undo,
.set_state = (void *)dctcp_state,
.flags = TCP_CONG_NEEDS_ECN,
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index 31975c96e2c9..8aaa24a00322 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -174,6 +174,12 @@ struct struct_in_struct {
};
};
+struct struct_in_array {};
+
+struct struct_in_array_typed {};
+
+typedef struct struct_in_array_typed struct_in_array_t[2];
+
struct struct_with_embedded_stuff {
int a;
struct {
@@ -203,6 +209,14 @@ struct struct_with_embedded_stuff {
} r[5];
struct struct_in_struct s[10];
int t[11];
+ struct struct_in_array (*u)[2];
+ struct_in_array_t *v;
+};
+
+struct float_struct {
+ float f;
+ const double *d;
+ volatile long double *ld;
};
struct root_struct {
@@ -219,6 +233,7 @@ struct root_struct {
union_fwd_t *_12;
union_fwd_ptr_t _13;
struct struct_with_embedded_stuff _14;
+ struct float_struct _15;
};
/* ------ END-EXPECTED-OUTPUT ------ */
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 9a2850850121..9982eb969048 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -807,6 +807,7 @@ struct core_reloc_size_output {
int arr_elem_sz;
int ptr_sz;
int enum_sz;
+ int float_sz;
};
struct core_reloc_size {
@@ -816,6 +817,7 @@ struct core_reloc_size {
int arr_field[4];
void *ptr_field;
enum { VALUE = 123 } enum_field;
+ float float_field;
};
struct core_reloc_size___diff_sz {
@@ -825,6 +827,7 @@ struct core_reloc_size___diff_sz {
char arr_field[10];
void *ptr_field;
enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
+ double float_field;
};
/* Error case of two candidates with the fields (int_field) at the same
@@ -839,6 +842,7 @@ struct core_reloc_size___err_ambiguous1 {
int arr_field[4];
void *ptr_field;
enum { VALUE___1 = 123 } enum_field;
+ float float_field;
};
struct core_reloc_size___err_ambiguous2 {
@@ -850,6 +854,7 @@ struct core_reloc_size___err_ambiguous2 {
int arr_field[4];
void *ptr_field;
enum { VALUE___2 = 123 } enum_field;
+ float float_field;
};
/*
diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c
index 5f645fdaba6f..52a550d281d9 100644
--- a/tools/testing/selftests/bpf/progs/fentry_test.c
+++ b/tools/testing/selftests/bpf/progs/fentry_test.c
@@ -64,7 +64,7 @@ __u64 test7_result = 0;
SEC("fentry/bpf_fentry_test7")
int BPF_PROG(test7, struct bpf_fentry_test_t *arg)
{
- if (arg == 0)
+ if (!arg)
test7_result = 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c
new file mode 100644
index 000000000000..03a672d76353
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+int pid = 0;
+int fentry_cnt = 0;
+int fexit_cnt = 0;
+
+SEC("fentry/__x64_sys_nanosleep")
+int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs)
+{
+ if ((int)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ fentry_cnt++;
+ return 0;
+}
+
+SEC("fexit/__x64_sys_nanosleep")
+int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret)
+{
+ if ((int)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ fexit_cnt++;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
index 0952affb22a6..8f1ccb7302e1 100644
--- a/tools/testing/selftests/bpf/progs/fexit_test.c
+++ b/tools/testing/selftests/bpf/progs/fexit_test.c
@@ -65,7 +65,7 @@ __u64 test7_result = 0;
SEC("fexit/bpf_fentry_test7")
int BPF_PROG(test7, struct bpf_fentry_test_t *arg)
{
- if (arg == 0)
+ if (!arg)
test7_result = 1;
return 0;
}
@@ -74,7 +74,7 @@ __u64 test8_result = 0;
SEC("fexit/bpf_fentry_test8")
int BPF_PROG(test8, struct bpf_fentry_test_t *arg)
{
- if (arg->a == 0)
+ if (!arg->a)
test8_result = 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
new file mode 100644
index 000000000000..75e8e1069fe7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} arraymap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} percpu_map SEC(".maps");
+
+struct callback_ctx {
+ int output;
+};
+
+static __u64
+check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ data->output += *val;
+ if (*key == 1)
+ return 1; /* stop the iteration */
+ return 0;
+}
+
+__u32 cpu = 0;
+__u64 percpu_val = 0;
+
+static __u64
+check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ cpu = bpf_get_smp_processor_id();
+ percpu_val = *val;
+ return 0;
+}
+
+u32 arraymap_output = 0;
+
+SEC("classifier")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ struct callback_ctx data;
+
+ data.output = 0;
+ bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
+ arraymap_output = data.output;
+
+ bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
new file mode 100644
index 000000000000..913dd91aafff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} percpu_map SEC(".maps");
+
+struct callback_ctx {
+ struct __sk_buff *ctx;
+ int input;
+ int output;
+};
+
+static __u64
+check_hash_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ struct __sk_buff *skb = data->ctx;
+ __u32 k;
+ __u64 v;
+
+ if (skb) {
+ k = *key;
+ v = *val;
+ if (skb->len == 10000 && k == 10 && v == 10)
+ data->output = 3; /* impossible path */
+ else
+ data->output = 4;
+ } else {
+ data->output = data->input;
+ bpf_map_delete_elem(map, key);
+ }
+
+ return 0;
+}
+
+__u32 cpu = 0;
+__u32 percpu_called = 0;
+__u32 percpu_key = 0;
+__u64 percpu_val = 0;
+int percpu_output = 0;
+
+static __u64
+check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *unused)
+{
+ struct callback_ctx data;
+
+ percpu_called++;
+ cpu = bpf_get_smp_processor_id();
+ percpu_key = *key;
+ percpu_val = *val;
+
+ data.ctx = 0;
+ data.input = 100;
+ data.output = 0;
+ bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
+ percpu_output = data.output;
+
+ return 0;
+}
+
+int hashmap_output = 0;
+int hashmap_elems = 0;
+int percpu_map_elems = 0;
+
+SEC("classifier")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ struct callback_ctx data;
+
+ data.ctx = skb;
+ data.input = 10;
+ data.output = 0;
+ hashmap_elems = bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
+ hashmap_output = data.output;
+
+ percpu_map_elems = bpf_for_each_map_elem(&percpu_map, check_percpu_elem,
+ (void *)0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
new file mode 100644
index 000000000000..470f8723e463
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
+extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
+ __u32 c, __u64 d) __ksym;
+
+SEC("classifier")
+int kfunc_call_test2(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ return bpf_kfunc_call_test2((struct sock *)sk, 1, 2);
+}
+
+SEC("classifier")
+int kfunc_call_test1(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ __u64 a = 1ULL << 32;
+ __u32 ret;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ a = bpf_kfunc_call_test1((struct sock *)sk, 1, a | 2, 3, a | 4);
+ ret = a >> 32; /* ret should be 2 */
+ ret += (__u32)a; /* ret should be 12 */
+
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
new file mode 100644
index 000000000000..b2dcb7d9cb03
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+extern const int bpf_prog_active __ksym;
+extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
+ __u32 c, __u64 d) __ksym;
+extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
+int active_res = -1;
+int sk_state = -1;
+
+int __noinline f1(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ int *active;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active,
+ bpf_get_smp_processor_id());
+ if (active)
+ active_res = *active;
+
+ sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
+
+ return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
+}
+
+SEC("classifier")
+int kfunc_call_test1(struct __sk_buff *skb)
+{
+ return f1(skb);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
new file mode 100644
index 000000000000..38de0331e6b4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* typically virtio scsi has max SGs of 6 */
+#define VIRTIO_MAX_SGS 6
+
+/* Verifier will fail with SG_MAX = 128. The failure can be
+ * workarounded with a smaller SG_MAX, e.g. 10.
+ */
+#define WORKAROUND
+#ifdef WORKAROUND
+#define SG_MAX 10
+#else
+/* typically virtio blk has max SEG of 128 */
+#define SG_MAX 128
+#endif
+
+#define SG_CHAIN 0x01UL
+#define SG_END 0x02UL
+
+struct scatterlist {
+ unsigned long page_link;
+ unsigned int offset;
+ unsigned int length;
+};
+
+#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN)
+#define sg_is_last(sg) ((sg)->page_link & SG_END)
+#define sg_chain_ptr(sg) \
+ ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
+
+static inline struct scatterlist *__sg_next(struct scatterlist *sgp)
+{
+ struct scatterlist sg;
+
+ bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+ if (sg_is_last(&sg))
+ return NULL;
+
+ sgp++;
+
+ bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+ if (sg_is_chain(&sg))
+ sgp = sg_chain_ptr(&sg);
+
+ return sgp;
+}
+
+static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
+{
+ struct scatterlist *sgp;
+
+ bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
+ return sgp;
+}
+
+int config = 0;
+int result = 0;
+
+SEC("kprobe/virtqueue_add_sgs")
+int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
+ unsigned int out_sgs, unsigned int in_sgs)
+{
+ struct scatterlist *sgp = NULL;
+ __u64 length1 = 0, length2 = 0;
+ unsigned int i, n, len;
+
+ if (config != 0)
+ return 0;
+
+ for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
+ for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+ sgp = __sg_next(sgp)) {
+ bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ length1 += len;
+ n++;
+ }
+ }
+
+ for (i = 0; (i < VIRTIO_MAX_SGS) && (i < in_sgs); i++) {
+ for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+ sgp = __sg_next(sgp)) {
+ bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ length2 += len;
+ n++;
+ }
+ }
+
+ config = 1;
+ result = length2 - length1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
index 6b670039ea67..1d8918dfbd3f 100644
--- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c
+++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
@@ -16,6 +16,13 @@ bool skip = false;
#define STRSIZE 2048
#define EXPECTED_STRSIZE 256
+#if defined(bpf_target_s390)
+/* NULL points to a readable struct lowcore on s390, so take the last page */
+#define BADPTR ((void *)0xFFFFFFFFFFFFF000ULL)
+#else
+#define BADPTR 0
+#endif
+
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
@@ -113,11 +120,11 @@ int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb)
}
/* Check invalid ptr value */
- p.ptr = 0;
+ p.ptr = BADPTR;
__ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
if (__ret >= 0) {
- bpf_printk("printing NULL should generate error, got (%d)",
- __ret);
+ bpf_printk("printing %llx should generate error, got (%d)",
+ (unsigned long long)BADPTR, __ret);
ret = -ERANGE;
}
diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
index cf6823f42e80..7f2eaa2f89f8 100644
--- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
@@ -4,7 +4,6 @@
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
-#define NULL 0
#define INLINE __always_inline
#define skb_shorter(skb, len) ((void *)(long)(skb)->data + (len) > (void *)(long)skb->data_end)
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage.c b/tools/testing/selftests/bpf/progs/task_local_storage.c
new file mode 100644
index 000000000000..80a0a20db88d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} enter_id SEC(".maps");
+
+#define MAGIC_VALUE 0xabcd1234
+
+pid_t target_pid = 0;
+int mismatch_cnt = 0;
+int enter_cnt = 0;
+int exit_cnt = 0;
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ ptr = bpf_task_storage_get(&enter_id, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return 0;
+
+ __sync_fetch_and_add(&enter_cnt, 1);
+ *ptr = MAGIC_VALUE + enter_cnt;
+
+ return 0;
+}
+
+SEC("tp_btf/sys_exit")
+int BPF_PROG(on_exit, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ ptr = bpf_task_storage_get(&enter_id, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return 0;
+
+ __sync_fetch_and_add(&exit_cnt, 1);
+ if (*ptr != MAGIC_VALUE + exit_cnt)
+ __sync_fetch_and_add(&mismatch_cnt, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
new file mode 100644
index 000000000000..81758c0aef99
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, __u64);
+} task_storage SEC(".maps");
+
+int valid_ptr_count = 0;
+int null_ptr_count = 0;
+
+SEC("fentry/exit_creds")
+int BPF_PROG(trace_exit_creds, struct task_struct *task)
+{
+ __u64 *ptr;
+
+ ptr = bpf_task_storage_get(&task_storage, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ __sync_fetch_and_add(&valid_ptr_count, 1);
+ else
+ __sync_fetch_and_add(&null_ptr_count, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
new file mode 100644
index 000000000000..564583dca7c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_b SEC(".maps");
+
+SEC("fentry/bpf_local_storage_lookup")
+int BPF_PROG(on_lookup)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ bpf_task_storage_delete(&map_a, task);
+ bpf_task_storage_delete(&map_b, task);
+ return 0;
+}
+
+SEC("fentry/bpf_local_storage_update")
+int BPF_PROG(on_update)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ long *ptr;
+
+ ptr = bpf_task_storage_get(&map_a, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr += 1;
+
+ ptr = bpf_task_storage_get(&map_b, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr += 1;
+
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ ptr = bpf_task_storage_get(&map_a, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr = 200;
+
+ ptr = bpf_task_storage_get(&map_b, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr = 100;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index b7787b43f9db..c4a9bae96e75 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -105,6 +105,54 @@ int xdp_minus_delta(struct xdp_md *ctx)
return retval;
}
+SEC("xdp")
+int xdp_input_len(struct xdp_md *ctx)
+{
+ int retval = XDP_PASS; /* Expected retval on successful test */
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ __u32 data_len = data_end - data;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input len is L3, like MTU and iph->tot_len.
+ * Remember XDP data_len is L2.
+ */
+ __u32 mtu_len = data_len - ETH_HLEN;
+
+ if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0))
+ retval = XDP_ABORTED;
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("xdp")
+int xdp_input_len_exceed(struct xdp_md *ctx)
+{
+ int retval = XDP_ABORTED; /* Fail */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ int err;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input length value is L3 size like MTU.
+ */
+ __u32 mtu_len = GLOBAL_USER_MTU;
+
+ mtu_len += 1; /* Exceed with 1 */
+
+ err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0);
+ if (err == BPF_MTU_CHK_RET_FRAG_NEEDED)
+ retval = XDP_PASS ; /* Success in exceeding MTU check */
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
SEC("classifier")
int tc_use_helper(struct __sk_buff *ctx)
{
@@ -196,3 +244,47 @@ int tc_minus_delta(struct __sk_buff *ctx)
global_bpf_mtu_xdp = mtu_len;
return retval;
}
+
+SEC("classifier")
+int tc_input_len(struct __sk_buff *ctx)
+{
+ int retval = BPF_OK; /* Expected retval on successful test */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input length value is L3 size.
+ */
+ __u32 mtu_len = GLOBAL_USER_MTU;
+
+ if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0))
+ retval = BPF_DROP;
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("classifier")
+int tc_input_len_exceed(struct __sk_buff *ctx)
+{
+ int retval = BPF_DROP; /* Fail */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ int err;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input length value is L3 size like MTU.
+ */
+ __u32 mtu_len = GLOBAL_USER_MTU;
+
+ mtu_len += 1; /* Exceed with 1 */
+
+ err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0);
+ if (err == BPF_MTU_CHK_RET_FRAG_NEEDED)
+ retval = BPF_OK; /* Success in exceeding MTU check */
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
index d7fb6cfc7891..7b2d576aeea1 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
@@ -21,6 +21,7 @@ struct core_reloc_size_output {
int arr_elem_sz;
int ptr_sz;
int enum_sz;
+ int float_sz;
};
struct core_reloc_size {
@@ -30,6 +31,7 @@ struct core_reloc_size {
int arr_field[4];
void *ptr_field;
enum { VALUE = 123 } enum_field;
+ float float_field;
};
SEC("raw_tracepoint/sys_enter")
@@ -45,6 +47,7 @@ int test_core_size(void *ctx)
out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]);
out->ptr_sz = bpf_core_field_size(in->ptr_field);
out->enum_sz = bpf_core_field_size(in->enum_field);
+ out->float_sz = bpf_core_field_size(in->float_field);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
index 61c2ae92ce41..97b7031d0e22 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -14,7 +14,7 @@ struct Big {
__noinline int foo(const struct Big *big)
{
- if (big == 0)
+ if (!big)
return 0;
return bpf_get_prandom_u32() < big->y;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func11.c b/tools/testing/selftests/bpf/progs/test_global_func11.c
index 28488047c849..ef5277d982d9 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func11.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func11.c
@@ -15,5 +15,5 @@ __noinline int foo(const struct S *s)
SEC("cgroup_skb/ingress")
int test_cls(struct __sk_buff *skb)
{
- return foo(skb);
+ return foo((const void *)skb);
}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index 1032b292af5b..ac6f7f205e25 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -64,6 +64,10 @@ static const int PROG_DONE = 1;
static const __u32 KEY_SERVER_A = SERVER_A;
static const __u32 KEY_SERVER_B = SERVER_B;
+static const __u16 SRC_PORT = bpf_htons(8008);
+static const __u32 SRC_IP4 = IP4(127, 0, 0, 2);
+static const __u32 SRC_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000002);
+
static const __u16 DST_PORT = 7007; /* Host byte order */
static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
@@ -398,11 +402,12 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
if (LSW(ctx->protocol, 0) != IPPROTO_TCP)
return SK_DROP;
- /* Narrow loads from remote_port field. Expect non-0 value. */
- if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 &&
- LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0)
+ /* Narrow loads from remote_port field. Expect SRC_PORT. */
+ if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) ||
+ LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff) ||
+ LSB(ctx->remote_port, 2) != 0 || LSB(ctx->remote_port, 3) != 0)
return SK_DROP;
- if (LSW(ctx->remote_port, 0) == 0)
+ if (LSW(ctx->remote_port, 0) != SRC_PORT)
return SK_DROP;
/* Narrow loads from local_port field. Expect DST_PORT. */
@@ -415,11 +420,14 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
/* Narrow loads from IPv4 fields */
if (v4) {
- /* Expect non-0.0.0.0 in remote_ip4 */
- if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 &&
- LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0)
+ /* Expect SRC_IP4 in remote_ip4 */
+ if (LSB(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xff) ||
+ LSB(ctx->remote_ip4, 1) != ((SRC_IP4 >> 8) & 0xff) ||
+ LSB(ctx->remote_ip4, 2) != ((SRC_IP4 >> 16) & 0xff) ||
+ LSB(ctx->remote_ip4, 3) != ((SRC_IP4 >> 24) & 0xff))
return SK_DROP;
- if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0)
+ if (LSW(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip4, 1) != ((SRC_IP4 >> 16) & 0xffff))
return SK_DROP;
/* Expect DST_IP4 in local_ip4 */
@@ -448,20 +456,32 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
/* Narrow loads from IPv6 fields */
if (!v4) {
- /* Expect non-:: IP in remote_ip6 */
- if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 &&
- LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 &&
- LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 &&
- LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 &&
- LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 &&
- LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 &&
- LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 &&
- LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0)
+ /* Expect SRC_IP6 in remote_ip6 */
+ if (LSB(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[0], 2) != ((SRC_IP6[0] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[0], 3) != ((SRC_IP6[0] >> 24) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 2) != ((SRC_IP6[1] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 3) != ((SRC_IP6[1] >> 24) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 2) != ((SRC_IP6[2] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 3) != ((SRC_IP6[2] >> 24) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 2) != ((SRC_IP6[3] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 3) != ((SRC_IP6[3] >> 24) & 0xff))
return SK_DROP;
- if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 &&
- LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 &&
- LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 &&
- LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0)
+ if (LSW(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 16) & 0xffff) ||
+ LSW(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 16) & 0xffff) ||
+ LSW(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 16) & 0xffff) ||
+ LSW(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 16) & 0xffff))
return SK_DROP;
/* Expect DST_IP6 in local_ip6 */
if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) ||
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index a3a366c57ce1..a39eba9f5201 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -29,15 +29,16 @@ struct {
} verdict_map SEC(".maps");
static volatile bool test_sockmap; /* toggled by user-space */
+static volatile bool test_ingress; /* toggled by user-space */
SEC("sk_skb/stream_parser")
-int prog_skb_parser(struct __sk_buff *skb)
+int prog_stream_parser(struct __sk_buff *skb)
{
return skb->len;
}
SEC("sk_skb/stream_verdict")
-int prog_skb_verdict(struct __sk_buff *skb)
+int prog_stream_verdict(struct __sk_buff *skb)
{
unsigned int *count;
__u32 zero = 0;
@@ -55,6 +56,27 @@ int prog_skb_verdict(struct __sk_buff *skb)
return verdict;
}
+SEC("sk_skb/skb_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ unsigned int *count;
+ __u32 zero = 0;
+ int verdict;
+
+ if (test_sockmap)
+ verdict = bpf_sk_redirect_map(skb, &sock_map, zero,
+ test_ingress ? BPF_F_INGRESS : 0);
+ else
+ verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero,
+ test_ingress ? BPF_F_INGRESS : 0);
+
+ count = bpf_map_lookup_elem(&verdict_map, &verdict);
+ if (count)
+ (*count)++;
+
+ return verdict;
+}
+
SEC("sk_msg")
int prog_msg_verdict(struct sk_msg_md *msg)
{
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
new file mode 100644
index 000000000000..2d31f66e4f23
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb/skb_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c
new file mode 100644
index 000000000000..ea1a6c4c7172
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* 8-byte aligned .bss */
+static volatile long static_var1;
+static volatile int static_var11;
+int var1 = 0;
+/* 4-byte aligned .rodata */
+const volatile int rovar1;
+
+/* same "subprog" name in both files */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 2;
+}
+
+SEC("raw_tp/sys_enter")
+int handler1(const void *ctx)
+{
+ var1 = subprog(rovar1) + static_var1 + static_var11;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
+int VERSION SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c
new file mode 100644
index 000000000000..54d8d1ab577c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* 4-byte aligned .bss */
+static volatile int static_var2;
+static volatile int static_var22;
+int var2 = 0;
+/* 8-byte aligned .rodata */
+const volatile long rovar2;
+
+/* same "subprog" name in both files */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 3;
+}
+
+SEC("raw_tp/sys_enter")
+int handler2(const void *ctx)
+{
+ var2 = subprog(rovar2) + static_var2 + static_var22;
+
+ return 0;
+}
+
+/* different name and/or type of the variable doesn't matter */
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 37bce7a7c394..84cd63259554 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -24,14 +24,29 @@ static const int cfg_port = 8000;
static const int cfg_udp_src = 20000;
+#define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN)
+
#define UDP_PORT 5555
#define MPLS_OVER_UDP_PORT 6635
#define ETH_OVER_UDP_PORT 7777
+#define VXLAN_UDP_PORT 8472
+
+#define EXTPROTO_VXLAN 0x1
+
+#define VXLAN_N_VID (1u << 24)
+#define VXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8)
+#define VXLAN_FLAGS 0x8
+#define VXLAN_VNI 1
/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
MPLS_LS_S_MASK | 0xff);
+struct vxlanhdr {
+ __be32 vx_flags;
+ __be32 vx_vni;
+} __attribute__((packed));
+
struct gre_hdr {
__be16 flags;
__be16 protocol;
@@ -45,13 +60,13 @@ union l4hdr {
struct v4hdr {
struct iphdr ip;
union l4hdr l4hdr;
- __u8 pad[16]; /* enough space for L2 header */
+ __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
} __attribute__((packed));
struct v6hdr {
struct ipv6hdr ip;
union l4hdr l4hdr;
- __u8 pad[16]; /* enough space for L2 header */
+ __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
} __attribute__((packed));
static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = ~((csum & 0xffff) + (csum >> 16));
}
-static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
- __u16 l2_proto)
+static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto, __u16 ext_proto)
{
__u16 udp_dst = UDP_PORT;
struct iphdr iph_inner;
struct v4hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+ __u8 *l2_hdr = NULL;
int tcp_off;
__u64 flags;
@@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
break;
case ETH_P_TEB:
l2_len = ETH_HLEN;
- udp_dst = ETH_OVER_UDP_PORT;
+ if (ext_proto & EXTPROTO_VXLAN) {
+ udp_dst = VXLAN_UDP_PORT;
+ l2_len += sizeof(struct vxlanhdr);
+ } else
+ udp_dst = ETH_OVER_UDP_PORT;
break;
}
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
}
/* add L2 encap (if specified) */
+ l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) {
case ETH_P_MPLS_UC:
- *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ *(__u32 *)l2_hdr = mpls_label;
break;
case ETH_P_TEB:
- if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
- ETH_HLEN))
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+ if (ext_proto & EXTPROTO_VXLAN) {
+ struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+ vxlan_hdr->vx_flags = VXLAN_FLAGS;
+ vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+ l2_hdr += sizeof(struct vxlanhdr);
+ }
+
+ if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT;
+
break;
}
olen += l2_len;
@@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
}
-static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
__u16 l2_proto)
{
+ return __encap_ipv4(skb, encap_proto, l2_proto, 0);
+}
+
+static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto, __u16 ext_proto)
+{
__u16 udp_dst = UDP_PORT;
struct ipv6hdr iph_inner;
struct v6hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+ __u8 *l2_hdr = NULL;
__u16 tot_len;
__u64 flags;
@@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
break;
case ETH_P_TEB:
l2_len = ETH_HLEN;
- udp_dst = ETH_OVER_UDP_PORT;
+ if (ext_proto & EXTPROTO_VXLAN) {
+ udp_dst = VXLAN_UDP_PORT;
+ l2_len += sizeof(struct vxlanhdr);
+ } else
+ udp_dst = ETH_OVER_UDP_PORT;
break;
}
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -267,7 +310,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
- sizeof(h_outer.l4hdr.udp);
+ sizeof(h_outer.l4hdr.udp) + l2_len;
h_outer.l4hdr.udp.check = 0;
h_outer.l4hdr.udp.len = bpf_htons(tot_len);
break;
@@ -278,13 +321,24 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
}
/* add L2 encap (if specified) */
+ l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) {
case ETH_P_MPLS_UC:
- *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ *(__u32 *)l2_hdr = mpls_label;
break;
case ETH_P_TEB:
- if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
- ETH_HLEN))
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+ if (ext_proto & EXTPROTO_VXLAN) {
+ struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+ vxlan_hdr->vx_flags = VXLAN_FLAGS;
+ vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+ l2_hdr += sizeof(struct vxlanhdr);
+ }
+
+ if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT;
break;
}
@@ -309,6 +363,12 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
}
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto)
+{
+ return __encap_ipv6(skb, encap_proto, l2_proto, 0);
+}
+
SEC("encap_ipip_none")
int __encap_ipip_none(struct __sk_buff *skb)
{
@@ -372,6 +432,17 @@ int __encap_udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("encap_vxlan_eth")
+int __encap_vxlan_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return __encap_ipv4(skb, IPPROTO_UDP,
+ ETH_P_TEB,
+ EXTPROTO_VXLAN);
+ else
+ return TC_ACT_OK;
+}
+
SEC("encap_sit_none")
int __encap_sit_none(struct __sk_buff *skb)
{
@@ -444,6 +515,17 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("encap_ip6vxlan_eth")
+int __encap_ip6vxlan_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return __encap_ipv6(skb, IPPROTO_UDP,
+ ETH_P_TEB,
+ EXTPROTO_VXLAN);
+ else
+ return TC_ACT_OK;
+}
+
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
char buf[sizeof(struct v6hdr)];
@@ -479,6 +561,9 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
case ETH_OVER_UDP_PORT:
olen += ETH_HLEN;
break;
+ case VXLAN_UDP_PORT:
+ olen += ETH_HLEN + sizeof(struct vxlanhdr);
+ break;
}
break;
default:
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index a621b58ab079..ba6eadfec565 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -446,10 +446,8 @@ int _geneve_get_tunnel(struct __sk_buff *skb)
}
ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
+ if (ret < 0)
+ gopt.opt_class = 0;
bpf_trace_printk(fmt, sizeof(fmt),
key.tunnel_id, key.remote_ipv4, gopt.opt_class);
@@ -510,10 +508,8 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb)
}
ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
+ if (ret < 0)
+ gopt.opt_class = 0;
bpf_trace_printk(fmt, sizeof(fmt),
key.tunnel_id, key.remote_ipv4, gopt.opt_class);
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index 2db3c60e1e61..ac349a5cea7e 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -85,23 +85,6 @@ make_with_tmpdir() {
echo
}
-make_doc_and_clean() {
- echo -e "\$PWD: $PWD"
- echo -e "command: make -s $* doc >/dev/null"
- RST2MAN_OPTS="--exit-status=1" make $J -s $* doc
- if [ $? -ne 0 ] ; then
- ERROR=1
- printf "FAILURE: Errors or warnings when building documentation\n"
- fi
- (
- if [ $# -ge 1 ] ; then
- cd ${@: -1}
- fi
- make -s doc-clean
- )
- echo
-}
-
echo "Trying to build bpftool"
echo -e "... through kbuild\n"
@@ -162,7 +145,3 @@ make_and_clean
make_with_tmpdir OUTPUT
make_with_tmpdir O
-
-echo -e "Checking documentation build\n"
-# From tools/bpf/bpftool
-make_doc_and_clean
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index 2023725f1962..e2394eea4b7f 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -66,4 +66,7 @@
#define BTF_FUNC_ENC(name, func_proto) \
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto)
+#define BTF_TYPE_FLOAT_ENC(name, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
+
#endif /* _TEST_BTF_H */
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
new file mode 100755
index 000000000000..7eb940a7b2eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+# Assume script is located under tools/testing/selftests/bpf/. We want to start
+# build attempts from the top of kernel repository.
+SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0)
+SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
+KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
+cd $KDIR_ROOT_DIR
+
+for tgt in docs docs-clean; do
+ make -s -C $PWD/$SCRIPT_REL_DIR $tgt;
+done
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f7c2fd89d01a..e87c8546230e 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -152,6 +152,17 @@ extern int test__join_cgroup(const char *path);
___ok; \
})
+#define ASSERT_LT(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act < ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld >= expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
#define ASSERT_STREQ(actual, expected, name) ({ \
static int duration = 0; \
const char *___act = actual; \
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 427ca00a3217..eefd445b96fc 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -732,7 +732,7 @@ static int sendmsg_test(struct sockmap_options *opt)
* socket is not a valid test. So in this case lets not
* enable kTLS but still run the test.
*/
- if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) {
+ if (!txmsg_redir || txmsg_ingress) {
err = sockmap_init_ktls(opt->verbose, rx_fd);
if (err)
return err;
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 7c76b841b17b..c9dde9b9d987 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -44,8 +44,8 @@ setup() {
# clamp route to reserve room for tunnel headers
ip -netns "${ns1}" -4 route flush table main
ip -netns "${ns1}" -6 route flush table main
- ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1
- ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1
+ ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1450 dev veth1
+ ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1430 dev veth1
sleep 1
@@ -105,6 +105,12 @@ if [[ "$#" -eq "0" ]]; then
echo "sit"
$0 ipv6 sit none 100
+ echo "ip4 vxlan"
+ $0 ipv4 vxlan eth 2000
+
+ echo "ip6 vxlan"
+ $0 ipv6 ip6vxlan eth 2000
+
for mac in none mpls eth ; do
echo "ip gre $mac"
$0 ipv4 gre $mac 100
@@ -214,6 +220,9 @@ if [[ "$tuntype" =~ "udp" ]]; then
targs="encap fou encap-sport auto encap-dport $dport"
elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
ttype=$gretaptype
+elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
+ ttype="vxlan"
+ targs="id 1 dstport 8472 udp6zerocsumrx"
else
ttype=$tuntype
targs=""
@@ -242,7 +251,7 @@ if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
# No support for TEB fou tunnel; expect failure.
expect_tun_fail=1
-elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then
# Share ethernet address between tunnel/veth2 so L2 decap works.
ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
awk '/ether/ { print $2 }')
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 58b5a349d3ba..1512092e1e68 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -105,7 +105,7 @@ struct bpf_test {
enum bpf_prog_type prog_type;
uint8_t flags;
void (*fill_helper)(struct bpf_test *self);
- uint8_t runs;
+ int runs;
#define bpf_testdata_struct_t \
struct { \
uint32_t retval, retval_unpriv; \
@@ -1165,7 +1165,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
run_errs = 0;
run_successes = 0;
- if (!alignment_prevented_execution && fd_prog >= 0) {
+ if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
uint32_t expected_val;
int i;
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 88a7483eaae4..46633a3bfb0b 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -71,13 +71,21 @@
#
# Run (full output without color-coding):
# sudo ./test_xsk.sh
+#
+# Run with verbose output:
+# sudo ./test_xsk.sh -v
+#
+# Run and dump packet contents:
+# sudo ./test_xsk.sh -D
. xsk_prereqs.sh
-while getopts c flag
+while getopts "cvD" flag
do
case "${flag}" in
c) colorconsole=1;;
+ v) verbose=1;;
+ D) dump_pkts=1;;
esac
done
@@ -95,17 +103,22 @@ NS1=af_xdp${VETH1_POSTFIX}
MTU=1500
setup_vethPairs() {
- echo "setting up ${VETH0}: namespace: ${NS0}"
+ if [[ $verbose -eq 1 ]]; then
+ echo "setting up ${VETH0}: namespace: ${NS0}"
+ fi
ip netns add ${NS1}
- ip link add ${VETH0} type veth peer name ${VETH1}
+ ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4
if [ -f /proc/net/if_inet6 ]; then
echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
fi
- echo "setting up ${VETH1}: namespace: ${NS1}"
+ if [[ $verbose -eq 1 ]]; then
+ echo "setting up ${VETH1}: namespace: ${NS1}"
+ fi
ip link set ${VETH1} netns ${NS1}
ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
ip link set ${VETH0} mtu ${MTU}
ip netns exec ${NS1} ip link set ${VETH1} up
+ ip netns exec ${NS1} ip link set dev lo up
ip link set ${VETH0} up
}
@@ -125,121 +138,24 @@ echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE}
validate_veth_spec_file
-echo "Spec file created: ${SPECFILE}"
-
-test_status $retval "${TEST_NAME}"
-
-## START TESTS
-
-statusList=()
-
-### TEST 1
-TEST_NAME="XSK KSELFTEST FRAMEWORK"
-
-echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode"
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-retval=$?
-if [ $retval -eq 0 ]; then
- echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode"
- vethXDPnative ${VETH0} ${VETH1} ${NS1}
+if [[ $verbose -eq 1 ]]; then
+ echo "Spec file created: ${SPECFILE}"
+ VERBOSE_ARG="-v"
fi
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 2
-TEST_NAME="SKB NOPOLL"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 3
-TEST_NAME="SKB POLL"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-p")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 4
-TEST_NAME="DRV NOPOLL"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 5
-TEST_NAME="DRV POLL"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N" "-p")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 6
-TEST_NAME="SKB SOCKET TEARDOWN"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-T")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 7
-TEST_NAME="DRV SOCKET TEARDOWN"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N" "-T")
-execxdpxceiver params
+if [[ $dump_pkts -eq 1 ]]; then
+ DUMP_PKTS_ARG="-D"
+fi
-retval=$?
test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-### TEST 8
-TEST_NAME="SKB BIDIRECTIONAL SOCKETS"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-B")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
+## START TESTS
-### TEST 9
-TEST_NAME="DRV BIDIRECTIONAL SOCKETS"
+statusList=()
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
+TEST_NAME="XSK KSELFTESTS"
-params=("-N" "-B")
-execxdpxceiver params
+execxdpxceiver
retval=$?
test_status $retval "${TEST_NAME}"
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index bed53b561e04..1b138cd2b187 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -250,12 +250,13 @@
BPF_MOV64_IMM(BPF_REG_5, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_csum_diff),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.fixup_map_array_ro = { 3 },
.result = ACCEPT,
- .retval = -29,
+ .retval = 65507,
},
{
"invalid write map access into a read-only array 1",
diff --git a/tools/testing/selftests/bpf/verifier/atomic_and.c b/tools/testing/selftests/bpf/verifier/atomic_and.c
index 1bdc8e6684f7..fe4bb70eb9c5 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_and.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_and.c
@@ -75,3 +75,26 @@
},
.result = ACCEPT,
},
+{
+ "BPF_ATOMIC_AND with fetch - r0 as source reg",
+ .insns = {
+ /* val = 0x110; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110),
+ /* old = atomic_fetch_and(&val, 0x011); */
+ BPF_MOV64_IMM(BPF_REG_0, 0x011),
+ BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_0, -8),
+ /* if (old != 0x110) exit(3); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x110, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* if (val != 0x010) exit(2); */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
index 2efd8bcf57a1..6e52dfc64415 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
@@ -94,3 +94,28 @@
.result = REJECT,
.errstr = "invalid read from stack",
},
+{
+ "BPF_W cmpxchg should zero top 32 bits",
+ .insns = {
+ /* r0 = U64_MAX; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
+ /* u64 val = r0; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ /* r0 = (u32)atomic_cmpxchg((u32 *)&val, r0, 1); */
+ BPF_MOV32_IMM(BPF_REG_1, 1),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+ /* r1 = 0x00000000FFFFFFFFull; */
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+ /* if (r0 != r1) exit(1); */
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_or.c b/tools/testing/selftests/bpf/verifier/atomic_or.c
index 70f982e1f9f0..9d0716ac5080 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_or.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_or.c
@@ -75,3 +75,28 @@
},
.result = ACCEPT,
},
+{
+ "BPF_W atomic_fetch_or should zero top 32 bits",
+ .insns = {
+ /* r1 = U64_MAX; */
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+ /* u64 val = r1; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ /* r1 = (u32)atomic_fetch_or((u32 *)&val, 2); */
+ BPF_MOV32_IMM(BPF_REG_1, 2),
+ BPF_ATOMIC_OP(BPF_W, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
+ /* r2 = 0x00000000FFFFFFFF; */
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 1),
+ /* if (r2 != r1) exit(1); */
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c
index 1fd07a4f27ac..c162498a64fc 100644
--- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c
+++ b/tools/testing/selftests/bpf/verifier/bounds_deduction.c
@@ -6,8 +6,9 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
.errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
},
{
"check deducing bounds from const, 2",
@@ -20,6 +21,8 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 1,
},
@@ -31,8 +34,9 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
.errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
},
{
"check deducing bounds from const, 4",
@@ -45,6 +49,8 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -55,8 +61,9 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
.errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
},
{
"check deducing bounds from const, 6",
@@ -67,8 +74,9 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
.errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
},
{
"check deducing bounds from const, 7",
@@ -80,8 +88,9 @@
offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types",
.errstr = "dereference of modified ctx ptr",
+ .result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
@@ -94,8 +103,9 @@
offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types",
.errstr = "dereference of modified ctx ptr",
+ .result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
@@ -106,8 +116,9 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
.errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
},
{
"check deducing bounds from const, 10",
@@ -119,6 +130,6 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .result = REJECT,
.errstr = "math between ctx pointer and register with unbounded min value is not allowed",
+ .result = REJECT,
},
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index eb888c8479c3..336a749673d1 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -19,7 +19,7 @@
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 1,
@@ -136,7 +136,7 @@
{
"calls: wrong src reg",
.insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 3, 0, 0),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
@@ -397,7 +397,7 @@
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.fixup_map_hash_48b = { 3 },
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -1977,7 +1977,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
},
@@ -2003,7 +2003,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
@@ -2028,7 +2028,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
index fb13ca2d5606..d78627be060f 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -239,6 +239,7 @@
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
+ .runs = -1,
},
/* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */
{
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 5cf361d8eb1c..17fe33a75034 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -85,7 +85,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -103,7 +103,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -121,7 +121,7 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -137,7 +137,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
@@ -152,7 +152,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
index b117bdd3806d..6f610cfddae5 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -75,6 +75,8 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_16b = { 4 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types",
.result = ACCEPT,
},
{
@@ -91,5 +93,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_16b = { 4 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types",
.result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index b018ad71e0a8..3e32400c4b44 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -497,7 +497,7 @@
.result = ACCEPT,
},
{
- "unpriv: adding of fp",
+ "unpriv: adding of fp, reg",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_MOV64_IMM(BPF_REG_1, 0),
@@ -505,6 +505,19 @@
BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+},
+{
+ "unpriv: adding of fp, imm",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+ BPF_EXIT_INSN(),
+ },
.errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
.result_unpriv = REJECT,
.result = ACCEPT,
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index ed4e76b24649..feb91266db39 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -169,7 +169,7 @@
.fixup_map_array_48b = { 1 },
.result = ACCEPT,
.result_unpriv = REJECT,
- .errstr_unpriv = "R2 tried to add from different maps or paths",
+ .errstr_unpriv = "R2 tried to add from different maps, paths, or prohibited types",
.retval = 0,
},
{
@@ -517,6 +517,27 @@
.retval = 0xabcdef12,
},
{
+ "map access: value_ptr += N, value_ptr -= N known scalar",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV32_IMM(BPF_REG_1, 0x12345678),
+ BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = ACCEPT,
+ .retval = 0x12345678,
+},
+{
"map access: unknown scalar += value_ptr, 1",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 26ae8d0b6ce3..8889b3f55236 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -17,19 +17,22 @@ KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vm
KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config"
INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
NUM_COMPILE_JOBS="$(nproc)"
+LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
+LOG_FILE="${LOG_FILE_BASE}.log"
+EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status"
usage()
{
cat <<EOF
-Usage: $0 [-i] [-d <output_dir>] -- [<command>]
+Usage: $0 [-i] [-s] [-d <output_dir>] -- [<command>]
<command> is the command you would normally run when you are in
tools/testing/selftests/bpf. e.g:
$0 -- ./test_progs -t test_lsm
-If no command is specified, "${DEFAULT_COMMAND}" will be run by
-default.
+If no command is specified and a debug shell (-s) is not requested,
+"${DEFAULT_COMMAND}" will be run by default.
If you build your kernel using KBUILD_OUTPUT= or O= options, these
can be passed as environment variables to the script:
@@ -46,6 +49,9 @@ Options:
-d) Update the output directory (default: ${OUTPUT_DIR})
-j) Number of jobs for compilation, similar to -j in make
(default: ${NUM_COMPILE_JOBS})
+ -s) Instead of powering off the VM, start an interactive
+ shell. If <command> is specified, the shell runs after
+ the command finishes executing
EOF
}
@@ -146,7 +152,7 @@ update_init_script()
local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d"
local init_script="${init_script_dir}/S50-startup"
local command="$1"
- local log_file="$2"
+ local exit_command="$2"
mount_image
@@ -160,17 +166,26 @@ EOF
fi
- sudo bash -c "cat >${init_script}" <<EOF
-#!/bin/bash
+ sudo bash -c "echo '#!/bin/bash' > ${init_script}"
+
+ if [[ "${command}" != "" ]]; then
+ sudo bash -c "cat >>${init_script}" <<EOF
+# Have a default value in the exit status file
+# incase the VM is forcefully stopped.
+echo "130" > "/root/${EXIT_STATUS_FILE}"
{
cd /root/bpf
echo ${command}
stdbuf -oL -eL ${command}
-} 2>&1 | tee /root/${log_file}
-poweroff -f
+ echo "\$?" > "/root/${EXIT_STATUS_FILE}"
+} 2>&1 | tee "/root/${LOG_FILE}"
+# Ensure that the logs are written to disk
+sync
EOF
+ fi
+ sudo bash -c "echo ${exit_command} >> ${init_script}"
sudo chmod a+x "${init_script}"
unmount_image
}
@@ -221,10 +236,12 @@ EOF
copy_logs()
{
local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
- local log_file="${mount_dir}/root/$1"
+ local log_file="${mount_dir}/root/${LOG_FILE}"
+ local exit_status_file="${mount_dir}/root/${EXIT_STATUS_FILE}"
mount_image
sudo cp ${log_file} "${OUTPUT_DIR}"
+ sudo cp ${exit_status_file} "${OUTPUT_DIR}"
sudo rm -f ${log_file}
unmount_image
}
@@ -263,14 +280,15 @@ main()
{
local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
local kernel_checkout=$(realpath "${script_dir}"/../../../../)
- local log_file="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S.log")"
# By default the script searches for the kernel in the checkout directory but
# it also obeys environment variables O= and KBUILD_OUTPUT=
local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}"
local command="${DEFAULT_COMMAND}"
local update_image="no"
+ local exit_command="poweroff -f"
+ local debug_shell="no"
- while getopts 'hkid:j:' opt; do
+ while getopts 'hskid:j:' opt; do
case ${opt} in
i)
update_image="yes"
@@ -281,6 +299,11 @@ main()
j)
NUM_COMPILE_JOBS="$OPTARG"
;;
+ s)
+ command=""
+ debug_shell="yes"
+ exit_command="bash"
+ ;;
h)
usage
exit 0
@@ -299,7 +322,7 @@ main()
done
shift $((OPTIND -1))
- if [[ $# -eq 0 ]]; then
+ if [[ $# -eq 0 && "${debug_shell}" == "no" ]]; then
echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
else
command="$@"
@@ -347,19 +370,25 @@ main()
fi
update_selftests "${kernel_checkout}" "${make_command}"
- update_init_script "${command}" "${log_file}"
+ update_init_script "${command}" "${exit_command}"
run_vm "${kernel_bzimage}"
- copy_logs "${log_file}"
- echo "Logs saved in ${OUTPUT_DIR}/${log_file}"
+ if [[ "${command}" != "" ]]; then
+ copy_logs
+ echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
+ fi
}
catch()
{
local exit_code=$1
+ local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}"
# This is just a cleanup and the directory may
# have already been unmounted. So, don't let this
# clobber the error code we intend to return.
unmount_image || true
+ if [[ -f "${exit_status_file}" ]]; then
+ exit_code="$(cat ${exit_status_file})"
+ fi
exit ${exit_code}
}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index f4a96d5ff524..1135fb980814 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -18,12 +18,7 @@
* These selftests test AF_XDP SKB and Native/DRV modes using veth
* Virtual Ethernet interfaces.
*
- * The following tests are run:
- *
- * 1. AF_XDP SKB mode
- * Generic mode XDP is driver independent, used when the driver does
- * not have support for XDP. Works on any netdevice using sockets and
- * generic XDP path. XDP hook from netif_receive_skb().
+ * For each mode, the following tests are run:
* a. nopoll - soft-irq processing
* b. poll - using poll() syscall
* c. Socket Teardown
@@ -33,19 +28,25 @@
* Configure sockets as bi-directional tx/rx sockets, sets up fill and
* completion rings on each socket, tx/rx in both directions. Only nopoll
* mode is used
+ * e. Statistics
+ * Trigger some error conditions and ensure that the appropriate statistics
+ * are incremented. Within this test, the following statistics are tested:
+ * i. rx dropped
+ * Increase the UMEM frame headroom to a value which results in
+ * insufficient space in the rx buffer for both the packet and the headroom.
+ * ii. tx invalid
+ * Set the 'len' field of tx descriptors to an invalid value (umem frame
+ * size + 1).
+ * iii. rx ring full
+ * Reduce the size of the RX ring to a fraction of the fill ring size.
+ * iv. fill queue empty
+ * Do not populate the fill queue and then try to receive pkts.
+ * f. bpf_link resource persistence
+ * Configure sockets at indexes 0 and 1, run a traffic on queue ids 0,
+ * then remove xsk sockets from queue 0 on both veth interfaces and
+ * finally run a traffic on queues ids 1
*
- * 2. AF_XDP DRV/Native mode
- * Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes
- * packets before SKB allocation. Provides better performance than SKB. Driver
- * hook available just after DMA of buffer descriptor.
- * a. nopoll
- * b. poll
- * c. Socket Teardown
- * d. Bi-directional sockets
- * - Only copy mode is supported because veth does not currently support
- * zero-copy mode
- *
- * Total tests: 8
+ * Total tests: 12
*
* Flow:
* -----
@@ -58,7 +59,7 @@
* - Rx thread verifies if all 10k packets were received and delivered in-order,
* and have the right content
*
- * Enable/disable debug mode:
+ * Enable/disable packet dump mode:
* --------------------------
* To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
* parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
@@ -96,35 +97,34 @@ typedef __u16 __sum16;
#include "xdpxceiver.h"
#include "../kselftest.h"
+static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
+static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
+static const char *IP1 = "192.168.100.162";
+static const char *IP2 = "192.168.100.161";
+static const u16 UDP_PORT1 = 2020;
+static const u16 UDP_PORT2 = 2121;
+
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
- ksft_test_result_fail
- ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
- ksft_exit_xfail();
+ if (configured_mode == TEST_MODE_UNCONFIGURED) {
+ ksft_exit_fail_msg
+ ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
+ } else {
+ ksft_test_result_fail
+ ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
+ ksft_exit_xfail();
+ }
}
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
#define print_ksft_result(void)\
- (ksft_test_result_pass("PASS: %s %s %s%s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\
- "NOPOLL", opt_teardown ? "Socket Teardown" : "",\
- opt_bidi ? "Bi-directional Sockets" : ""))
-
-static void pthread_init_mutex(void)
-{
- pthread_mutex_init(&sync_mutex, NULL);
- pthread_mutex_init(&sync_mutex_tx, NULL);
- pthread_cond_init(&signal_rx_condition, NULL);
- pthread_cond_init(&signal_tx_condition, NULL);
-}
-
-static void pthread_destroy_mutex(void)
-{
- pthread_mutex_destroy(&sync_mutex);
- pthread_mutex_destroy(&sync_mutex_tx);
- pthread_cond_destroy(&signal_rx_condition);
- pthread_cond_destroy(&signal_tx_condition);
-}
+ (ksft_test_result_pass("PASS: %s %s %s%s%s%s\n", configured_mode ? "DRV" : "SKB",\
+ test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\
+ test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\
+ test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\
+ test_type == TEST_TYPE_STATS ? "Stats" : "",\
+ test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
static void *memset32_htonl(void *dest, u32 val, u32 size)
{
@@ -143,24 +143,11 @@ static void *memset32_htonl(void *dest, u32 val, u32 size)
}
/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static inline unsigned short from32to16(unsigned int x)
-{
- /* add up 16-bit and 16-bit for 16+c bit */
- x = (x & 0xffff) + (x >> 16);
- /* add up carry.. */
- x = (x & 0xffff) + (x >> 16);
- return x;
-}
-
-/*
* Fold a partial checksum
* This function code has been taken from
* Linux kernel include/asm-generic/checksum.h
*/
-static inline __u16 csum_fold(__u32 csum)
+static __u16 csum_fold(__u32 csum)
{
u32 sum = (__force u32)csum;
@@ -173,7 +160,7 @@ static inline __u16 csum_fold(__u32 csum)
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
-static inline u32 from64to32(u64 x)
+static u32 from64to32(u64 x)
{
/* add up 32-bit and 32-bit for 32+c bit */
x = (x & 0xffffffff) + (x >> 32);
@@ -182,13 +169,11 @@ static inline u32 from64to32(u64 x)
return (u32)x;
}
-__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum);
-
/*
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
-__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
{
unsigned long long s = (__force u32)sum;
@@ -206,13 +191,12 @@ __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u3
* This function has been taken from
* Linux kernel include/asm-generic/checksum.h
*/
-static inline __u16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
{
return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
}
-static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
+static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
{
u32 csum = 0;
u32 cnt = 0;
@@ -267,26 +251,37 @@ static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
}
-static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
+static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
{
+ struct xsk_umem_config cfg = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+ .frame_headroom = frame_headroom,
+ .flags = XSK_UMEM__DEFAULT_FLAGS
+ };
+ int size = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ struct xsk_umem_info *umem;
int ret;
- data->umem = calloc(1, sizeof(struct xsk_umem_info));
- if (!data->umem)
+ umem = calloc(1, sizeof(struct xsk_umem_info));
+ if (!umem)
exit_with_error(errno);
- ret = xsk_umem__create(&data->umem->umem, buffer, size,
- &data->umem->fq, &data->umem->cq, NULL);
+ ret = xsk_umem__create(&umem->umem, buffer, size,
+ &umem->fq, &umem->cq, &cfg);
if (ret)
exit_with_error(ret);
- data->umem->buffer = buffer;
+ umem->buffer = buffer;
+
+ data->umem_arr[idx] = umem;
}
static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
{
int ret, i;
- u32 idx;
+ u32 idx = 0;
ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
@@ -296,51 +291,48 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
}
-static int xsk_configure_socket(struct ifobject *ifobject)
+static int xsk_configure_socket(struct ifobject *ifobject, int idx)
{
struct xsk_socket_config cfg;
+ struct xsk_socket_info *xsk;
struct xsk_ring_cons *rxr;
struct xsk_ring_prod *txr;
int ret;
- ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info));
- if (!ifobject->xsk)
+ xsk = calloc(1, sizeof(struct xsk_socket_info));
+ if (!xsk)
exit_with_error(errno);
- ifobject->xsk->umem = ifobject->umem;
- cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ xsk->umem = ifobject->umem;
+ cfg.rx_size = rxqsize;
cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
cfg.libbpf_flags = 0;
- cfg.xdp_flags = opt_xdp_flags;
- cfg.bind_flags = opt_xdp_bind_flags;
+ cfg.xdp_flags = xdp_flags;
+ cfg.bind_flags = xdp_bind_flags;
- if (!opt_bidi) {
- rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
- txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
+ if (test_type != TEST_TYPE_BIDI) {
+ rxr = (ifobject->fv.vector == rx) ? &xsk->rx : NULL;
+ txr = (ifobject->fv.vector == tx) ? &xsk->tx : NULL;
} else {
- rxr = &ifobject->xsk->rx;
- txr = &ifobject->xsk->tx;
+ rxr = &xsk->rx;
+ txr = &xsk->tx;
}
- ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname,
- opt_queue, ifobject->umem->umem, rxr, txr, &cfg);
-
+ ret = xsk_socket__create(&xsk->xsk, ifobject->ifname, idx,
+ ifobject->umem->umem, rxr, txr, &cfg);
if (ret)
return 1;
+ ifobject->xsk_arr[idx] = xsk;
+
return 0;
}
static struct option long_options[] = {
{"interface", required_argument, 0, 'i'},
{"queue", optional_argument, 0, 'q'},
- {"poll", no_argument, 0, 'p'},
- {"xdp-skb", no_argument, 0, 'S'},
- {"xdp-native", no_argument, 0, 'N'},
- {"copy", no_argument, 0, 'c'},
- {"tear-down", no_argument, 0, 'T'},
- {"bidi", optional_argument, 0, 'B'},
- {"debug", optional_argument, 0, 'D'},
+ {"dump-pkts", optional_argument, 0, 'D'},
+ {"verbose", no_argument, 0, 'v'},
{"tx-pkt-count", optional_argument, 0, 'C'},
{0, 0, 0, 0}
};
@@ -352,23 +344,21 @@ static void usage(const char *prog)
" Options:\n"
" -i, --interface Use interface\n"
" -q, --queue=n Use queue n (default 0)\n"
- " -p, --poll Use poll syscall\n"
- " -S, --xdp-skb=n Use XDP SKB mode\n"
- " -N, --xdp-native=n Enforce XDP DRV (native) mode\n"
- " -c, --copy Force copy mode\n"
- " -T, --tear-down Tear down sockets by repeatedly recreating them\n"
- " -B, --bidi Bi-directional sockets test\n"
- " -D, --debug Debug mode - dump packets L2 - L5\n"
+ " -D, --dump-pkts Dump packets L2 - L5\n"
+ " -v, --verbose Verbose output\n"
" -C, --tx-pkt-count=n Number of packets to send\n";
ksft_print_msg(str, prog);
}
-static bool switch_namespace(int idx)
+static int switch_namespace(const char *nsname)
{
char fqns[26] = "/var/run/netns/";
int nsfd;
- strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1);
+ if (!nsname || strlen(nsname) == 0)
+ return -1;
+
+ strncat(fqns, nsname, sizeof(fqns) - strlen(fqns) - 1);
nsfd = open(fqns, O_RDONLY);
if (nsfd == -1)
@@ -377,26 +367,9 @@ static bool switch_namespace(int idx)
if (setns(nsfd, 0) == -1)
exit_with_error(errno);
- return true;
-}
+ print_verbose("NS switched: %s\n", nsname);
-static void *nsswitchthread(void *args)
-{
- struct targs *targs = args;
-
- targs->retptr = false;
-
- if (switch_namespace(targs->idx)) {
- ifdict[targs->idx]->ifindex = if_nametoindex(ifdict[targs->idx]->ifname);
- if (!ifdict[targs->idx]->ifindex) {
- ksft_test_result_fail("ERROR: [%s] interface \"%s\" does not exist\n",
- __func__, ifdict[targs->idx]->ifname);
- } else {
- ksft_print_msg("Interface found: %s\n", ifdict[targs->idx]->ifname);
- targs->retptr = true;
- }
- }
- pthread_exit(NULL);
+ return nsfd;
}
static int validate_interfaces(void)
@@ -408,33 +381,6 @@ static int validate_interfaces(void)
ret = false;
ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>.");
}
- if (strcmp(ifdict[i]->nsname, "")) {
- struct targs *targs;
-
- targs = malloc(sizeof(*targs));
- if (!targs)
- exit_with_error(errno);
-
- targs->idx = i;
- if (pthread_create(&ns_thread, NULL, nsswitchthread, targs))
- exit_with_error(errno);
-
- pthread_join(ns_thread, NULL);
-
- if (targs->retptr)
- ksft_print_msg("NS switched: %s\n", ifdict[i]->nsname);
-
- free(targs);
- } else {
- ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname);
- if (!ifdict[i]->ifindex) {
- ksft_test_result_fail
- ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname);
- ret = false;
- } else {
- ksft_print_msg("Interface found: %s\n", ifdict[i]->ifname);
- }
- }
}
return ret;
}
@@ -446,7 +392,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "i:q:pSNcTBDC:", long_options, &option_index);
+ c = getopt_long(argc, argv, "i:DC:v", long_options, &option_index);
if (c == -1)
break;
@@ -466,43 +412,26 @@ static void parse_command_line(int argc, char **argv)
MAX_INTERFACES_NAMESPACE_CHARS);
interface_index++;
break;
- case 'q':
- opt_queue = atoi(optarg);
- break;
- case 'p':
- opt_poll = 1;
- break;
- case 'S':
- opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
- opt_xdp_bind_flags |= XDP_COPY;
- uut = ORDER_CONTENT_VALIDATE_XDP_SKB;
- break;
- case 'N':
- opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
- opt_xdp_bind_flags |= XDP_COPY;
- uut = ORDER_CONTENT_VALIDATE_XDP_DRV;
- break;
- case 'c':
- opt_xdp_bind_flags |= XDP_COPY;
- break;
- case 'T':
- opt_teardown = 1;
- break;
- case 'B':
- opt_bidi = 1;
- break;
case 'D':
debug_pkt_dump = 1;
break;
case 'C':
opt_pkt_count = atoi(optarg);
break;
+ case 'v':
+ opt_verbose = 1;
+ break;
default:
usage(basename(argv[0]));
ksft_exit_xfail();
}
}
+ if (!opt_pkt_count) {
+ print_verbose("No tx-pkt-count specified, using default %u\n", DEFAULT_PKT_CNT);
+ opt_pkt_count = DEFAULT_PKT_CNT;
+ }
+
if (!validate_interfaces()) {
usage(basename(argv[0]));
ksft_exit_xfail();
@@ -519,7 +448,7 @@ static void kick_tx(struct xsk_socket_info *xsk)
exit_with_error(errno);
}
-static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
+static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
{
unsigned int rcvd;
u32 idx;
@@ -527,7 +456,7 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
if (!xsk->outstanding_tx)
return;
- if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx))
+ if (xsk_ring_prod__needs_wakeup(&xsk->tx))
kick_tx(xsk);
rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
@@ -597,8 +526,10 @@ static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
{
- u32 idx;
+ u32 idx = 0;
unsigned int i;
+ bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID;
+ u32 len = tx_invalid_test ? XSK_UMEM__DEFAULT_FRAME_SIZE + 1 : PKT_SIZE;
while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
complete_tx_only(xsk, batch_size);
@@ -607,17 +538,21 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
- tx_desc->len = PKT_SIZE;
+ tx_desc->len = len;
}
xsk_ring_prod__submit(&xsk->tx, batch_size);
- xsk->outstanding_tx += batch_size;
+ if (!tx_invalid_test) {
+ xsk->outstanding_tx += batch_size;
+ } else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ kick_tx(xsk);
+ }
*frameptr += batch_size;
*frameptr %= num_frames;
complete_tx_only(xsk, batch_size);
}
-static inline int get_batch_size(int pkt_cnt)
+static int get_batch_size(int pkt_cnt)
{
if (!opt_pkt_count)
return BATCH_SIZE;
@@ -654,7 +589,7 @@ static void tx_only_all(struct ifobject *ifobject)
while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
int batch_size = get_batch_size(pkt_cnt);
- if (opt_poll) {
+ if (test_type == TEST_TYPE_POLL) {
ret = poll(fds, 1, POLL_TMOUT);
if (ret <= 0)
continue;
@@ -673,48 +608,43 @@ static void tx_only_all(struct ifobject *ifobject)
static void worker_pkt_dump(void)
{
- struct in_addr ipaddr;
+ struct ethhdr *ethhdr;
+ struct iphdr *iphdr;
+ struct udphdr *udphdr;
+ char s[128];
+ int payload;
+ void *ptr;
fprintf(stdout, "---------------------------------------\n");
for (int iter = 0; iter < num_frames - 1; iter++) {
+ ptr = pkt_buf[iter]->payload;
+ ethhdr = ptr;
+ iphdr = ptr + sizeof(*ethhdr);
+ udphdr = ptr + sizeof(*ethhdr) + sizeof(*iphdr);
+
/*extract L2 frame */
fprintf(stdout, "DEBUG>> L2: dst mac: ");
for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ((struct ethhdr *)
- pkt_buf[iter]->payload)->h_dest[i]);
+ fprintf(stdout, "%02X", ethhdr->h_dest[i]);
fprintf(stdout, "\nDEBUG>> L2: src mac: ");
for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ((struct ethhdr *)
- pkt_buf[iter]->payload)->h_source[i]);
+ fprintf(stdout, "%02X", ethhdr->h_source[i]);
/*extract L3 frame */
- fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n",
- ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl);
-
- ipaddr.s_addr =
- ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr;
- fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr));
-
- ipaddr.s_addr =
- ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr;
- fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr));
-
+ fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
/*extract L4 frame */
- fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n",
- ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
- sizeof(struct ethhdr) +
- sizeof(struct iphdr)))->source));
-
- fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n",
- ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
- sizeof(struct ethhdr) +
- sizeof(struct iphdr)))->dest));
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
/*extract L5 frame */
- int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
+ payload = *((uint32_t *)(ptr + PKT_HDR_SIZE));
if (payload == EOT) {
- ksft_print_msg("End-of-transmission frame received\n");
+ print_verbose("End-of-transmission frame received\n");
fprintf(stdout, "---------------------------------------\n");
break;
}
@@ -723,6 +653,48 @@ static void worker_pkt_dump(void)
}
}
+static void worker_stats_validate(struct ifobject *ifobject)
+{
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
+ struct xsk_socket *xsk = stat_test_type == STAT_TEST_TX_INVALID ?
+ ifdict[!ifobject->ifdict_index]->xsk->xsk :
+ ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
+ unsigned long xsk_stat = 0, expected_stat = opt_pkt_count;
+
+ sigvar = 0;
+
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err)
+ return;
+
+ if (optlen == sizeof(struct xdp_statistics)) {
+ switch (stat_test_type) {
+ case STAT_TEST_RX_DROPPED:
+ xsk_stat = stats.rx_dropped;
+ break;
+ case STAT_TEST_TX_INVALID:
+ xsk_stat = stats.tx_invalid_descs;
+ break;
+ case STAT_TEST_RX_FULL:
+ xsk_stat = stats.rx_ring_full;
+ expected_stat -= RX_FULL_RXQSIZE;
+ break;
+ case STAT_TEST_RX_FILL_EMPTY:
+ xsk_stat = stats.rx_fill_ring_empty_descs;
+ break;
+ default:
+ break;
+ }
+
+ if (xsk_stat == expected_stat)
+ sigvar = 1;
+ }
+}
+
static void worker_pkt_validate(void)
{
u32 payloadseqnum = -2;
@@ -746,7 +718,7 @@ static void worker_pkt_validate(void)
}
if (payloadseqnum == EOT) {
- ksft_print_msg("End-of-transmission frame received: PASS\n");
+ print_verbose("End-of-transmission frame received: PASS\n");
sigvar = 1;
break;
}
@@ -773,37 +745,69 @@ static void worker_pkt_validate(void)
}
}
-static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mutex_t *mutexptr,
- atomic_int *spinningptr)
+static void thread_common_ops(struct ifobject *ifobject, void *bufs)
{
+ int umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
int ctr = 0;
int ret;
- xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
- ret = xsk_configure_socket(ifobject);
+ ifobject->ns_fd = switch_namespace(ifobject->nsname);
+
+ if (test_type == TEST_TYPE_BPF_RES)
+ umem_sz *= 2;
+
+ bufs = mmap(NULL, umem_sz,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (bufs == MAP_FAILED)
+ exit_with_error(errno);
+
+ xsk_configure_umem(ifobject, bufs, 0);
+ ifobject->umem = ifobject->umem_arr[0];
+ ret = xsk_configure_socket(ifobject, 0);
/* Retry Create Socket if it fails as xsk_socket__create()
* is asynchronous
- *
- * Essential to lock Mutex here to prevent Tx thread from
- * entering before Rx and causing a deadlock
*/
- pthread_mutex_lock(mutexptr);
while (ret && ctr < SOCK_RECONF_CTR) {
- atomic_store(spinningptr, 1);
- xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
- ret = xsk_configure_socket(ifobject);
+ xsk_configure_umem(ifobject, bufs, 0);
+ ifobject->umem = ifobject->umem_arr[0];
+ ret = xsk_configure_socket(ifobject, 0);
usleep(USLEEP_MAX);
ctr++;
}
- atomic_store(spinningptr, 0);
- pthread_mutex_unlock(mutexptr);
if (ctr >= SOCK_RECONF_CTR)
exit_with_error(ret);
+
+ ifobject->umem = ifobject->umem_arr[0];
+ ifobject->xsk = ifobject->xsk_arr[0];
+
+ if (test_type == TEST_TYPE_BPF_RES) {
+ xsk_configure_umem(ifobject, (u8 *)bufs + (umem_sz / 2), 1);
+ ifobject->umem = ifobject->umem_arr[1];
+ ret = xsk_configure_socket(ifobject, 1);
+ }
+
+ ifobject->umem = ifobject->umem_arr[0];
+ ifobject->xsk = ifobject->xsk_arr[0];
+ print_verbose("Interface [%s] vector [%s]\n",
+ ifobject->ifname, ifobject->fv.vector == tx ? "Tx" : "Rx");
+}
+
+static bool testapp_is_test_two_stepped(void)
+{
+ return (test_type != TEST_TYPE_BIDI && test_type != TEST_TYPE_BPF_RES) || second_step;
+}
+
+static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
+{
+ if (testapp_is_test_two_stepped()) {
+ xsk_socket__delete(ifobj->xsk->xsk);
+ (void)xsk_umem__delete(ifobj->umem->umem);
+ }
}
-static void *worker_testapp_validate(void *arg)
+static void *worker_testapp_validate_tx(void *arg)
{
struct udphdr *udp_hdr =
(struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
@@ -813,149 +817,97 @@ static void *worker_testapp_validate(void *arg)
struct generic_data data;
void *bufs = NULL;
- pthread_attr_setstacksize(&attr, THREAD_STACK);
-
- if (!bidi_pass) {
- bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (bufs == MAP_FAILED)
- exit_with_error(errno);
-
- if (strcmp(ifobject->nsname, ""))
- switch_namespace(ifobject->ifdict_index);
+ if (!second_step)
+ thread_common_ops(ifobject, bufs);
+
+ for (int i = 0; i < num_frames; i++) {
+ /*send EOT frame */
+ if (i == (num_frames - 1))
+ data.seqnum = -1;
+ else
+ data.seqnum = i;
+ gen_udp_hdr(&data, ifobject, udp_hdr);
+ gen_ip_hdr(ifobject, ip_hdr);
+ gen_udp_csum(udp_hdr, ip_hdr);
+ gen_eth_hdr(ifobject, eth_hdr);
+ gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
}
- if (ifobject->fv.vector == tx) {
- int spinningrxctr = 0;
-
- if (!bidi_pass)
- thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_tx);
-
- while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
- spinningrxctr++;
- usleep(USLEEP_MAX);
- }
+ print_verbose("Sending %d packets on interface %s\n",
+ (opt_pkt_count - 1), ifobject->ifname);
+ tx_only_all(ifobject);
- ksft_print_msg("Interface [%s] vector [Tx]\n", ifobject->ifname);
- for (int i = 0; i < num_frames; i++) {
- /*send EOT frame */
- if (i == (num_frames - 1))
- data.seqnum = -1;
- else
- data.seqnum = i;
- gen_udp_hdr(&data, ifobject, udp_hdr);
- gen_ip_hdr(ifobject, ip_hdr);
- gen_udp_csum(udp_hdr, ip_hdr);
- gen_eth_hdr(ifobject, eth_hdr);
- gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
- }
+ testapp_cleanup_xsk_res(ifobject);
+ pthread_exit(NULL);
+}
- ksft_print_msg("Sending %d packets on interface %s\n",
- (opt_pkt_count - 1), ifobject->ifname);
- tx_only_all(ifobject);
- } else if (ifobject->fv.vector == rx) {
- struct pollfd fds[MAX_SOCKS] = { };
- int ret;
+static void *worker_testapp_validate_rx(void *arg)
+{
+ struct ifobject *ifobject = (struct ifobject *)arg;
+ struct pollfd fds[MAX_SOCKS] = { };
+ void *bufs = NULL;
- if (!bidi_pass)
- thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx);
+ if (!second_step)
+ thread_common_ops(ifobject, bufs);
- ksft_print_msg("Interface [%s] vector [Rx]\n", ifobject->ifname);
+ if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
xsk_populate_fill_ring(ifobject->umem);
- TAILQ_INIT(&head);
- if (debug_pkt_dump) {
- pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
- if (!pkt_buf)
- exit_with_error(errno);
- }
+ TAILQ_INIT(&head);
+ if (debug_pkt_dump) {
+ pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
+ if (!pkt_buf)
+ exit_with_error(errno);
+ }
- fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
- fds[0].events = POLLIN;
+ fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
+ fds[0].events = POLLIN;
- pthread_mutex_lock(&sync_mutex);
- pthread_cond_signal(&signal_rx_condition);
- pthread_mutex_unlock(&sync_mutex);
+ pthread_barrier_wait(&barr);
- while (1) {
- if (opt_poll) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret <= 0)
- continue;
- }
+ while (1) {
+ if (test_type != TEST_TYPE_STATS) {
rx_pkt(ifobject->xsk, fds);
worker_pkt_validate();
-
- if (sigvar)
- break;
+ } else {
+ worker_stats_validate(ifobject);
}
+ if (sigvar)
+ break;
+ }
- ksft_print_msg("Received %d packets on interface %s\n",
- pkt_counter, ifobject->ifname);
+ print_verbose("Received %d packets on interface %s\n",
+ pkt_counter, ifobject->ifname);
- if (opt_teardown)
- ksft_print_msg("Destroying socket\n");
- }
+ if (test_type == TEST_TYPE_TEARDOWN)
+ print_verbose("Destroying socket\n");
- if (!opt_bidi || bidi_pass) {
- xsk_socket__delete(ifobject->xsk->xsk);
- (void)xsk_umem__delete(ifobject->umem->umem);
- }
+ testapp_cleanup_xsk_res(ifobject);
pthread_exit(NULL);
}
static void testapp_validate(void)
{
- struct timespec max_wait = { 0, 0 };
+ bool bidi = test_type == TEST_TYPE_BIDI;
+ bool bpf = test_type == TEST_TYPE_BPF_RES;
- pthread_attr_init(&attr);
- pthread_attr_setstacksize(&attr, THREAD_STACK);
-
- if (opt_bidi && bidi_pass) {
- pthread_init_mutex();
- if (!switching_notify) {
- ksft_print_msg("Switching Tx/Rx vectors\n");
- switching_notify++;
- }
- }
-
- pthread_mutex_lock(&sync_mutex);
+ if (pthread_barrier_init(&barr, NULL, 2))
+ exit_with_error(errno);
/*Spawn RX thread */
- if (!opt_bidi || !bidi_pass) {
- if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1]))
- exit_with_error(errno);
- } else if (opt_bidi && bidi_pass) {
- /*switch Tx/Rx vectors */
- ifdict[0]->fv.vector = rx;
- if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[0]))
- exit_with_error(errno);
- }
+ pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
- if (clock_gettime(CLOCK_REALTIME, &max_wait))
+ pthread_barrier_wait(&barr);
+ if (pthread_barrier_destroy(&barr))
exit_with_error(errno);
- max_wait.tv_sec += TMOUT_SEC;
-
- if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT)
- exit_with_error(errno);
-
- pthread_mutex_unlock(&sync_mutex);
/*Spawn TX thread */
- if (!opt_bidi || !bidi_pass) {
- if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0]))
- exit_with_error(errno);
- } else if (opt_bidi && bidi_pass) {
- /*switch Tx/Rx vectors */
- ifdict[1]->fv.vector = tx;
- if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[1]))
- exit_with_error(errno);
- }
+ pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx);
pthread_join(t1, NULL);
pthread_join(t0, NULL);
- if (debug_pkt_dump) {
+ if (debug_pkt_dump && test_type != TEST_TYPE_STATS) {
worker_pkt_dump();
for (int iter = 0; iter < num_frames - 1; iter++) {
free(pkt_buf[iter]->payload);
@@ -964,73 +916,217 @@ static void testapp_validate(void)
free(pkt_buf);
}
- if (!opt_teardown && !opt_bidi)
+ if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
print_ksft_result();
}
-static void testapp_sockets(void)
+static void testapp_teardown(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
+ pkt_counter = 0;
+ prev_pkt = -1;
+ sigvar = 0;
+ print_verbose("Creating socket\n");
+ testapp_validate();
+ }
+
+ print_ksft_result();
+}
+
+static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
{
- for (int i = 0; i < (opt_teardown ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); i++) {
+ void *(*tmp_func_ptr)(void *) = ifobj1->func_ptr;
+ enum fvector tmp_vector = ifobj1->fv.vector;
+
+ ifobj1->func_ptr = ifobj2->func_ptr;
+ ifobj1->fv.vector = ifobj2->fv.vector;
+
+ ifobj2->func_ptr = tmp_func_ptr;
+ ifobj2->fv.vector = tmp_vector;
+
+ ifdict_tx = ifobj1;
+ ifdict_rx = ifobj2;
+}
+
+static void testapp_bidi(void)
+{
+ for (int i = 0; i < MAX_BIDI_ITER; i++) {
pkt_counter = 0;
prev_pkt = -1;
sigvar = 0;
- ksft_print_msg("Creating socket\n");
+ print_verbose("Creating socket\n");
testapp_validate();
- opt_bidi ? bidi_pass++ : bidi_pass;
+ if (!second_step) {
+ print_verbose("Switching Tx/Rx vectors\n");
+ swap_vectors(ifdict[1], ifdict[0]);
+ }
+ second_step = true;
}
+ swap_vectors(ifdict[0], ifdict[1]);
+
print_ksft_result();
}
-static void init_iface_config(struct ifaceconfigobj *ifaceconfig)
+static void swap_xsk_res(void)
{
- /*Init interface0 */
- ifdict[0]->fv.vector = tx;
- memcpy(ifdict[0]->dst_mac, ifaceconfig->dst_mac, ETH_ALEN);
- memcpy(ifdict[0]->src_mac, ifaceconfig->src_mac, ETH_ALEN);
- ifdict[0]->dst_ip = ifaceconfig->dst_ip.s_addr;
- ifdict[0]->src_ip = ifaceconfig->src_ip.s_addr;
- ifdict[0]->dst_port = ifaceconfig->dst_port;
- ifdict[0]->src_port = ifaceconfig->src_port;
-
- /*Init interface1 */
- ifdict[1]->fv.vector = rx;
- memcpy(ifdict[1]->dst_mac, ifaceconfig->src_mac, ETH_ALEN);
- memcpy(ifdict[1]->src_mac, ifaceconfig->dst_mac, ETH_ALEN);
- ifdict[1]->dst_ip = ifaceconfig->src_ip.s_addr;
- ifdict[1]->src_ip = ifaceconfig->dst_ip.s_addr;
- ifdict[1]->dst_port = ifaceconfig->src_port;
- ifdict[1]->src_port = ifaceconfig->dst_port;
+ xsk_socket__delete(ifdict_tx->xsk->xsk);
+ xsk_umem__delete(ifdict_tx->umem->umem);
+ xsk_socket__delete(ifdict_rx->xsk->xsk);
+ xsk_umem__delete(ifdict_rx->umem->umem);
+ ifdict_tx->umem = ifdict_tx->umem_arr[1];
+ ifdict_tx->xsk = ifdict_tx->xsk_arr[1];
+ ifdict_rx->umem = ifdict_rx->umem_arr[1];
+ ifdict_rx->xsk = ifdict_rx->xsk_arr[1];
+}
+
+static void testapp_bpf_res(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_BPF_ITER; i++) {
+ pkt_counter = 0;
+ prev_pkt = -1;
+ sigvar = 0;
+ print_verbose("Creating socket\n");
+ testapp_validate();
+ if (!second_step)
+ swap_xsk_res();
+ second_step = true;
+ }
+
+ print_ksft_result();
+}
+
+static void testapp_stats(void)
+{
+ for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) {
+ stat_test_type = i;
+
+ /* reset defaults */
+ rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+
+ switch (stat_test_type) {
+ case STAT_TEST_RX_DROPPED:
+ frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE -
+ XDP_PACKET_HEADROOM - 1;
+ break;
+ case STAT_TEST_RX_FULL:
+ rxqsize = RX_FULL_RXQSIZE;
+ break;
+ default:
+ break;
+ }
+ testapp_validate();
+ }
+
+ print_ksft_result();
+}
+
+static void init_iface(struct ifobject *ifobj, const char *dst_mac,
+ const char *src_mac, const char *dst_ip,
+ const char *src_ip, const u16 dst_port,
+ const u16 src_port, enum fvector vector)
+{
+ struct in_addr ip;
+
+ memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
+ memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
+
+ inet_aton(dst_ip, &ip);
+ ifobj->dst_ip = ip.s_addr;
+
+ inet_aton(src_ip, &ip);
+ ifobj->src_ip = ip.s_addr;
+
+ ifobj->dst_port = dst_port;
+ ifobj->src_port = src_port;
+
+ if (vector == tx) {
+ ifobj->fv.vector = tx;
+ ifobj->func_ptr = worker_testapp_validate_tx;
+ ifdict_tx = ifobj;
+ } else {
+ ifobj->fv.vector = rx;
+ ifobj->func_ptr = worker_testapp_validate_rx;
+ ifdict_rx = ifobj;
+ }
+}
+
+static void run_pkt_test(int mode, int type)
+{
+ test_type = type;
+
+ /* reset defaults after potential previous test */
+ xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+ pkt_counter = 0;
+ second_step = 0;
+ prev_pkt = -1;
+ sigvar = 0;
+ stat_test_type = -1;
+ rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+
+ configured_mode = mode;
+
+ switch (mode) {
+ case (TEST_MODE_SKB):
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case (TEST_MODE_DRV):
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+ break;
+ default:
+ break;
+ }
+
+ switch (test_type) {
+ case TEST_TYPE_STATS:
+ testapp_stats();
+ break;
+ case TEST_TYPE_TEARDOWN:
+ testapp_teardown();
+ break;
+ case TEST_TYPE_BIDI:
+ testapp_bidi();
+ break;
+ case TEST_TYPE_BPF_RES:
+ testapp_bpf_res();
+ break;
+ default:
+ testapp_validate();
+ break;
+ }
}
int main(int argc, char **argv)
{
struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
+ bool failure = false;
+ int i, j;
if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
exit_with_error(errno);
- const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
- const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
- const char *IP1 = "192.168.100.162";
- const char *IP2 = "192.168.100.161";
- u16 UDP_DST_PORT = 2020;
- u16 UDP_SRC_PORT = 2121;
-
- ifaceconfig = malloc(sizeof(struct ifaceconfigobj));
- memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN);
- memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN);
- inet_aton(IP1, &ifaceconfig->dst_ip);
- inet_aton(IP2, &ifaceconfig->src_ip);
- ifaceconfig->dst_port = UDP_DST_PORT;
- ifaceconfig->src_port = UDP_SRC_PORT;
-
for (int i = 0; i < MAX_INTERFACES; i++) {
ifdict[i] = malloc(sizeof(struct ifobject));
if (!ifdict[i])
exit_with_error(errno);
ifdict[i]->ifdict_index = i;
+ ifdict[i]->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
+ if (!ifdict[i]->xsk_arr) {
+ failure = true;
+ goto cleanup;
+ }
+ ifdict[i]->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
+ if (!ifdict[i]->umem_arr) {
+ failure = true;
+ goto cleanup;
+ }
}
setlocale(LC_ALL, "");
@@ -1039,25 +1135,27 @@ int main(int argc, char **argv)
num_frames = ++opt_pkt_count;
- init_iface_config(ifaceconfig);
-
- pthread_init_mutex();
+ init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
+ init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
- ksft_set_plan(1);
+ ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
- if (!opt_teardown && !opt_bidi) {
- testapp_validate();
- } else if (opt_teardown && opt_bidi) {
- ksft_test_result_fail("ERROR: parameters -T and -B cannot be used together\n");
- ksft_exit_xfail();
- } else {
- testapp_sockets();
+ for (i = 0; i < TEST_MODE_MAX; i++) {
+ for (j = 0; j < TEST_TYPE_MAX; j++)
+ run_pkt_test(i, j);
}
- for (int i = 0; i < MAX_INTERFACES; i++)
+cleanup:
+ for (int i = 0; i < MAX_INTERFACES; i++) {
+ if (ifdict[i]->ns_fd != -1)
+ close(ifdict[i]->ns_fd);
+ free(ifdict[i]->xsk_arr);
+ free(ifdict[i]->umem_arr);
free(ifdict[i]);
+ }
- pthread_destroy_mutex();
+ if (failure)
+ exit_with_error(errno);
ksft_exit_pass();
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 0e9f9b7e61c2..6c428b276ab6 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -23,6 +23,7 @@
#define MAX_SOCKS 1
#define MAX_TEARDOWN_ITER 10
#define MAX_BIDI_ITER 2
+#define MAX_BPF_ITER 2
#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
sizeof(struct udphdr))
#define MIN_PKT_SIZE 64
@@ -33,41 +34,63 @@
#define IP_PKT_TOS 0x9
#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
-#define TMOUT_SEC (3)
#define EOT (-1)
#define USLEEP_MAX 200000
-#define THREAD_STACK 60000000
#define SOCK_RECONF_CTR 10
#define BATCH_SIZE 64
#define POLL_TMOUT 1000
-#define NEED_WAKEUP true
+#define DEFAULT_PKT_CNT 10000
+#define RX_FULL_RXQSIZE 32
+
+#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
typedef __u32 u32;
typedef __u16 u16;
typedef __u8 u8;
-enum TESTS {
- ORDER_CONTENT_VALIDATE_XDP_SKB = 0,
- ORDER_CONTENT_VALIDATE_XDP_DRV = 1,
+enum TEST_MODES {
+ TEST_MODE_UNCONFIGURED = -1,
+ TEST_MODE_SKB,
+ TEST_MODE_DRV,
+ TEST_MODE_MAX
+};
+
+enum TEST_TYPES {
+ TEST_TYPE_NOPOLL,
+ TEST_TYPE_POLL,
+ TEST_TYPE_TEARDOWN,
+ TEST_TYPE_BIDI,
+ TEST_TYPE_STATS,
+ TEST_TYPE_BPF_RES,
+ TEST_TYPE_MAX
+};
+
+enum STAT_TEST_TYPES {
+ STAT_TEST_RX_DROPPED,
+ STAT_TEST_TX_INVALID,
+ STAT_TEST_RX_FULL,
+ STAT_TEST_RX_FILL_EMPTY,
+ STAT_TEST_TYPE_MAX
};
-u8 uut;
-u8 debug_pkt_dump;
-u32 num_frames;
-u8 switching_notify;
-u8 bidi_pass;
+static int configured_mode = TEST_MODE_UNCONFIGURED;
+static u8 debug_pkt_dump;
+static u32 num_frames;
+static bool second_step;
+static int test_type;
-static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int opt_queue;
static int opt_pkt_count;
-static int opt_poll;
-static int opt_teardown;
-static int opt_bidi;
-static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
+static u8 opt_verbose;
+
+static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
static u32 pkt_counter;
-static u32 prev_pkt = -1;
+static long prev_pkt = -1;
static int sigvar;
+static int stat_test_type;
+static u32 rxqsize;
+static u32 frame_headroom;
struct xsk_umem_info {
struct xsk_ring_prod fq;
@@ -99,47 +122,32 @@ struct generic_data {
u32 seqnum;
};
-struct ifaceconfigobj {
- u8 dst_mac[ETH_ALEN];
- u8 src_mac[ETH_ALEN];
- struct in_addr dst_ip;
- struct in_addr src_ip;
- u16 src_port;
- u16 dst_port;
-} *ifaceconfig;
-
struct ifobject {
- int ifindex;
- int ifdict_index;
char ifname[MAX_INTERFACE_NAME_CHARS];
char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
- struct flow_vector fv;
struct xsk_socket_info *xsk;
+ struct xsk_socket_info **xsk_arr;
+ struct xsk_umem_info **umem_arr;
struct xsk_umem_info *umem;
- u8 dst_mac[ETH_ALEN];
- u8 src_mac[ETH_ALEN];
+ void *(*func_ptr)(void *arg);
+ struct flow_vector fv;
+ int ns_fd;
+ int ifdict_index;
u32 dst_ip;
u32 src_ip;
u16 src_port;
u16 dst_port;
+ u8 dst_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
};
static struct ifobject *ifdict[MAX_INTERFACES];
+static struct ifobject *ifdict_rx;
+static struct ifobject *ifdict_tx;
/*threads*/
-atomic_int spinning_tx;
-atomic_int spinning_rx;
-pthread_mutex_t sync_mutex;
-pthread_mutex_t sync_mutex_tx;
-pthread_cond_t signal_rx_condition;
-pthread_cond_t signal_tx_condition;
-pthread_t t0, t1, ns_thread;
-pthread_attr_t attr;
-
-struct targs {
- bool retptr;
- int idx;
-};
+pthread_barrier_t barr;
+pthread_t t0, t1;
TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
struct head_s *head_p;
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 9d54c4645127..dac1c5f78752 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -82,24 +82,21 @@ clear_configs()
{
if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then
[ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] &&
- { echo "removing link $1:$2"; ip netns exec $3 ip link del $2; }
- echo "removing ns $3"
+ { ip netns exec $3 ip link del $2; }
ip netns del $3
fi
#Once we delete a veth pair node, the entire veth pair is removed,
#this is just to be cautious just incase the NS does not exist then
#veth node inside NS won't get removed so we explicitly remove it
[ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
- { echo "removing link $1"; ip link del $1; }
+ { ip link del $1; }
if [ -f ${SPECFILE} ]; then
- echo "removing spec file:" ${SPECFILE}
rm -f ${SPECFILE}
fi
}
cleanup_exit()
{
- echo "cleaning up..."
clear_configs $1 $2 $3
}
@@ -108,28 +105,7 @@ validate_ip_utility()
[ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; }
}
-vethXDPgeneric()
-{
- ip link set dev $1 xdpdrv off
- ip netns exec $3 ip link set dev $2 xdpdrv off
-}
-
-vethXDPnative()
-{
- ip link set dev $1 xdpgeneric off
- ip netns exec $3 ip link set dev $2 xdpgeneric off
-}
-
execxdpxceiver()
{
- local -a 'paramkeys=("${!'"$1"'[@]}")' copy
- paramkeysstr=${paramkeys[*]}
-
- for index in $paramkeysstr;
- do
- current=$1"[$index]"
- copy[$index]=${!current}
- done
-
- ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS}
+ ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} -C ${NUMPKTS} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
}
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
index ad41ea69001b..e7041816085a 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
@@ -145,7 +145,7 @@ static bool run_test(int wr_size, int wp_size, int wr, int wp)
if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
ksft_print_msg(
- "ptrace(PTRACE_SINGLESTEP) failed: %s\n",
+ "ptrace(PTRACE_CONT) failed: %s\n",
strerror(errno));
return false;
}
@@ -159,7 +159,7 @@ static bool run_test(int wr_size, int wp_size, int wr, int wp)
}
alarm(0);
if (WIFEXITED(status)) {
- ksft_print_msg("child did not single-step\n");
+ ksft_print_msg("child exited prematurely\n");
return false;
}
if (!WIFSTOPPED(status)) {
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
index 537d65968c48..fb23ce9617ea 100644
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ b/tools/testing/selftests/dma/dma_map_benchmark.c
@@ -12,9 +12,12 @@
#include <sys/mman.h>
#include <linux/types.h>
+#define NSEC_PER_MSEC 1000000L
+
#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark)
#define DMA_MAP_MAX_THREADS 1024
#define DMA_MAP_MAX_SECONDS 300
+#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC)
#define DMA_MAP_BIDIRECTIONAL 0
#define DMA_MAP_TO_DEVICE 1
@@ -36,7 +39,8 @@ struct map_benchmark {
__s32 node; /* which numa node this benchmark will run on */
__u32 dma_bits; /* DMA addressing capability */
__u32 dma_dir; /* DMA data direction */
- __u8 expansion[84]; /* For future use */
+ __u32 dma_trans_ns; /* time for DMA transmission in ns */
+ __u8 expansion[80]; /* For future use */
};
int main(int argc, char **argv)
@@ -46,12 +50,12 @@ int main(int argc, char **argv)
/* default single thread, run 20 seconds on NUMA_NO_NODE */
int threads = 1, seconds = 20, node = -1;
/* default dma mask 32bit, bidirectional DMA */
- int bits = 32, dir = DMA_MAP_BIDIRECTIONAL;
+ int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
int cmd = DMA_MAP_BENCHMARK;
char *p;
- while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) {
+ while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) {
switch (opt) {
case 't':
threads = atoi(optarg);
@@ -68,6 +72,9 @@ int main(int argc, char **argv)
case 'd':
dir = atoi(optarg);
break;
+ case 'x':
+ xdelay = atoi(optarg);
+ break;
default:
return -1;
}
@@ -85,6 +92,12 @@ int main(int argc, char **argv)
exit(1);
}
+ if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) {
+ fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n",
+ DMA_MAP_MAX_TRANS_DELAY);
+ exit(1);
+ }
+
/* suppose the mininum DMA zone is 1MB in the world */
if (bits < 20 || bits > 64) {
fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
@@ -109,6 +122,8 @@ int main(int argc, char **argv)
map.node = node;
map.dma_bits = bits;
map.dma_dir = dir;
+ map.dma_trans_ns = xdelay;
+
if (ioctl(fd, cmd, &map)) {
perror("ioctl");
exit(1);
diff --git a/tools/testing/selftests/dmabuf-heaps/Makefile b/tools/testing/selftests/dmabuf-heaps/Makefile
index 607c2acd2082..604b43ece15f 100644
--- a/tools/testing/selftests/dmabuf-heaps/Makefile
+++ b/tools/testing/selftests/dmabuf-heaps/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -static -O3 -Wl,-no-as-needed -Wall -I../../../../usr/include
+CFLAGS += -static -O3 -Wl,-no-as-needed -Wall
TEST_GEN_PROGS = dmabuf-heap
diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
index 909da9cdda97..29af27acd40e 100644
--- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
+++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
@@ -130,16 +130,13 @@ static int dmabuf_heap_alloc(int fd, size_t len, unsigned int flags,
dmabuf_fd);
}
-static void dmabuf_sync(int fd, int start_stop)
+static int dmabuf_sync(int fd, int start_stop)
{
struct dma_buf_sync sync = {
.flags = start_stop | DMA_BUF_SYNC_RW,
};
- int ret;
- ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
- if (ret)
- printf("sync failed %d\n", errno);
+ return ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
}
#define ONE_MEG (1024 * 1024)
@@ -151,16 +148,14 @@ static int test_alloc_and_import(char *heap_name)
void *p = NULL;
int ret;
- printf("Testing heap: %s\n", heap_name);
-
heap_fd = dmabuf_heap_open(heap_name);
if (heap_fd < 0)
return -1;
- printf("Allocating 1 MEG\n");
+ printf(" Testing allocation and importing: ");
ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0, &dmabuf_fd);
if (ret) {
- printf("Allocation Failed!\n");
+ printf("FAIL (Allocation Failed!)\n");
ret = -1;
goto out;
}
@@ -172,11 +167,10 @@ static int test_alloc_and_import(char *heap_name)
dmabuf_fd,
0);
if (p == MAP_FAILED) {
- printf("mmap() failed: %m\n");
+ printf("FAIL (mmap() failed)\n");
ret = -1;
goto out;
}
- printf("mmap passed\n");
dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
memset(p, 1, ONE_MEG / 2);
@@ -186,25 +180,31 @@ static int test_alloc_and_import(char *heap_name)
importer_fd = open_vgem();
if (importer_fd < 0) {
ret = importer_fd;
- printf("Failed to open vgem\n");
- goto out;
+ printf("(Could not open vgem - skipping): ");
+ } else {
+ ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle);
+ if (ret < 0) {
+ printf("FAIL (Failed to import buffer)\n");
+ goto out;
+ }
}
- ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle);
+ ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
if (ret < 0) {
- printf("Failed to import buffer\n");
+ printf("FAIL (DMA_BUF_SYNC_START failed!)\n");
goto out;
}
- printf("import passed\n");
- dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
memset(p, 0xff, ONE_MEG);
- dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END);
- printf("syncs passed\n");
+ ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END);
+ if (ret < 0) {
+ printf("FAIL (DMA_BUF_SYNC_END failed!)\n");
+ goto out;
+ }
close_handle(importer_fd, handle);
ret = 0;
-
+ printf(" OK\n");
out:
if (p)
munmap(p, ONE_MEG);
@@ -218,6 +218,84 @@ out:
return ret;
}
+static int test_alloc_zeroed(char *heap_name, size_t size)
+{
+ int heap_fd = -1, dmabuf_fd[32];
+ int i, j, ret;
+ void *p = NULL;
+ char *c;
+
+ printf(" Testing alloced %ldk buffers are zeroed: ", size / 1024);
+ heap_fd = dmabuf_heap_open(heap_name);
+ if (heap_fd < 0)
+ return -1;
+
+ /* Allocate and fill a bunch of buffers */
+ for (i = 0; i < 32; i++) {
+ ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]);
+ if (ret < 0) {
+ printf("FAIL (Allocation (%i) failed)\n", i);
+ goto out;
+ }
+ /* mmap and fill with simple pattern */
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0);
+ if (p == MAP_FAILED) {
+ printf("FAIL (mmap() failed!)\n");
+ ret = -1;
+ goto out;
+ }
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START);
+ memset(p, 0xff, size);
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
+ munmap(p, size);
+ }
+ /* close them all */
+ for (i = 0; i < 32; i++)
+ close(dmabuf_fd[i]);
+
+ /* Allocate and validate all buffers are zeroed */
+ for (i = 0; i < 32; i++) {
+ ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]);
+ if (ret < 0) {
+ printf("FAIL (Allocation (%i) failed)\n", i);
+ goto out;
+ }
+
+ /* mmap and validate everything is zero */
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0);
+ if (p == MAP_FAILED) {
+ printf("FAIL (mmap() failed!)\n");
+ ret = -1;
+ goto out;
+ }
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START);
+ c = (char *)p;
+ for (j = 0; j < size; j++) {
+ if (c[j] != 0) {
+ printf("FAIL (Allocated buffer not zeroed @ %i)\n", j);
+ break;
+ }
+ }
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
+ munmap(p, size);
+ }
+ /* close them all */
+ for (i = 0; i < 32; i++)
+ close(dmabuf_fd[i]);
+
+ close(heap_fd);
+ printf("OK\n");
+ return 0;
+
+out:
+ while (i > 0) {
+ close(dmabuf_fd[i]);
+ i--;
+ }
+ close(heap_fd);
+ return ret;
+}
+
/* Test the ioctl version compatibility w/ a smaller structure then expected */
static int dmabuf_heap_alloc_older(int fd, size_t len, unsigned int flags,
int *dmabuf_fd)
@@ -292,23 +370,24 @@ static int test_alloc_compat(char *heap_name)
if (heap_fd < 0)
return -1;
- printf("Testing (theoretical)older alloc compat\n");
+ printf(" Testing (theoretical)older alloc compat: ");
ret = dmabuf_heap_alloc_older(heap_fd, ONE_MEG, 0, &dmabuf_fd);
if (ret) {
- printf("Older compat allocation failed!\n");
+ printf("FAIL (Older compat allocation failed!)\n");
ret = -1;
goto out;
}
close(dmabuf_fd);
+ printf("OK\n");
- printf("Testing (theoretical)newer alloc compat\n");
+ printf(" Testing (theoretical)newer alloc compat: ");
ret = dmabuf_heap_alloc_newer(heap_fd, ONE_MEG, 0, &dmabuf_fd);
if (ret) {
- printf("Newer compat allocation failed!\n");
+ printf("FAIL (Newer compat allocation failed!)\n");
ret = -1;
goto out;
}
- printf("Ioctl compatibility tests passed\n");
+ printf("OK\n");
out:
if (dmabuf_fd >= 0)
close(dmabuf_fd);
@@ -327,17 +406,17 @@ static int test_alloc_errors(char *heap_name)
if (heap_fd < 0)
return -1;
- printf("Testing expected error cases\n");
+ printf(" Testing expected error cases: ");
ret = dmabuf_heap_alloc(0, ONE_MEG, 0x111111, &dmabuf_fd);
if (!ret) {
- printf("Did not see expected error (invalid fd)!\n");
+ printf("FAIL (Did not see expected error (invalid fd)!)\n");
ret = -1;
goto out;
}
ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0x111111, &dmabuf_fd);
if (!ret) {
- printf("Did not see expected error (invalid heap flags)!\n");
+ printf("FAIL (Did not see expected error (invalid heap flags)!)\n");
ret = -1;
goto out;
}
@@ -345,12 +424,12 @@ static int test_alloc_errors(char *heap_name)
ret = dmabuf_heap_alloc_fdflags(heap_fd, ONE_MEG,
~(O_RDWR | O_CLOEXEC), 0, &dmabuf_fd);
if (!ret) {
- printf("Did not see expected error (invalid fd flags)!\n");
+ printf("FAIL (Did not see expected error (invalid fd flags)!)\n");
ret = -1;
goto out;
}
- printf("Expected error checking passed\n");
+ printf("OK\n");
ret = 0;
out:
if (dmabuf_fd >= 0)
@@ -379,10 +458,20 @@ int main(void)
if (!strncmp(dir->d_name, "..", 3))
continue;
+ printf("Testing heap: %s\n", dir->d_name);
+ printf("=======================================\n");
ret = test_alloc_and_import(dir->d_name);
if (ret)
break;
+ ret = test_alloc_zeroed(dir->d_name, 4 * 1024);
+ if (ret)
+ break;
+
+ ret = test_alloc_zeroed(dir->d_name, ONE_MEG);
+ if (ret)
+ break;
+
ret = test_alloc_compat(dir->d_name);
if (ret)
break;
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 1fedfc9da434..42d44e27802c 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -446,6 +446,35 @@ __invalid_nexthop_test()
log_test "Unresolved neigh: nexthop does not exist: $desc"
}
+__invalid_nexthop_bucket_test()
+{
+ local desc=$1; shift
+ local dip=$1; shift
+ local via_add=$1; shift
+ local trap_name="unresolved_neigh"
+
+ RET=0
+
+ # Check that route to nexthop that does not exist triggers
+ # unresolved_neigh
+ ip nexthop add id 1 via $via_add dev $rp2
+ ip nexthop add id 10 group 1 type resilient buckets 32
+ ip route add $dip nhid 10
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+ ping_do $h1 $dip
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets ]]; then
+ check_err 1 "Trap counter did not increase"
+ fi
+
+ ip route del $dip nhid 10
+ ip nexthop del id 10
+ ip nexthop del id 1
+ log_test "Unresolved neigh: nexthop bucket does not exist: $desc"
+}
+
unresolved_neigh_test()
{
__host_miss_test "IPv4" 198.51.100.1
@@ -453,6 +482,8 @@ unresolved_neigh_test()
__invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4
__invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \
2001:db8:2::4
+ __invalid_nexthop_bucket_test "IPv4" 198.51.100.1 198.51.100.4
+ __invalid_nexthop_bucket_test "IPv6" 2001:db8:2::1 2001:db8:2::4
}
vrf_without_routes_create()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index ed346da5d3cb..a217f9f6775b 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -33,6 +33,7 @@ ALL_TESTS="
nexthop_obj_invalid_test
nexthop_obj_offload_test
nexthop_obj_group_offload_test
+ nexthop_obj_bucket_offload_test
nexthop_obj_blackhole_offload_test
nexthop_obj_route_offload_test
devlink_reload_test
@@ -739,11 +740,28 @@ nexthop_obj_invalid_test()
ip nexthop add id 1 dev $swp1
ip nexthop add id 2 dev $swp1
+ ip nexthop add id 3 via 192.0.2.3 dev $swp1
ip nexthop add id 10 group 1/2
check_fail $? "managed to configure a nexthop group with device-only nexthops when should not"
+ ip nexthop add id 10 group 3 type resilient buckets 7
+ check_fail $? "managed to configure a too small resilient nexthop group when should not"
+
+ ip nexthop add id 10 group 3 type resilient buckets 129
+ check_fail $? "managed to configure a resilient nexthop group with invalid number of buckets when should not"
+
+ ip nexthop add id 10 group 1/2 type resilient buckets 32
+ check_fail $? "managed to configure a resilient nexthop group with device-only nexthops when should not"
+
+ ip nexthop add id 10 group 3 type resilient buckets 32
+ check_err $? "failed to configure a valid resilient nexthop group"
+ ip nexthop replace id 3 dev $swp1
+ check_fail $? "managed to populate a nexthop bucket with a device-only nexthop when should not"
+
log_test "nexthop objects - invalid configurations"
+ ip nexthop del id 10
+ ip nexthop del id 3
ip nexthop del id 2
ip nexthop del id 1
@@ -858,6 +876,70 @@ nexthop_obj_group_offload_test()
simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
}
+nexthop_obj_bucket_offload_test()
+{
+ # Test offload indication of nexthop buckets
+ RET=0
+
+ simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+ simple_if_init $swp2
+ setup_wait
+
+ ip nexthop add id 1 via 192.0.2.2 dev $swp1
+ ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
+ ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0
+ ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+ dev $swp1
+ ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+ dev $swp1
+ ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+ dev $swp1
+
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 1
+ check_err $? "IPv4 nexthop buckets not marked as offloaded when should"
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 2
+ check_err $? "IPv6 nexthop buckets not marked as offloaded when should"
+
+ # Invalidate nexthop id 1
+ ip neigh replace 192.0.2.2 nud failed dev $swp1
+ busywait "$TIMEOUT" wait_for_trap \
+ ip nexthop bucket show nhid 1
+ check_err $? "IPv4 nexthop buckets not marked with trap when should"
+
+ # Invalidate nexthop id 2
+ ip neigh replace 2001:db8:1::2 nud failed dev $swp1
+ busywait "$TIMEOUT" wait_for_trap \
+ ip nexthop bucket show nhid 2
+ check_err $? "IPv6 nexthop buckets not marked with trap when should"
+
+ # Revalidate nexthop id 1 by changing its configuration
+ ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 1
+ check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop"
+
+ # Revalidate nexthop id 2 by changing its neighbour
+ ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+ dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 2
+ check_err $? "nexthop bucket not marked as offloaded after revalidating neighbour"
+
+ log_test "nexthop bucket offload indication"
+
+ ip neigh del 2001:db8:1::2 dev $swp1
+ ip neigh del 192.0.2.3 dev $swp1
+ ip neigh del 192.0.2.2 dev $swp1
+ ip nexthop del id 10
+ ip nexthop del id 2
+ ip nexthop del id 1
+
+ simple_if_fini $swp2
+ simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
nexthop_obj_blackhole_offload_test()
{
# Test offload indication of blackhole nexthop objects
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
deleted file mode 100755
index 0231205a7147..000000000000
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-lib_dir=$(dirname $0)/../../../../net/forwarding
-
-VXPORT=4789
-
-ALL_TESTS="
- create_dot1d_and_dot1ad_vxlans
-"
-NUM_NETIFS=2
-source $lib_dir/lib.sh
-
-setup_prepare()
-{
- swp1=${NETIFS[p1]}
- swp2=${NETIFS[p2]}
-
- ip link set dev $swp1 up
- ip link set dev $swp2 up
-}
-
-cleanup()
-{
- pre_cleanup
-
- ip link set dev $swp2 down
- ip link set dev $swp1 down
-}
-
-create_dot1d_and_dot1ad_vxlans()
-{
- RET=0
-
- ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
- vlan_default_pvid 0 mcast_snooping 0
- ip link set dev br0 up
-
- ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
- "$VXPORT" nolearning noudpcsum tos inherit ttl 100
- ip link set dev vx100 up
-
- ip link set dev $swp1 master br0
- ip link set dev vx100 master br0
- bridge vlan add vid 100 dev vx100 pvid untagged
-
- ip link add dev br1 type bridge vlan_filtering 0 mcast_snooping 0
- ip link set dev br1 up
-
- ip link add name vx200 type vxlan id 2000 local 192.0.2.17 dstport \
- "$VXPORT" nolearning noudpcsum tos inherit ttl 100
- ip link set dev vx200 up
-
- ip link set dev $swp2 master br1
- ip link set dev vx200 master br1 2>/dev/null
- check_fail $? "802.1d and 802.1ad VxLANs at the same time not rejected"
-
- ip link set dev vx200 master br1 2>&1 >/dev/null \
- | grep -q mlxsw_spectrum
- check_err $? "802.1d and 802.1ad VxLANs at the same time rejected without extack"
-
- log_test "create 802.1d and 802.1ad VxLANs"
-
- ip link del dev vx200
- ip link del dev br1
- ip link del dev vx100
- ip link del dev br0
-}
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 553cb9fad508..5ec3beb637c8 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -11,6 +11,7 @@ ALL_TESTS="
matchall_mirror_behind_flower_ingress_test
matchall_sample_behind_flower_ingress_test
matchall_mirror_behind_flower_egress_test
+ matchall_proto_match_test
police_limits_test
multi_police_test
"
@@ -18,6 +19,7 @@ NUM_NETIFS=2
source $lib_dir/tc_common.sh
source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
switch_create()
{
@@ -166,7 +168,8 @@ matchall_sample_egress_test()
RET=0
# It is forbidden in mlxsw driver to have matchall with sample action
- # bound on egress
+ # bound on egress. Spectrum-1 specific restriction
+ [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
tc qdisc add dev $swp1 clsact
@@ -289,6 +292,22 @@ matchall_mirror_behind_flower_egress_test()
matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2"
}
+matchall_proto_match_test()
+{
+ RET=0
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ matchall skip_sw \
+ action sample group 1 rate 100
+ check_fail $? "Incorrect success to add matchall rule with protocol match"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "matchall protocol match"
+}
+
police_limits_test()
{
RET=0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
new file mode 100755
index 000000000000..093bed088ad0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -0,0 +1,657 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that packets are sampled when tc-sample is used and that reported
+# metadata is correct. Two sets of hosts (with and without LAG) are used, since
+# metadata extraction in mlxsw is a bit different when LAG is involved.
+#
+# +---------------------------------+ +---------------------------------+
+# | H1 (vrf) | | H3 (vrf) |
+# | + $h1 | | + $h3_lag |
+# | | 192.0.2.1/28 | | | 192.0.2.17/28 |
+# | | | | | |
+# | | default via 192.0.2.2 | | | default via 192.0.2.18 |
+# +----|----------------------------+ +----|----------------------------+
+# | |
+# +----|-----------------------------------------|----------------------------+
+# | | 192.0.2.2/28 | 192.0.2.18/28 |
+# | + $rp1 + $rp3_lag |
+# | |
+# | + $rp2 + $rp4_lag |
+# | | 198.51.100.2/28 | 198.51.100.18/28 |
+# +----|-----------------------------------------|----------------------------+
+# | |
+# +----|----------------------------+ +----|----------------------------+
+# | | default via 198.51.100.2 | | | default via 198.51.100.18 |
+# | | | | | |
+# | | 198.51.100.1/28 | | | 198.51.100.17/28 |
+# | + $h2 | | + $h4_lag |
+# | H2 (vrf) | | H4 (vrf) |
+# +---------------------------------+ +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ tc_sample_rate_test
+ tc_sample_max_rate_test
+ tc_sample_conflict_test
+ tc_sample_group_conflict_test
+ tc_sample_md_iif_test
+ tc_sample_md_lag_iif_test
+ tc_sample_md_oif_test
+ tc_sample_md_lag_oif_test
+ tc_sample_md_out_tc_test
+ tc_sample_md_out_tc_occ_test
+ tc_sample_md_latency_test
+ tc_sample_acl_group_conflict_test
+ tc_sample_acl_rate_test
+ tc_sample_acl_max_rate_test
+"
+NUM_NETIFS=8
+CAPTURE_FILE=$(mktemp)
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/28
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+}
+
+h2_destroy()
+{
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ simple_if_fini $h2 198.51.100.1/28
+}
+
+h3_create()
+{
+ ip link set dev $h3 down
+ ip link add name ${h3}_bond type bond mode 802.3ad
+ ip link set dev $h3 master ${h3}_bond
+
+ simple_if_init ${h3}_bond 192.0.2.17/28
+
+ ip -4 route add default vrf v${h3}_bond nexthop via 192.0.2.18
+}
+
+h3_destroy()
+{
+ ip -4 route del default vrf v${h3}_bond nexthop via 192.0.2.18
+
+ simple_if_fini ${h3}_bond 192.0.2.17/28
+
+ ip link set dev $h3 nomaster
+ ip link del dev ${h3}_bond
+}
+
+h4_create()
+{
+ ip link set dev $h4 down
+ ip link add name ${h4}_bond type bond mode 802.3ad
+ ip link set dev $h4 master ${h4}_bond
+
+ simple_if_init ${h4}_bond 198.51.100.17/28
+
+ ip -4 route add default vrf v${h4}_bond nexthop via 198.51.100.18
+}
+
+h4_destroy()
+{
+ ip -4 route del default vrf v${h4}_bond nexthop via 198.51.100.18
+
+ simple_if_fini ${h4}_bond 198.51.100.17/28
+
+ ip link set dev $h4 nomaster
+ ip link del dev ${h4}_bond
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ __addr_add_del $rp1 add 192.0.2.2/28
+ tc qdisc add dev $rp1 clsact
+
+ ip link set dev $rp2 up
+ __addr_add_del $rp2 add 198.51.100.2/28
+ tc qdisc add dev $rp2 clsact
+
+ ip link add name ${rp3}_bond type bond mode 802.3ad
+ ip link set dev $rp3 master ${rp3}_bond
+ __addr_add_del ${rp3}_bond add 192.0.2.18/28
+ tc qdisc add dev $rp3 clsact
+ ip link set dev ${rp3}_bond up
+
+ ip link add name ${rp4}_bond type bond mode 802.3ad
+ ip link set dev $rp4 master ${rp4}_bond
+ __addr_add_del ${rp4}_bond add 198.51.100.18/28
+ tc qdisc add dev $rp4 clsact
+ ip link set dev ${rp4}_bond up
+}
+
+router_destroy()
+{
+ ip link set dev ${rp4}_bond down
+ tc qdisc del dev $rp4 clsact
+ __addr_add_del ${rp4}_bond del 198.51.100.18/28
+ ip link set dev $rp4 nomaster
+ ip link del dev ${rp4}_bond
+
+ ip link set dev ${rp3}_bond down
+ tc qdisc del dev $rp3 clsact
+ __addr_add_del ${rp3}_bond del 192.0.2.18/28
+ ip link set dev $rp3 nomaster
+ ip link del dev ${rp3}_bond
+
+ tc qdisc del dev $rp2 clsact
+ __addr_add_del $rp2 del 198.51.100.2/28
+ ip link set dev $rp2 down
+
+ tc qdisc del dev $rp1 clsact
+ __addr_add_del $rp1 del 192.0.2.2/28
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+ h3=${NETIFS[p5]}
+ rp3=${NETIFS[p6]}
+ h4=${NETIFS[p7]}
+ rp4=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ h4_create
+ router_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ rm -f $CAPTURE_FILE
+
+ router_destroy
+ h4_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+psample_capture_start()
+{
+ rm -f $CAPTURE_FILE
+
+ psample &> $CAPTURE_FILE &
+
+ sleep 1
+}
+
+psample_capture_stop()
+{
+ { kill %% && wait %%; } 2>/dev/null
+}
+
+__tc_sample_rate_test()
+{
+ local desc=$1; shift
+ local dip=$1; shift
+ local pkts pct
+
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 32 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B $dip -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+ pct=$((100 * (pkts - 100) / 100))
+ (( -25 <= pct && pct <= 25))
+ check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+ log_test "tc sample rate ($desc)"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_rate_test()
+{
+ __tc_sample_rate_test "forward" 198.51.100.1
+ __tc_sample_rate_test "local receive" 192.0.2.2
+}
+
+tc_sample_max_rate_test()
+{
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate $((35 * 10 ** 8)) group 1
+ check_err $? "Failed to configure sampling rule with max rate"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate $((35 * 10 ** 8 + 1)) \
+ group 1 &> /dev/null
+ check_fail $? "Managed to configure sampling rate above maximum"
+
+ log_test "tc sample maximum rate"
+}
+
+tc_sample_conflict_test()
+{
+ RET=0
+
+ # Test that two sampling rules cannot be configured on the same port,
+ # even when they share the same parameters.
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+ skip_sw action sample rate 1024 group 1 &> /dev/null
+ check_fail $? "Managed to configure second sampling rule"
+
+ # Delete the first rule and make sure the second rule can now be
+ # configured.
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+ tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule after deletion"
+
+ log_test "tc sample conflict test"
+
+ tc filter del dev $rp1 ingress protocol all pref 2 handle 102 matchall
+}
+
+tc_sample_group_conflict_test()
+{
+ RET=0
+
+ # Test that two sampling rules cannot be configured on the same port
+ # with different groups.
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+ skip_sw action sample rate 1024 group 2 &> /dev/null
+ check_fail $? "Managed to configure sampling rule with conflicting group"
+
+ log_test "tc sample group conflict test"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_iif_test()
+{
+ local rp1_ifindex
+
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp1_ifindex=$(ip -j -p link show dev $rp1 | jq '.[]["ifindex"]')
+ grep -q -e "in-ifindex $rp1_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected in-ifindex"
+
+ log_test "tc sample iif"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_iif_test()
+{
+ local rp3_ifindex
+
+ RET=0
+
+ tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+ -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp3_ifindex=$(ip -j -p link show dev $rp3 | jq '.[]["ifindex"]')
+ grep -q -e "in-ifindex $rp3_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected in-ifindex"
+
+ log_test "tc sample lag iif"
+
+ tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_oif_test()
+{
+ local rp2_ifindex
+
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp2_ifindex=$(ip -j -p link show dev $rp2 | jq '.[]["ifindex"]')
+ grep -q -e "out-ifindex $rp2_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-ifindex"
+
+ log_test "tc sample oif"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_oif_test()
+{
+ local rp4_ifindex
+
+ RET=0
+
+ tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+ -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp4_ifindex=$(ip -j -p link show dev $rp4 | jq '.[]["ifindex"]')
+ grep -q -e "out-ifindex $rp4_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-ifindex"
+
+ log_test "tc sample lag oif"
+
+ tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_test()
+{
+ RET=0
+
+ # Output traffic class is not supported on Spectrum-1.
+ [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ # By default, all the packets should go to the same traffic class (0).
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "out-tc 0 " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-tc (0)"
+
+ # Map all priorities to highest traffic class (7) and check reported
+ # out-tc.
+ tc qdisc replace dev $rp2 root handle 1: \
+ prio bands 3 priomap 0 0 0 0 0 0 0 0
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "out-tc 7 " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-tc (7)"
+
+ log_test "tc sample out-tc"
+
+ tc qdisc del dev $rp2 root handle 1:
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_occ_test()
+{
+ local backlog pct occ
+
+ RET=0
+
+ # Output traffic class occupancy is not supported on Spectrum-1.
+ [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ # Configure a shaper on egress to create congestion.
+ tc qdisc replace dev $rp2 root handle 1: \
+ tbf rate 1Mbit burst 256k limit 1M
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q &
+
+ # Allow congestion to reach steady state.
+ sleep 10
+
+ backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]')
+
+ # Kill mausezahn.
+ { kill %% && wait %%; } 2>/dev/null
+
+ psample_capture_stop
+
+ # Record last congestion sample.
+ occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \
+ cut -d ' ' -f 16)
+
+ pct=$((100 * (occ - backlog) / backlog))
+ (( -1 <= pct && pct <= 1))
+ check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-5%"
+
+ log_test "tc sample out-tc-occ"
+
+ tc qdisc del dev $rp2 root handle 1:
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_latency_test()
+{
+ RET=0
+
+ # Egress sampling not supported on Spectrum-1.
+ [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+ tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "latency " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have latency attribute"
+
+ log_test "tc sample latency"
+
+ tc filter del dev $rp2 egress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_acl_group_conflict_test()
+{
+ RET=0
+
+ # Test that two flower sampling rules cannot be configured on the same
+ # port with different groups.
+
+ # Policy-based sampling is not supported on Spectrum-1.
+ [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ tc filter add dev $rp1 ingress protocol ip pref 2 handle 102 flower \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule with same group"
+
+ tc filter add dev $rp1 ingress protocol ip pref 3 handle 103 flower \
+ skip_sw action sample rate 1024 group 2 &> /dev/null
+ check_fail $? "Managed to configure sampling rule with conflicting group"
+
+ log_test "tc sample (w/ flower) group conflict test"
+
+ tc filter del dev $rp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+__tc_sample_acl_rate_test()
+{
+ local bind=$1; shift
+ local port=$1; shift
+ local pkts pct
+
+ RET=0
+
+ # Policy-based sampling is not supported on Spectrum-1.
+ [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+ tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+ pct=$((100 * (pkts - 100) / 100))
+ (( -25 <= pct && pct <= 25))
+ check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+ # Setup a filter that should not match any packet and make sure packets
+ # are not sampled.
+ tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 198.51.100.10 action sample rate 32 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "group 1 " $CAPTURE_FILE
+ check_fail $? "Sampled packets when should not"
+
+ log_test "tc sample (w/ flower) rate ($bind)"
+
+ tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+}
+
+tc_sample_acl_rate_test()
+{
+ __tc_sample_acl_rate_test ingress $rp1
+ __tc_sample_acl_rate_test egress $rp2
+}
+
+tc_sample_acl_max_rate_test()
+{
+ RET=0
+
+ # Policy-based sampling is not supported on Spectrum-1.
+ [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw action sample rate $((2 ** 24 - 1)) group 1
+ check_err $? "Failed to configure sampling rule with max rate"
+
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw action sample rate $((2 ** 24)) \
+ group 1 &> /dev/null
+ check_fail $? "Managed to configure sampling rate above maximum"
+
+ log_test "tc sample (w/ flower) maximum rate"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
index 9f64d5c7107b..7ca1f030d209 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
@@ -24,8 +24,11 @@ function check {
local code=$1
local str=$2
local exp_str=$3
+ local exp_fail=$4
- if [ $code -ne 0 ]; then
+ [ -z "$exp_fail" ] && cop="-ne" || cop="-eq"
+
+ if [ $code $cop 0 ]; then
((num_errors++))
return
fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
new file mode 100755
index 000000000000..0c56746e9ce0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+NSIM_NETDEV=$(make_netdev)
+[ a$ETHTOOL == a ] && ETHTOOL=ethtool
+
+set -o pipefail
+
+# netdevsim starts out with None/None
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "Configured FEC encodings: None
+Active FEC encoding: None"
+
+# Test Auto
+$ETHTOOL --set-fec $NSIM_NETDEV encoding auto
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "Configured FEC encodings: Auto
+Active FEC encoding: Off"
+
+# Test case in-sensitivity
+for o in off Off OFF; do
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+ check $?
+ s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+ check $? "$s" "Configured FEC encodings: Off
+Active FEC encoding: Off"
+done
+
+for o in BaseR baser BAser; do
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+ check $?
+ s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+ check $? "$s" "Configured FEC encodings: BaseR
+Active FEC encoding: BaseR"
+done
+
+for o in llrs rs; do
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+ check $?
+ s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+ check $? "$s" "Configured FEC encodings: ${o^^}
+Active FEC encoding: ${o^^}"
+done
+
+# Test mutliple bits
+$ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "Configured FEC encodings: RS LLRS
+Active FEC encoding: LLRS"
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding rs off auto
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "Configured FEC encodings: Auto Off RS
+Active FEC encoding: RS"
+
+# Make sure other link modes are rejected
+$ETHTOOL --set-fec $NSIM_NETDEV encoding FIBRE 2>/dev/null
+check $? '' '' 1
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding bla-bla-bla 2>/dev/null
+check $? '' '' 1
+
+# Try JSON
+$ETHTOOL --json --show-fec $NSIM_NETDEV | jq empty >>/dev/null 2>&1
+if [ $? -eq 0 ]; then
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding auto
+ check $?
+
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]')
+ check $? "$s" '"Auto"'
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]')
+ check $? "$s" '"Off"'
+
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding auto RS
+ check $?
+
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]')
+ check $? "$s" '"Auto"
+"RS"'
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]')
+ check $? "$s" '"RS"'
+fi
+
+# Test error injection
+echo 11 > $NSIM_DEV_DFS/ethtool/get_err
+
+$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1
+check $? '' '' 1
+
+echo 0 > $NSIM_DEV_DFS/ethtool/get_err
+echo 11 > $NSIM_DEV_DFS/ethtool/set_err
+
+$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1
+check $?
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding RS 2>/dev/null
+check $? '' '' 1
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
index be0c1b5ee6b8..ba75c81cda91 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -11,14 +11,33 @@ ALL_TESTS="
nexthop_single_add_err_test
nexthop_group_add_test
nexthop_group_add_err_test
+ nexthop_res_group_add_test
+ nexthop_res_group_add_err_test
nexthop_group_replace_test
nexthop_group_replace_err_test
+ nexthop_res_group_replace_test
+ nexthop_res_group_replace_err_test
+ nexthop_res_group_idle_timer_test
+ nexthop_res_group_idle_timer_del_test
+ nexthop_res_group_increase_idle_timer_test
+ nexthop_res_group_decrease_idle_timer_test
+ nexthop_res_group_unbalanced_timer_test
+ nexthop_res_group_unbalanced_timer_del_test
+ nexthop_res_group_no_unbalanced_timer_test
+ nexthop_res_group_short_unbalanced_timer_test
+ nexthop_res_group_increase_unbalanced_timer_test
+ nexthop_res_group_decrease_unbalanced_timer_test
+ nexthop_res_group_force_migrate_busy_test
nexthop_single_replace_test
nexthop_single_replace_err_test
nexthop_single_in_group_replace_test
nexthop_single_in_group_replace_err_test
+ nexthop_single_in_res_group_replace_test
+ nexthop_single_in_res_group_replace_err_test
nexthop_single_in_group_delete_test
nexthop_single_in_group_delete_err_test
+ nexthop_single_in_res_group_delete_test
+ nexthop_single_in_res_group_delete_err_test
nexthop_replay_test
nexthop_replay_err_test
"
@@ -27,6 +46,7 @@ DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/
NUM_NETIFS=0
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
@@ -44,6 +64,28 @@ nexthop_check()
return 0
}
+nexthop_bucket_nhid_count_check()
+{
+ local group_id=$1; shift
+ local expected
+ local count
+ local nhid
+ local ret
+
+ while (($# > 0)); do
+ nhid=$1; shift
+ expected=$1; shift
+
+ count=$($IP nexthop bucket show id $group_id nhid $nhid |
+ grep "trap" | wc -l)
+ if ((expected != count)); then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
nexthop_resource_check()
{
local expected_occ=$1; shift
@@ -159,6 +201,71 @@ nexthop_group_add_err_test()
nexthop_resource_set 9999
}
+nexthop_res_group_add_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ $IP nexthop add id 10 group 1,3/2,2 type resilient buckets 5
+ nexthop_check "id 10" "id 10 group 1,3/2,2 type resilient buckets 5 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected weighted nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 3
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 7
+ check_err $? "Wrong weighted nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ log_test "Resilient nexthop group add and delete"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_add_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 2
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4 &> /dev/null
+ check_fail $? "Nexthop group addition succeeded when should fail"
+
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Resilient nexthop group add failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
nexthop_group_replace_test()
{
RET=0
@@ -206,6 +313,411 @@ nexthop_group_replace_err_test()
nexthop_resource_set 9999
}
+nexthop_res_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+ $IP nexthop replace id 10 group 1/2/3 type resilient
+ nexthop_check "id 10" "id 10 group 1/2/3 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 3 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 9
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Resilient nexthop group replace"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_replace_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+ $IP nexthop replace id 10 group 1/2/3 type resilient &> /dev/null
+ check_fail $? "Nexthop group replacement succeeded when should fail"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_bucket_nhid_count_check 10 1 3
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 3
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 9
+ check_err $? "Wrong nexthop occupancy after failure"
+
+ log_test "Resilient nexthop group replace failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+}
+
+nexthop_res_mark_buckets_busy()
+{
+ local group_id=$1; shift
+ local nhid=$1; shift
+ local count=$1; shift
+ local index
+
+ for index in $($IP -j nexthop bucket show id $group_id nhid $nhid |
+ jq '.[].bucket.index' | head -n ${count:--0})
+ do
+ echo $group_id $index \
+ > $DEBUGFS_NET_DIR/fib/nexthop_bucket_activity
+ done
+}
+
+nexthop_res_num_nhid_buckets()
+{
+ local group_id=$1; shift
+ local nhid=$1; shift
+
+ $IP -j nexthop bucket show id $group_id nhid $nhid | jq length
+}
+
+nexthop_res_group_idle_timer_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 idle_timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "Group expected to be unbalanced"
+
+ sleep 6
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after idle timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_idle_timer_del_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,50/2,50/3,1 \
+ type resilient buckets 8 idle_timer 6
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4 3 0
+ check_err $? "Group expected to be unbalanced"
+
+ sleep 4
+
+ # Deletion prompts group replacement. Check that the bucket timers
+ # are kept.
+ $IP nexthop delete id 3
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "Group expected to still be unbalanced"
+
+ sleep 4
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after idle timer (with delete)"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+ sleep 4
+
+ # 6 seconds, past the original timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group still expected to be unbalanced"
+
+ sleep 4
+
+ # 10 seconds, past the new timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer increase"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_decrease_timer_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 8
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 4
+ sleep 4
+
+ # 6 seconds, past the new timer, before the old timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer decrease"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_del_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,100/2,100/3,1 \
+ type resilient buckets 8 $timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,100/2,300/3,1 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+ sleep 4
+
+ # 6 seconds, past the original timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group still expected to be unbalanced"
+
+ sleep 4
+
+ # 10 seconds, past the new timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer increase"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_idle_timer_test()
+{
+ __nexthop_res_group_increase_timer_test idle_timer
+}
+
+nexthop_res_group_decrease_idle_timer_test()
+{
+ __nexthop_res_group_decrease_timer_test idle_timer
+}
+
+nexthop_res_group_unbalanced_timer_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 6 unbalanced_timer 10
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ for i in 1 2; do
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "$i: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+ done
+
+ # 3 x sleep 4 > unbalanced timer 10
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_unbalanced_timer_del_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,50/2,50/3,1 type resilient \
+ buckets 8 idle_timer 6 unbalanced_timer 10
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+ # Check that NH delete does not reset unbalanced time.
+ sleep 4
+ $IP nexthop delete id 3
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "1: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "2: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+
+ # 3 x sleep 4 > unbalanced timer 10
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced timer (with delete)"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_no_unbalanced_timer_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ for i in $(seq 3); do
+ sleep 60
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "$i: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+ done
+
+ log_test "Buckets never force-migrated without unbalanced timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_short_unbalanced_timer_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 120 unbalanced_timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 5
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced < idle timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_unbalanced_timer_test()
+{
+ __nexthop_res_group_increase_timer_test unbalanced_timer
+}
+
+nexthop_res_group_decrease_unbalanced_timer_test()
+{
+ __nexthop_res_group_decrease_timer_test unbalanced_timer
+}
+
+nexthop_res_group_force_migrate_busy_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 120
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ $IP nexthop replace id 10 group 2 type resilient
+ nexthop_bucket_nhid_count_check 10 2 8
+ check_err $? "All buckets expected to have migrated"
+
+ log_test "Busy buckets force-migrated when NH removed"
+
+ $IP nexthop flush &> /dev/null
+}
+
nexthop_single_replace_test()
{
RET=0
@@ -299,6 +811,63 @@ nexthop_single_in_group_replace_err_test()
nexthop_resource_set 9999
}
+nexthop_single_in_res_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+ check_err $? "Failed to replace nexthop when should not"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in resilient group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_replace_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+ check_fail $? "Nexthop replacement succeeded when should fail"
+
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after failure"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in resilient group failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
nexthop_single_in_group_delete_test()
{
RET=0
@@ -346,6 +915,57 @@ nexthop_single_in_group_delete_err_test()
nexthop_resource_set 9999
}
+nexthop_single_in_res_group_delete_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ $IP nexthop del id 1
+ nexthop_check "id 10" "id 10 group 2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 2 4
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 5
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in resilient group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_delete_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2/3 type resilient buckets 6
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+ $IP nexthop del id 1
+
+ # We failed to replace the two nexthop buckets that were originally
+ # assigned to nhid 1.
+ nexthop_bucket_nhid_count_check 10 2 2 3 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 8
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in resilient group failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
nexthop_replay_test()
{
RET=0
diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
new file mode 100755
index 000000000000..ee10b1a8933c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
@@ -0,0 +1,181 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the psample module. It makes use of netdevsim
+# which periodically generates "sampled" packets.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ psample_enable_test
+ psample_group_num_test
+ psample_md_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/
+CAPTURE_FILE=$(mktemp)
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+psample_capture()
+{
+ rm -f $CAPTURE_FILE
+
+ timeout 2 ip netns exec testns1 psample &> $CAPTURE_FILE
+}
+
+psample_enable_test()
+{
+ RET=0
+
+ echo 1 > $PSAMPLE_DIR/enable
+ check_err $? "Failed to enable sampling when should not"
+
+ echo 1 > $PSAMPLE_DIR/enable 2>/dev/null
+ check_fail $? "Sampling enablement succeeded when should fail"
+
+ psample_capture
+ if [ $(cat $CAPTURE_FILE | wc -l) -eq 0 ]; then
+ check_err 1 "Failed to capture sampled packets"
+ fi
+
+ echo 0 > $PSAMPLE_DIR/enable
+ check_err $? "Failed to disable sampling when should not"
+
+ echo 0 > $PSAMPLE_DIR/enable 2>/dev/null
+ check_fail $? "Sampling disablement succeeded when should fail"
+
+ psample_capture
+ if [ $(cat $CAPTURE_FILE | wc -l) -ne 0 ]; then
+ check_err 1 "Captured sampled packets when should not"
+ fi
+
+ log_test "psample enable / disable"
+}
+
+psample_group_num_test()
+{
+ RET=0
+
+ echo 1234 > $PSAMPLE_DIR/group_num
+ echo 1 > $PSAMPLE_DIR/enable
+
+ psample_capture
+ grep -q -e "group 1234" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong group number"
+
+ # New group number should only be used after disable / enable.
+ echo 4321 > $PSAMPLE_DIR/group_num
+
+ psample_capture
+ grep -q -e "group 4321" $CAPTURE_FILE
+ check_fail $? "Group number changed while sampling is active"
+
+ echo 0 > $PSAMPLE_DIR/enable && echo 1 > $PSAMPLE_DIR/enable
+
+ psample_capture
+ grep -q -e "group 4321" $CAPTURE_FILE
+ check_err $? "Group number did not change after restarting sampling"
+
+ log_test "psample group number"
+
+ echo 0 > $PSAMPLE_DIR/enable
+}
+
+psample_md_test()
+{
+ RET=0
+
+ echo 1 > $PSAMPLE_DIR/enable
+
+ echo 1234 > $PSAMPLE_DIR/in_ifindex
+ echo 4321 > $PSAMPLE_DIR/out_ifindex
+ psample_capture
+
+ grep -q -e "in-ifindex 1234" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong in-ifindex"
+
+ grep -q -e "out-ifindex 4321" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong out-ifindex"
+
+ echo 5 > $PSAMPLE_DIR/out_tc
+ psample_capture
+
+ grep -q -e "out-tc 5" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong out-tc"
+
+ echo $((2**16 - 1)) > $PSAMPLE_DIR/out_tc
+ psample_capture
+
+ grep -q -e "out-tc " $CAPTURE_FILE
+ check_fail $? "Sampled packets reported with out-tc when should not"
+
+ echo 1 > $PSAMPLE_DIR/out_tc
+ echo 10000 > $PSAMPLE_DIR/out_tc_occ_max
+ psample_capture
+
+ grep -q -e "out-tc-occ " $CAPTURE_FILE
+ check_err $? "Sampled packets not reported with out-tc-occ when should"
+
+ echo 0 > $PSAMPLE_DIR/out_tc_occ_max
+ psample_capture
+
+ grep -q -e "out-tc-occ " $CAPTURE_FILE
+ check_fail $? "Sampled packets reported with out-tc-occ when should not"
+
+ echo 10000 > $PSAMPLE_DIR/latency_max
+ psample_capture
+
+ grep -q -e "latency " $CAPTURE_FILE
+ check_err $? "Sampled packets not reported with latency when should"
+
+ echo 0 > $PSAMPLE_DIR/latency_max
+ psample_capture
+
+ grep -q -e "latency " $CAPTURE_FILE
+ check_fail $? "Sampled packets reported with latency when should not"
+
+ log_test "psample metadata"
+
+ echo 0 > $PSAMPLE_DIR/enable
+}
+
+setup_prepare()
+{
+ modprobe netdevsim &> /dev/null
+
+ echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+ set -e
+
+ ip netns add testns1
+ devlink dev reload $DEVLINK_DEV netns testns1
+
+ set +e
+}
+
+cleanup()
+{
+ pre_cleanup
+ rm -f $CAPTURE_FILE
+ ip netns del testns1
+ echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
index 59216f3cfb12..2968cdc7df30 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
@@ -32,6 +32,10 @@ grep "myevent[[:space:]]u64 var1" synthetic_events
# it is not possible to add same name event
! echo "myevent u64 var2" >> synthetic_events
+# make sure !synthetic event doesn't require a field
+echo "!myevent" >> synthetic_events
+echo "myevent u64 var1" >> synthetic_events
+
# Non-append open will cleanup all events and add new one
echo "myevent u64 var2" > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
index ada594fe16cb..955e3ceea44b 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
@@ -1,19 +1,38 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test synthetic_events syntax parser errors
-# requires: synthetic_events error_log
+# requires: synthetic_events error_log "char name[]' >> synthetic_events":README
check_error() { # command-with-error-pos-by-^
ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events'
}
+check_dyn_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'synthetic_events' "$1" 'dynamic_events'
+}
+
check_error 'myevent ^chr arg' # INVALID_TYPE
-check_error 'myevent ^char str[];; int v' # INVALID_TYPE
-check_error 'myevent char ^str]; int v' # INVALID_NAME
-check_error 'myevent char ^str;[]' # INVALID_NAME
-check_error 'myevent ^char str[; int v' # INVALID_TYPE
-check_error '^mye;vent char str[]' # BAD_NAME
-check_error 'myevent char str[]; ^int' # INVALID_FIELD
-check_error '^myevent' # INCOMPLETE_CMD
+check_error 'myevent ^unsigned arg' # INCOMPLETE_TYPE
+
+check_error 'myevent char ^str]; int v' # BAD_NAME
+check_error '^mye-vent char str[]' # BAD_NAME
+check_error 'myevent char ^st-r[]' # BAD_NAME
+
+check_error 'myevent char str;^[]' # INVALID_FIELD
+check_error 'myevent char str; ^int' # INVALID_FIELD
+
+check_error 'myevent char ^str[; int v' # INVALID_ARRAY_SPEC
+check_error 'myevent char ^str[kdjdk]' # INVALID_ARRAY_SPEC
+check_error 'myevent char ^str[257]' # INVALID_ARRAY_SPEC
+
+check_error '^mye;vent char str[]' # INVALID_CMD
+check_error '^myevent ; char str[]' # INVALID_CMD
+check_error '^myevent; char str[]' # INVALID_CMD
+check_error '^myevent ;char str[]' # INVALID_CMD
+check_error '^; char str[]' # INVALID_CMD
+check_error '^;myevent char str[]' # INVALID_CMD
+check_error '^myevent' # INVALID_CMD
+
+check_dyn_error '^s:junk/myevent char str[' # INVALID_DYN_CMD
exit 0
diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore
index 4c69408f3e84..a4969f7ee020 100644
--- a/tools/testing/selftests/gpio/.gitignore
+++ b/tools/testing/selftests/gpio/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-gpio-mockup-chardev
+gpio-mockup-cdev
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 41582fe485ee..39f2bbe8dd3d 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -1,31 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-VAR_CFLAGS := $(shell pkg-config --cflags mount 2>/dev/null)
-VAR_LDLIBS := $(shell pkg-config --libs mount 2>/dev/null)
-ifeq ($(VAR_LDLIBS),)
-VAR_LDLIBS := -lmount -I/usr/include/libmount
-endif
-
-CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/ $(VAR_CFLAGS)
-LDLIBS += $(VAR_LDLIBS)
-
TEST_PROGS := gpio-mockup.sh
TEST_FILES := gpio-mockup-sysfs.sh
-TEST_GEN_PROGS_EXTENDED := gpio-mockup-chardev
+TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev
-KSFT_KHDR_INSTALL := 1
include ../lib.mk
-
-GPIODIR := $(realpath ../../../gpio)
-GPIOOUT := $(OUTPUT)/tools-gpio/
-GPIOOBJ := $(GPIOOUT)/gpio-utils.o
-
-CLEAN += ; $(RM) -rf $(GPIOOUT)
-
-$(TEST_GEN_PROGS_EXTENDED): $(GPIOOBJ)
-
-$(GPIOOUT):
- mkdir -p $@
-
-$(GPIOOBJ): $(GPIOOUT)
- $(MAKE) OUTPUT=$(GPIOOUT) -C $(GPIODIR)
diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config
index abaa6902b7b6..ce100342c20b 100644
--- a/tools/testing/selftests/gpio/config
+++ b/tools/testing/selftests/gpio/config
@@ -1,2 +1,3 @@
CONFIG_GPIOLIB=y
+CONFIG_GPIO_CDEV=y
CONFIG_GPIO_MOCKUP=m
diff --git a/tools/testing/selftests/gpio/gpio-mockup-cdev.c b/tools/testing/selftests/gpio/gpio-mockup-cdev.c
new file mode 100644
index 000000000000..e83eac71621a
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-mockup-cdev.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GPIO mockup cdev test helper
+ *
+ * Copyright (C) 2020 Kent Gibson
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/gpio.h>
+
+#define CONSUMER "gpio-mockup-cdev"
+
+static int request_line_v2(int cfd, unsigned int offset,
+ uint64_t flags, unsigned int val)
+{
+ struct gpio_v2_line_request req;
+ int ret;
+
+ memset(&req, 0, sizeof(req));
+ req.num_lines = 1;
+ req.offsets[0] = offset;
+ req.config.flags = flags;
+ strcpy(req.consumer, CONSUMER);
+ if (flags & GPIO_V2_LINE_FLAG_OUTPUT) {
+ req.config.num_attrs = 1;
+ req.config.attrs[0].mask = 1;
+ req.config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES;
+ if (val)
+ req.config.attrs[0].attr.values = 1;
+ }
+ ret = ioctl(cfd, GPIO_V2_GET_LINE_IOCTL, &req);
+ if (ret == -1)
+ return -errno;
+ return req.fd;
+}
+
+
+static int get_value_v2(int lfd)
+{
+ struct gpio_v2_line_values vals;
+ int ret;
+
+ memset(&vals, 0, sizeof(vals));
+ vals.mask = 1;
+ ret = ioctl(lfd, GPIO_V2_LINE_GET_VALUES_IOCTL, &vals);
+ if (ret == -1)
+ return -errno;
+ return vals.bits & 0x1;
+}
+
+static int request_line_v1(int cfd, unsigned int offset,
+ uint32_t flags, unsigned int val)
+{
+ struct gpiohandle_request req;
+ int ret;
+
+ memset(&req, 0, sizeof(req));
+ req.lines = 1;
+ req.lineoffsets[0] = offset;
+ req.flags = flags;
+ strcpy(req.consumer_label, CONSUMER);
+ if (flags & GPIOHANDLE_REQUEST_OUTPUT)
+ req.default_values[0] = val;
+
+ ret = ioctl(cfd, GPIO_GET_LINEHANDLE_IOCTL, &req);
+ if (ret == -1)
+ return -errno;
+ return req.fd;
+}
+
+static int get_value_v1(int lfd)
+{
+ struct gpiohandle_data vals;
+ int ret;
+
+ memset(&vals, 0, sizeof(vals));
+ ret = ioctl(lfd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &vals);
+ if (ret == -1)
+ return -errno;
+ return vals.values[0];
+}
+
+static void usage(char *prog)
+{
+ printf("Usage: %s [-l] [-b <bias>] [-s <value>] [-u <uAPI>] <gpiochip> <offset>\n", prog);
+ printf(" -b: set line bias to one of pull-down, pull-up, disabled\n");
+ printf(" (default is to leave bias unchanged):\n");
+ printf(" -l: set line active low (default is active high)\n");
+ printf(" -s: set line value (default is to get line value)\n");
+ printf(" -u: uAPI version to use (default is 2)\n");
+ exit(-1);
+}
+
+static int wait_signal(void)
+{
+ int sig;
+ sigset_t wset;
+
+ sigemptyset(&wset);
+ sigaddset(&wset, SIGHUP);
+ sigaddset(&wset, SIGINT);
+ sigaddset(&wset, SIGTERM);
+ sigwait(&wset, &sig);
+
+ return sig;
+}
+
+int main(int argc, char *argv[])
+{
+ char *chip;
+ int opt, ret, cfd, lfd;
+ unsigned int offset, val, abiv;
+ uint32_t flags_v1;
+ uint64_t flags_v2;
+
+ abiv = 2;
+ ret = 0;
+ flags_v1 = GPIOHANDLE_REQUEST_INPUT;
+ flags_v2 = GPIO_V2_LINE_FLAG_INPUT;
+
+ while ((opt = getopt(argc, argv, "lb:s:u:")) != -1) {
+ switch (opt) {
+ case 'l':
+ flags_v1 |= GPIOHANDLE_REQUEST_ACTIVE_LOW;
+ flags_v2 |= GPIO_V2_LINE_FLAG_ACTIVE_LOW;
+ break;
+ case 'b':
+ if (strcmp("pull-up", optarg) == 0) {
+ flags_v1 |= GPIOHANDLE_REQUEST_BIAS_PULL_UP;
+ flags_v2 |= GPIO_V2_LINE_FLAG_BIAS_PULL_UP;
+ } else if (strcmp("pull-down", optarg) == 0) {
+ flags_v1 |= GPIOHANDLE_REQUEST_BIAS_PULL_DOWN;
+ flags_v2 |= GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN;
+ } else if (strcmp("disabled", optarg) == 0) {
+ flags_v1 |= GPIOHANDLE_REQUEST_BIAS_DISABLE;
+ flags_v2 |= GPIO_V2_LINE_FLAG_BIAS_DISABLED;
+ }
+ break;
+ case 's':
+ val = atoi(optarg);
+ flags_v1 &= ~GPIOHANDLE_REQUEST_INPUT;
+ flags_v1 |= GPIOHANDLE_REQUEST_OUTPUT;
+ flags_v2 &= ~GPIO_V2_LINE_FLAG_INPUT;
+ flags_v2 |= GPIO_V2_LINE_FLAG_OUTPUT;
+ break;
+ case 'u':
+ abiv = atoi(optarg);
+ break;
+ default:
+ usage(argv[0]);
+ }
+ }
+
+ if (argc < optind + 2)
+ usage(argv[0]);
+
+ chip = argv[optind];
+ offset = atoi(argv[optind + 1]);
+
+ cfd = open(chip, 0);
+ if (cfd == -1) {
+ fprintf(stderr, "Failed to open %s: %s\n", chip, strerror(errno));
+ return -errno;
+ }
+
+ if (abiv == 1)
+ lfd = request_line_v1(cfd, offset, flags_v1, val);
+ else
+ lfd = request_line_v2(cfd, offset, flags_v2, val);
+
+ close(cfd);
+
+ if (lfd < 0) {
+ fprintf(stderr, "Failed to request %s:%d: %s\n", chip, offset, strerror(-lfd));
+ return lfd;
+ }
+
+ if (flags_v2 & GPIO_V2_LINE_FLAG_OUTPUT) {
+ wait_signal();
+ } else {
+ if (abiv == 1)
+ ret = get_value_v1(lfd);
+ else
+ ret = get_value_v2(lfd);
+ }
+
+ close(lfd);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/gpio/gpio-mockup-chardev.c b/tools/testing/selftests/gpio/gpio-mockup-chardev.c
deleted file mode 100644
index 73ead8828d3a..000000000000
--- a/tools/testing/selftests/gpio/gpio-mockup-chardev.c
+++ /dev/null
@@ -1,323 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * GPIO chardev test helper
- *
- * Copyright (C) 2016 Bamvor Jian Zhang
- */
-
-#define _GNU_SOURCE
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <sys/ioctl.h>
-#include <libmount.h>
-#include <err.h>
-#include <dirent.h>
-#include <linux/gpio.h>
-#include "../../../gpio/gpio-utils.h"
-
-#define CONSUMER "gpio-selftest"
-#define GC_NUM 10
-enum direction {
- OUT,
- IN
-};
-
-static int get_debugfs(char **path)
-{
- struct libmnt_context *cxt;
- struct libmnt_table *tb;
- struct libmnt_iter *itr = NULL;
- struct libmnt_fs *fs;
- int found = 0, ret;
-
- cxt = mnt_new_context();
- if (!cxt)
- err(EXIT_FAILURE, "libmount context allocation failed");
-
- itr = mnt_new_iter(MNT_ITER_FORWARD);
- if (!itr)
- err(EXIT_FAILURE, "failed to initialize libmount iterator");
-
- if (mnt_context_get_mtab(cxt, &tb))
- err(EXIT_FAILURE, "failed to read mtab");
-
- while (mnt_table_next_fs(tb, itr, &fs) == 0) {
- const char *type = mnt_fs_get_fstype(fs);
-
- if (!strcmp(type, "debugfs")) {
- found = 1;
- break;
- }
- }
- if (found) {
- ret = asprintf(path, "%s/gpio", mnt_fs_get_target(fs));
- if (ret < 0)
- err(EXIT_FAILURE, "failed to format string");
- }
-
- mnt_free_iter(itr);
- mnt_free_context(cxt);
-
- if (!found)
- return -1;
-
- return 0;
-}
-
-static int gpio_debugfs_get(const char *consumer, int *dir, int *value)
-{
- char *debugfs;
- FILE *f;
- char *line = NULL;
- size_t len = 0;
- char *cur;
- int found = 0;
-
- if (get_debugfs(&debugfs) != 0)
- err(EXIT_FAILURE, "debugfs is not mounted");
-
- f = fopen(debugfs, "r");
- if (!f)
- err(EXIT_FAILURE, "read from gpio debugfs failed");
-
- /*
- * gpio-2 ( |gpio-selftest ) in lo
- */
- while (getline(&line, &len, f) != -1) {
- cur = strstr(line, consumer);
- if (cur == NULL)
- continue;
-
- cur = strchr(line, ')');
- if (!cur)
- continue;
-
- cur += 2;
- if (!strncmp(cur, "out", 3)) {
- *dir = OUT;
- cur += 4;
- } else if (!strncmp(cur, "in", 2)) {
- *dir = IN;
- cur += 4;
- }
-
- if (!strncmp(cur, "hi", 2))
- *value = 1;
- else if (!strncmp(cur, "lo", 2))
- *value = 0;
-
- found = 1;
- break;
- }
- free(debugfs);
- fclose(f);
- free(line);
-
- if (!found)
- return -1;
-
- return 0;
-}
-
-static struct gpiochip_info *list_gpiochip(const char *gpiochip_name, int *ret)
-{
- struct gpiochip_info *cinfo;
- struct gpiochip_info *current;
- const struct dirent *ent;
- DIR *dp;
- char *chrdev_name;
- int fd;
- int i = 0;
-
- cinfo = calloc(sizeof(struct gpiochip_info) * 4, GC_NUM + 1);
- if (!cinfo)
- err(EXIT_FAILURE, "gpiochip_info allocation failed");
-
- current = cinfo;
- dp = opendir("/dev");
- if (!dp) {
- *ret = -errno;
- goto error_out;
- } else {
- *ret = 0;
- }
-
- while (ent = readdir(dp), ent) {
- if (check_prefix(ent->d_name, "gpiochip")) {
- *ret = asprintf(&chrdev_name, "/dev/%s", ent->d_name);
- if (*ret < 0)
- goto error_out;
-
- fd = open(chrdev_name, 0);
- if (fd == -1) {
- *ret = -errno;
- fprintf(stderr, "Failed to open %s\n",
- chrdev_name);
- goto error_close_dir;
- }
- *ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, current);
- if (*ret == -1) {
- perror("Failed to issue CHIPINFO IOCTL\n");
- goto error_close_dir;
- }
- close(fd);
- if (strcmp(current->label, gpiochip_name) == 0
- || check_prefix(current->label, gpiochip_name)) {
- *ret = 0;
- current++;
- i++;
- }
- }
- }
-
- if ((!*ret && i == 0) || *ret < 0) {
- free(cinfo);
- cinfo = NULL;
- }
- if (!*ret && i > 0) {
- cinfo = realloc(cinfo, sizeof(struct gpiochip_info) * 4 * i);
- *ret = i;
- }
-
-error_close_dir:
- closedir(dp);
-error_out:
- if (*ret < 0)
- err(EXIT_FAILURE, "list gpiochip failed: %s", strerror(*ret));
-
- return cinfo;
-}
-
-int gpio_pin_test(struct gpiochip_info *cinfo, int line, int flag, int value)
-{
- struct gpiohandle_data data;
- unsigned int lines[] = {line};
- int fd;
- int debugfs_dir = IN;
- int debugfs_value = 0;
- int ret;
-
- data.values[0] = value;
- ret = gpiotools_request_linehandle(cinfo->name, lines, 1, flag, &data,
- CONSUMER);
- if (ret < 0)
- goto fail_out;
- else
- fd = ret;
-
- ret = gpio_debugfs_get(CONSUMER, &debugfs_dir, &debugfs_value);
- if (ret) {
- ret = -EINVAL;
- goto fail_out;
- }
- if (flag & GPIOHANDLE_REQUEST_INPUT) {
- if (debugfs_dir != IN) {
- errno = -EINVAL;
- ret = -errno;
- }
- } else if (flag & GPIOHANDLE_REQUEST_OUTPUT) {
- if (flag & GPIOHANDLE_REQUEST_ACTIVE_LOW)
- debugfs_value = !debugfs_value;
-
- if (!(debugfs_dir == OUT && value == debugfs_value)) {
- errno = -EINVAL;
- ret = -errno;
- }
- }
- gpiotools_release_linehandle(fd);
-
-fail_out:
- if (ret)
- err(EXIT_FAILURE, "gpio<%s> line<%d> test flag<0x%x> value<%d>",
- cinfo->name, line, flag, value);
-
- return ret;
-}
-
-void gpio_pin_tests(struct gpiochip_info *cinfo, unsigned int line)
-{
- printf("line<%d>", line);
- gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 0);
- printf(".");
- gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 1);
- printf(".");
- gpio_pin_test(cinfo, line,
- GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW,
- 0);
- printf(".");
- gpio_pin_test(cinfo, line,
- GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW,
- 1);
- printf(".");
- gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_INPUT, 0);
- printf(".");
-}
-
-/*
- * ./gpio-mockup-chardev gpio_chip_name_prefix is_valid_gpio_chip
- * Return 0 if successful or exit with EXIT_FAILURE if test failed.
- * gpio_chip_name_prefix: The prefix of gpiochip you want to test. E.g.
- * gpio-mockup
- * is_valid_gpio_chip: Whether the gpio_chip is valid. 1 means valid,
- * 0 means invalid which could not be found by
- * list_gpiochip.
- */
-int main(int argc, char *argv[])
-{
- char *prefix;
- int valid;
- struct gpiochip_info *cinfo;
- struct gpiochip_info *current;
- int i;
- int ret;
-
- if (argc < 3) {
- printf("Usage: %s prefix is_valid", argv[0]);
- exit(EXIT_FAILURE);
- }
-
- prefix = argv[1];
- valid = strcmp(argv[2], "true") == 0 ? 1 : 0;
-
- printf("Test gpiochip %s: ", prefix);
- cinfo = list_gpiochip(prefix, &ret);
- if (!cinfo) {
- if (!valid && ret == 0) {
- printf("Invalid test successful\n");
- ret = 0;
- goto out;
- } else {
- ret = -EINVAL;
- goto out;
- }
- } else if (cinfo && !valid) {
- ret = -EINVAL;
- goto out;
- }
- current = cinfo;
- for (i = 0; i < ret; i++) {
- gpio_pin_tests(current, 0);
- gpio_pin_tests(current, current->lines - 1);
- gpio_pin_tests(current, random() % current->lines);
- current++;
- }
- ret = 0;
- printf("successful\n");
-
-out:
- if (ret)
- fprintf(stderr, "gpio<%s> test failed\n", prefix);
-
- if (cinfo)
- free(cinfo);
-
- if (ret)
- exit(EXIT_FAILURE);
-
- return ret;
-}
diff --git a/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
index dd269d877562..2d2e5d8763b6 100755
--- a/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
+++ b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
@@ -1,135 +1,77 @@
# SPDX-License-Identifier: GPL-2.0
-is_consistent()
-{
- val=
-
- active_low_sysfs=`cat $GPIO_SYSFS/gpio$nr/active_low`
- val_sysfs=`cat $GPIO_SYSFS/gpio$nr/value`
- dir_sysfs=`cat $GPIO_SYSFS/gpio$nr/direction`
- gpio_this_debugfs=`cat $GPIO_DEBUGFS |grep "gpio-$nr" | sed "s/(.*)//g"`
- dir_debugfs=`echo $gpio_this_debugfs | awk '{print $2}'`
- val_debugfs=`echo $gpio_this_debugfs | awk '{print $3}'`
- if [ $val_debugfs = "lo" ]; then
- val=0
- elif [ $val_debugfs = "hi" ]; then
- val=1
- fi
+# Overrides functions in gpio-mockup.sh to test using the GPIO SYSFS uAPI
- if [ $active_low_sysfs = "1" ]; then
- if [ $val = "0" ]; then
- val="1"
- else
- val="0"
- fi
- fi
+SYSFS=`grep -w sysfs /proc/mounts | cut -f2 -d' '`
+[ -d "$SYSFS" ] || skip "sysfs is not mounted"
- if [ $val_sysfs = $val ] && [ $dir_sysfs = $dir_debugfs ]; then
- echo -n "."
- else
- echo "test fail, exit"
- die
- fi
-}
+GPIO_SYSFS="${SYSFS}/class/gpio"
+[ -d "$GPIO_SYSFS" ] || skip "CONFIG_GPIO_SYSFS is not selected"
-test_pin_logic()
-{
- nr=$1
- direction=$2
- active_low=$3
- value=$4
+PLATFORM_SYSFS=$SYSFS/devices/platform
- echo $direction > $GPIO_SYSFS/gpio$nr/direction
- echo $active_low > $GPIO_SYSFS/gpio$nr/active_low
- if [ $direction = "out" ]; then
- echo $value > $GPIO_SYSFS/gpio$nr/value
- fi
- is_consistent $nr
-}
+sysfs_nr=
+sysfs_ldir=
-test_one_pin()
+# determine the sysfs GPIO number given the $chip and $offset
+# e.g. gpiochip1:32
+find_sysfs_nr()
{
- nr=$1
-
- echo -n "test pin<$nr>"
-
- echo $nr > $GPIO_SYSFS/export 2>/dev/null
-
- if [ X$? != X0 ]; then
- echo "test GPIO pin $nr failed"
- die
- fi
-
- #"Checking if the sysfs is consistent with debugfs: "
- is_consistent $nr
-
- #"Checking the logic of active_low: "
- test_pin_logic $nr out 1 1
- test_pin_logic $nr out 1 0
- test_pin_logic $nr out 0 1
- test_pin_logic $nr out 0 0
-
- #"Checking the logic of direction: "
- test_pin_logic $nr in 1 1
- test_pin_logic $nr out 1 0
- test_pin_logic $nr low 0 1
- test_pin_logic $nr high 0 0
-
- echo $nr > $GPIO_SYSFS/unexport
-
- echo "successful"
+ # e.g. /sys/devices/platform/gpio-mockup.1/gpiochip1
+ local platform=$(find $PLATFORM_SYSFS -mindepth 2 -maxdepth 2 -type d -name $chip)
+ [ "$platform" ] || fail "can't find platform of $chip"
+ # e.g. /sys/devices/platform/gpio-mockup.1/gpio/gpiochip508/base
+ local base=$(find ${platform%/*}/gpio/ -mindepth 2 -maxdepth 2 -type f -name base)
+ [ "$base" ] || fail "can't find base of $chip"
+ sysfs_nr=$(($(< "$base") + $offset))
+ sysfs_ldir="$GPIO_SYSFS/gpio$sysfs_nr"
}
-test_one_pin_fail()
+acquire_line()
{
- nr=$1
-
- echo $nr > $GPIO_SYSFS/export 2>/dev/null
-
- if [ X$? != X0 ]; then
- echo "test invalid pin $nr successful"
- else
- echo "test invalid pin $nr failed"
- echo $nr > $GPIO_SYSFS/unexport 2>/dev/null
- die
- fi
+ [ "$sysfs_nr" ] && return
+ find_sysfs_nr
+ echo "$sysfs_nr" > "$GPIO_SYSFS/export"
}
-list_chip()
+# The helpers being overridden...
+get_line()
{
- echo `ls -d $GPIO_DRV_SYSFS/gpiochip* 2>/dev/null`
+ [ -e "$sysfs_ldir/value" ] && echo $(< "$sysfs_ldir/value")
}
-test_chip()
+set_line()
{
- chip=$1
- name=`basename $chip`
- base=`cat $chip/base`
- ngpio=`cat $chip/ngpio`
- printf "%-10s %-5s %-5s\n" $name $base $ngpio
- if [ $ngpio = "0" ]; then
- echo "number of gpio is zero is not allowed".
- fi
- test_one_pin $base
- test_one_pin $(($base + $ngpio - 1))
- test_one_pin $((( RANDOM % $ngpio ) + $base ))
+ acquire_line
+
+ for option in $*; do
+ case $option in
+ active-high)
+ echo 0 > "$sysfs_ldir/active_low"
+ ;;
+ active-low)
+ echo 1 > "$sysfs_ldir/active_low"
+ ;;
+ input)
+ echo "in" > "$sysfs_ldir/direction"
+ ;;
+ 0)
+ echo "out" > "$sysfs_ldir/direction"
+ echo 0 > "$sysfs_ldir/value"
+ ;;
+ 1)
+ echo "out" > "$sysfs_ldir/direction"
+ echo 1 > "$sysfs_ldir/value"
+ ;;
+ esac
+ done
}
-test_chips_sysfs()
+release_line()
{
- gpiochip=`list_chip $module`
- if [ X"$gpiochip" = X ]; then
- if [ X"$valid" = Xfalse ]; then
- echo "successful"
- else
- echo "fail"
- die
- fi
- else
- for chip in $gpiochip; do
- test_chip $chip
- done
- fi
+ [ "$sysfs_nr" ] || return 0
+ echo "$sysfs_nr" > "$GPIO_SYSFS/unexport"
+ sysfs_nr=
+ sysfs_ldir=
}
-
diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh
index 7f35b9880485..0d6c5f7f95d2 100755
--- a/tools/testing/selftests/gpio/gpio-mockup.sh
+++ b/tools/testing/selftests/gpio/gpio-mockup.sh
@@ -1,72 +1,57 @@
-#!/bin/bash
+#!/bin/bash -efu
# SPDX-License-Identifier: GPL-2.0
#exit status
-#1: Internal error
-#2: sysfs/debugfs not mount
-#3: insert module fail when gpio-mockup is a module.
-#4: Skip test including run as non-root user.
-#5: other reason.
-
-SYSFS=
-GPIO_SYSFS=
-GPIO_DRV_SYSFS=
+#0: success
+#1: fail
+#4: skip test - including run as non-root user
+
+BASE=${0%/*}
DEBUGFS=
GPIO_DEBUGFS=
-dev_type=
-module=
+dev_type="cdev"
+module="gpio-mockup"
+verbose=
+full_test=
+random=
+uapi_opt=
+active_opt=
+bias_opt=
+line_set_pid=
-# Kselftest framework requirement - SKIP code is 4.
+# Kselftest return codes
+ksft_fail=1
ksft_skip=4
usage()
{
echo "Usage:"
- echo "$0 [-f] [-m name] [-t type]"
- echo "-f: full test. It maybe conflict with existence gpio device."
- echo "-m: module name, default name is gpio-mockup. It could also test"
- echo " other gpio device."
- echo "-t: interface type: chardev(char device) and sysfs(being"
- echo " deprecated). The first one is default"
- echo ""
- echo "$0 -h"
- echo "This usage"
+ echo "$0 [-frv] [-t type]"
+ echo "-f: full test (minimal set run by default)"
+ echo "-r: test random lines as well as fence posts"
+ echo "-t: interface type:"
+ echo " cdev (character device ABI) - default"
+ echo " cdev_v1 (deprecated character device ABI)"
+ echo " sysfs (deprecated SYSFS ABI)"
+ echo "-v: verbose progress reporting"
+ exit $ksft_fail
}
-prerequisite()
+skip()
{
- msg="skip all tests:"
- if [ $UID != 0 ]; then
- echo $msg must be run as root >&2
- exit $ksft_skip
- fi
- SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
- if [ ! -d "$SYSFS" ]; then
- echo $msg sysfs is not mounted >&2
- exit 2
- fi
- GPIO_SYSFS=`echo $SYSFS/class/gpio`
- GPIO_DRV_SYSFS=`echo $SYSFS/devices/platform/$module/gpio`
- DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
- if [ ! -d "$DEBUGFS" ]; then
- echo $msg debugfs is not mounted >&2
- exit 2
- fi
- GPIO_DEBUGFS=`echo $DEBUGFS/gpio`
- source gpio-mockup-sysfs.sh
+ echo "$*" >&2
+ echo "GPIO $module test SKIP"
+ exit $ksft_skip
}
-try_insert_module()
+prerequisite()
{
- if [ -d "$GPIO_DRV_SYSFS" ]; then
- echo "$GPIO_DRV_SYSFS exist. Skip insert module"
- else
- modprobe -q $module $1
- if [ X$? != X0 ]; then
- echo $msg insmod $module failed >&2
- exit 3
- fi
- fi
+ [ $(id -u) -eq 0 ] || skip "must be run as root"
+
+ DEBUGFS=$(grep -w debugfs /proc/mounts | cut -f2 -d' ')
+ [ -d "$DEBUGFS" ] || skip "debugfs is not mounted"
+
+ GPIO_DEBUGFS=$DEBUGFS/$module
}
remove_module()
@@ -74,133 +59,345 @@ remove_module()
modprobe -r -q $module
}
-die()
+cleanup()
{
+ set +e
+ release_line
remove_module
- exit 5
+ jobs -p | xargs -r kill > /dev/null 2>&1
}
-test_chips()
+fail()
{
- if [ X$dev_type = Xsysfs ]; then
- echo "WARNING: sysfs ABI of gpio is going to deprecated."
- test_chips_sysfs $*
- else
- $BASE/gpio-mockup-chardev $*
- fi
+ echo "test failed: $*" >&2
+ echo "GPIO $module test FAIL"
+ exit $ksft_fail
+}
+
+try_insert_module()
+{
+ modprobe -q $module "$1" || fail "insert $module failed with error $?"
+}
+
+log()
+{
+ [ -z "$verbose" ] || echo "$*"
}
-gpio_test()
+# The following line helpers, release_Line, get_line and set_line, all
+# make use of the global $chip and $offset variables.
+#
+# This implementation drives the GPIO character device (cdev) uAPI.
+# Other implementations may override these to test different uAPIs.
+
+# Release any resources related to the line
+release_line()
{
- param=$1
- valid=$2
+ [ "$line_set_pid" ] && kill $line_set_pid && wait $line_set_pid || true
+ line_set_pid=
+}
- if [ X"$param" = X ]; then
- die
+# Read the current value of the line
+get_line()
+{
+ release_line
+
+ local cdev_opts=${uapi_opt}${active_opt}
+ $BASE/gpio-mockup-cdev $cdev_opts /dev/$chip $offset
+ echo $?
+}
+
+# Set the state of the line
+#
+# Changes to line configuration are provided as parameters.
+# The line is assumed to be an output if the line value 0 or 1 is
+# specified, else an input.
+set_line()
+{
+ local val=
+
+ release_line
+
+ # parse config options...
+ for option in $*; do
+ case $option in
+ active-low)
+ active_opt="-l "
+ ;;
+ active-high)
+ active_opt=
+ ;;
+ bias-none)
+ bias_opt=
+ ;;
+ pull-down)
+ bias_opt="-bpull-down "
+ ;;
+ pull-up)
+ bias_opt="-bpull-up "
+ ;;
+ 0)
+ val=0
+ ;;
+ 1)
+ val=1
+ ;;
+ esac
+ done
+
+ local cdev_opts=${uapi_opt}${active_opt}
+ if [ "$val" ]; then
+ $BASE/gpio-mockup-cdev $cdev_opts -s$val /dev/$chip $offset &
+ # failure to set is detected by reading mockup and toggling values
+ line_set_pid=$!
+ # allow for gpio-mockup-cdev to launch and request line
+ # (there is limited value in checking if line has been requested)
+ sleep 0.01
+ elif [ "$bias_opt" ]; then
+ cdev_opts=${cdev_opts}${bias_opt}
+ $BASE/gpio-mockup-cdev $cdev_opts /dev/$chip $offset || true
fi
- try_insert_module "gpio_mockup_ranges=$param"
- echo -n "GPIO $module test with ranges: <"
- echo "$param>: "
- printf "%-10s %s\n" $param
- test_chips $module $valid
- remove_module
}
-BASE=`dirname $0`
+assert_line()
+{
+ local val
+ # don't need any retry here as set_mock allows for propagation
+ val=$(get_line)
+ [ "$val" = "$1" ] || fail "line value is ${val:-empty} when $1 was expected"
+}
+
+# The following mockup helpers all make use of the $mock_line
+assert_mock()
+{
+ local backoff_wait=10
+ local retry=0
+ local val
+ # retry allows for set propagation from uAPI to mockup
+ while true; do
+ val=$(< $mock_line)
+ [ "$val" = "$1" ] && break
+ retry=$((retry + 1))
+ [ $retry -lt 5 ] || fail "mockup $mock_line value ${val:-empty} when $1 expected"
+ sleep $(printf "%0.2f" $((backoff_wait))e-3)
+ backoff_wait=$((backoff_wait * 2))
+ done
+}
+
+set_mock()
+{
+ echo "$1" > $mock_line
+ # allow for set propagation - so we won't be in a race with set_line
+ assert_mock "$1"
+}
-dev_type=
-TEMP=`getopt -o fhm:t: -n '$0' -- "$@"`
+# test the functionality of a line
+#
+# The line is set from the mockup side and is read from the userspace side
+# (input), and is set from the userspace side and is read from the mockup side
+# (output).
+#
+# Setting the mockup pull using the userspace interface bias settings is
+# tested where supported by the userspace interface (cdev).
+test_line()
+{
+ chip=$1
+ offset=$2
+ log "test_line $chip $offset"
+ mock_line=$GPIO_DEBUGFS/$chip/$offset
+ [ -e "$mock_line" ] || fail "missing line $chip:$offset"
-if [ "$?" != "0" ]; then
- echo "Parameter process failed, Terminating..." >&2
- exit 1
-fi
+ # test input active-high
+ set_mock 1
+ set_line input active-high
+ assert_line 1
+ set_mock 0
+ assert_line 0
+ set_mock 1
+ assert_line 1
+
+ if [ "$full_test" ]; then
+ if [ "$dev_type" != "sysfs" ]; then
+ # test pulls
+ set_mock 0
+ set_line input pull-up
+ assert_line 1
+ set_mock 0
+ assert_line 0
+
+ set_mock 1
+ set_line input pull-down
+ assert_line 0
+ set_mock 1
+ assert_line 1
+
+ set_line bias-none
+ fi
+
+ # test input active-low
+ set_mock 0
+ set_line active-low
+ assert_line 1
+ set_mock 1
+ assert_line 0
+ set_mock 0
+ assert_line 1
+
+ # test output active-high
+ set_mock 1
+ set_line active-high 0
+ assert_mock 0
+ set_line 1
+ assert_mock 1
+ set_line 0
+ assert_mock 0
+ fi
+
+ # test output active-low
+ set_mock 0
+ set_line active-low 0
+ assert_mock 1
+ set_line 1
+ assert_mock 0
+ set_line 0
+ assert_mock 1
+
+ release_line
+}
+
+test_no_line()
+{
+ log test_no_line "$*"
+ [ ! -e "$GPIO_DEBUGFS/$1/$2" ] || fail "unexpected line $1:$2"
+}
-# Note the quotes around `$TEMP': they are essential!
-eval set -- "$TEMP"
+# Load the module and check that the expected number of gpiochips, with the
+# expected number of lines, are created and are functional.
+#
+# $1 is the gpio_mockup_ranges parameter for the module
+# The remaining parameters are the number of lines, n, expected for each of
+# the gpiochips expected to be created.
+#
+# For each gpiochip the fence post lines, 0 and n-1, are tested, and the
+# line on the far side of the fence post, n, is tested to not exist.
+#
+# If the $random flag is set then a random line in the middle of the
+# gpiochip is tested as well.
+insmod_test()
+{
+ local ranges=
+ local gc=
+ local width=
-while true; do
- case $1 in
- -f)
+ [ "${1:-}" ] || fail "missing ranges"
+ ranges=$1 ; shift
+ try_insert_module "gpio_mockup_ranges=$ranges"
+ log "GPIO $module test with ranges: <$ranges>:"
+ # e.g. /sys/kernel/debug/gpio-mockup/gpiochip1
+ gpiochip=$(find "$DEBUGFS/$module/" -name gpiochip* -type d | sort)
+ for chip in $gpiochip; do
+ gc=${chip##*/}
+ [ "${1:-}" ] || fail "unexpected chip - $gc"
+ width=$1 ; shift
+ test_line $gc 0
+ if [ "$random" -a $width -gt 2 ]; then
+ test_line $gc $((RANDOM % ($width - 2) + 1))
+ fi
+ test_line $gc $(($width - 1))
+ test_no_line $gc $width
+ done
+ [ "${1:-}" ] && fail "missing expected chip of width $1"
+ remove_module || fail "failed to remove module with error $?"
+}
+
+while getopts ":frvt:" opt; do
+ case $opt in
+ f)
full_test=true
- shift
- ;;
- -h)
- usage
- exit
;;
- -m)
- module=$2
- shift 2
+ r)
+ random=true
;;
- -t)
- dev_type=$2
- shift 2
+ t)
+ dev_type=$OPTARG
;;
- --)
- shift
- break
+ v)
+ verbose=true
;;
*)
- echo "Internal error!"
- exit 1
+ usage
;;
esac
done
+shift $((OPTIND - 1))
-if [ X"$module" = X ]; then
- module="gpio-mockup"
-fi
-
-if [ X$dev_type != Xsysfs ]; then
- dev_type="chardev"
-fi
+[ "${1:-}" ] && fail "unknown argument '$1'"
prerequisite
-echo "1. Test dynamic allocation of gpio successful means insert gpiochip and"
-echo " manipulate gpio pin successful"
-gpio_test "-1,32" true
-gpio_test "-1,32,-1,32" true
-gpio_test "-1,32,-1,32,-1,32" true
-if [ X$full_test = Xtrue ]; then
- gpio_test "-1,32,32,64" true
- gpio_test "-1,32,40,64,-1,5" true
- gpio_test "-1,32,32,64,-1,32" true
- gpio_test "0,32,32,64,-1,32,-1,32" true
- gpio_test "-1,32,-1,32,0,32,32,64" true
- echo "2. Do basic test: successful means insert gpiochip and"
- echo " manipulate gpio pin successful"
- gpio_test "0,32" true
- gpio_test "0,32,32,64" true
- gpio_test "0,32,40,64,64,96" true
+trap 'exit $ksft_fail' SIGTERM SIGINT
+trap cleanup EXIT
+
+case "$dev_type" in
+sysfs)
+ source $BASE/gpio-mockup-sysfs.sh
+ echo "WARNING: gpio sysfs ABI is deprecated."
+ ;;
+cdev_v1)
+ echo "WARNING: gpio cdev ABI v1 is deprecated."
+ uapi_opt="-u1 "
+ ;;
+cdev)
+ ;;
+*)
+ fail "unknown interface type: $dev_type"
+ ;;
+esac
+
+remove_module || fail "can't remove existing $module module"
+
+# manual gpio allocation tests fail if a physical chip already exists
+[ "$full_test" -a -e "/dev/gpiochip0" ] && skip "full tests conflict with gpiochip0"
+
+echo "1. Module load tests"
+echo "1.1. dynamic allocation of gpio"
+insmod_test "-1,32" 32
+insmod_test "-1,23,-1,32" 23 32
+insmod_test "-1,23,-1,26,-1,32" 23 26 32
+if [ "$full_test" ]; then
+ echo "1.2. manual allocation of gpio"
+ insmod_test "0,32" 32
+ insmod_test "0,32,32,60" 32 28
+ insmod_test "0,32,40,64,64,96" 32 24 32
+ echo "1.3. dynamic and manual allocation of gpio"
+ insmod_test "-1,32,32,62" 32 30
+ insmod_test "-1,22,-1,23,0,24,32,64" 22 23 24 32
+ insmod_test "-1,32,32,60,-1,29" 32 28 29
+ insmod_test "-1,32,40,64,-1,5" 32 24 5
+ insmod_test "0,32,32,44,-1,22,-1,31" 32 12 22 31
fi
-echo "3. Error test: successful means insert gpiochip failed"
-echo "3.1 Test number of gpio overflow"
-#Currently: The max number of gpio(1024) is defined in arm architecture.
-gpio_test "-1,32,-1,1024" false
-if [ X$full_test = Xtrue ]; then
- echo "3.2 Test zero line of gpio"
- gpio_test "0,0" false
- echo "3.3 Test range overlap"
- echo "3.3.1 Test corner case"
- gpio_test "0,32,0,1" false
- gpio_test "0,32,32,64,32,40" false
- gpio_test "0,32,35,64,35,45" false
- gpio_test "0,32,31,32" false
- gpio_test "0,32,32,64,36,37" false
- gpio_test "0,32,35,64,34,36" false
- echo "3.3.2 Test inserting invalid second gpiochip"
- gpio_test "0,32,30,35" false
- gpio_test "0,32,1,5" false
- gpio_test "10,32,9,14" false
- gpio_test "10,32,30,35" false
- echo "3.3.3 Test others"
- gpio_test "0,32,40,56,39,45" false
- gpio_test "0,32,40,56,30,33" false
- gpio_test "0,32,40,56,30,41" false
- gpio_test "0,32,40,56,20,21" false
+echo "2. Module load error tests"
+echo "2.1 gpio overflow"
+# Currently: The max number of gpio(1024) is defined in arm architecture.
+insmod_test "-1,1024"
+if [ "$full_test" ]; then
+ echo "2.2 no lines defined"
+ insmod_test "0,0"
+ echo "2.3 ignore range overlap"
+ insmod_test "0,32,0,1" 32
+ insmod_test "0,32,1,5" 32
+ insmod_test "0,32,30,35" 32
+ insmod_test "0,32,31,32" 32
+ insmod_test "10,32,30,35" 22
+ insmod_test "10,32,9,14" 22
+ insmod_test "0,32,20,21,40,56" 32 16
+ insmod_test "0,32,32,64,32,40" 32 32
+ insmod_test "0,32,32,64,36,37" 32 32
+ insmod_test "0,32,35,64,34,36" 32 29
+ insmod_test "0,30,35,64,35,45" 30 29
+ insmod_test "0,32,40,56,30,33" 32 16
+ insmod_test "0,32,40,56,30,41" 32 16
+ insmod_test "0,32,40,56,39,45" 32 16
fi
-echo GPIO test PASS
-
+echo "GPIO $module test PASS"
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index 5ec4d9e18806..656c43c24044 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -69,7 +69,7 @@ int restore_queue(struct msgque_data *msgque)
printf("msgsnd failed (%m)\n");
ret = -errno;
goto destroy;
- };
+ }
}
return 0;
@@ -180,7 +180,7 @@ int fill_msgque(struct msgque_data *msgque)
IPC_NOWAIT) != 0) {
printf("First message send failed (%m)\n");
return -errno;
- };
+ }
msgbuf.mtype = ANOTHER_MSG_TYPE;
memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
@@ -188,7 +188,7 @@ int fill_msgque(struct msgque_data *msgque)
IPC_NOWAIT) != 0) {
printf("Second message send failed (%m)\n");
return -errno;
- };
+ }
return 0;
}
diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh
index bbc04646346b..00e60d6eb16b 100755
--- a/tools/testing/selftests/kselftest_deps.sh
+++ b/tools/testing/selftests/kselftest_deps.sh
@@ -129,13 +129,11 @@ l2_tests=$(grep -r --include=Makefile ": LDLIBS" | \
grep -v "VAR_LDLIBS" | awk -F: '{print $1}')
# Level 3
-# gpio, memfd and others use pkg-config to find mount and fuse libs
+# memfd and others use pkg-config to find mount and fuse libs
# respectively and save it in VAR_LDLIBS. If pkg-config doesn't find
# any, VAR_LDLIBS set to default.
# Use the default value and filter out pkg-config for dependency check.
# e.g:
-# gpio/Makefile
-# VAR_LDLIBS := $(shell pkg-config --libs mount) 2>/dev/null)
# memfd/Makefile
# VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index edce85420d19..ae0f0f33b2a6 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -79,7 +79,7 @@
#endif
/**
- * TH_LOG(fmt, ...)
+ * TH_LOG()
*
* @fmt: format string
* @...: optional arguments
@@ -113,12 +113,16 @@
__FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
/**
- * SKIP(statement, fmt, ...)
+ * SKIP()
*
* @statement: statement to run after reporting SKIP
* @fmt: format string
* @...: optional arguments
*
+ * .. code-block:: c
+ *
+ * SKIP(statement, fmt, ...);
+ *
* This forces a "pass" after reporting why something is being skipped
* and runs "statement", which is usually "return" or "goto skip".
*/
@@ -136,7 +140,7 @@
} while (0)
/**
- * TEST(test_name) - Defines the test function and creates the registration
+ * TEST() - Defines the test function and creates the registration
* stub
*
* @test_name: test name
@@ -155,7 +159,7 @@
#define TEST(test_name) __TEST_IMPL(test_name, -1)
/**
- * TEST_SIGNAL(test_name, signal)
+ * TEST_SIGNAL()
*
* @test_name: test name
* @signal: signal number
@@ -195,7 +199,7 @@
struct __test_metadata __attribute__((unused)) *_metadata)
/**
- * FIXTURE_DATA(datatype_name) - Wraps the struct name so we have one less
+ * FIXTURE_DATA() - Wraps the struct name so we have one less
* argument to pass around
*
* @datatype_name: datatype name
@@ -212,7 +216,7 @@
#define FIXTURE_DATA(datatype_name) struct _test_data_##datatype_name
/**
- * FIXTURE(fixture_name) - Called once per fixture to setup the data and
+ * FIXTURE() - Called once per fixture to setup the data and
* register
*
* @fixture_name: fixture name
@@ -239,7 +243,7 @@
FIXTURE_DATA(fixture_name)
/**
- * FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture.
+ * FIXTURE_SETUP() - Prepares the setup function for the fixture.
* *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
*
* @fixture_name: fixture name
@@ -265,7 +269,7 @@
__attribute__((unused)) *variant)
/**
- * FIXTURE_TEARDOWN(fixture_name)
+ * FIXTURE_TEARDOWN()
* *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
*
* @fixture_name: fixture name
@@ -286,7 +290,7 @@
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
/**
- * FIXTURE_VARIANT(fixture_name) - Optionally called once per fixture
+ * FIXTURE_VARIANT() - Optionally called once per fixture
* to declare fixture variant
*
* @fixture_name: fixture name
@@ -305,7 +309,7 @@
#define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name
/**
- * FIXTURE_VARIANT_ADD(fixture_name, variant_name) - Called once per fixture
+ * FIXTURE_VARIANT_ADD() - Called once per fixture
* variant to setup and register the data
*
* @fixture_name: fixture name
@@ -339,7 +343,7 @@
_##fixture_name##_##variant_name##_variant =
/**
- * TEST_F(fixture_name, test_name) - Emits test registration and helpers for
+ * TEST_F() - Emits test registration and helpers for
* fixture-based test cases
*
* @fixture_name: fixture name
diff --git a/tools/testing/selftests/kselftest_module.h b/tools/testing/selftests/kselftest_module.h
index e8eafaf0941a..e2ea41de3f35 100644
--- a/tools/testing/selftests/kselftest_module.h
+++ b/tools/testing/selftests/kselftest_module.h
@@ -11,7 +11,8 @@
#define KSTM_MODULE_GLOBALS() \
static unsigned int total_tests __initdata; \
-static unsigned int failed_tests __initdata
+static unsigned int failed_tests __initdata; \
+static unsigned int skipped_tests __initdata
#define KSTM_CHECK_ZERO(x) do { \
total_tests++; \
@@ -21,11 +22,16 @@ static unsigned int failed_tests __initdata
} \
} while (0)
-static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests)
+static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests,
+ unsigned int skipped_tests)
{
- if (failed_tests == 0)
- pr_info("all %u tests passed\n", total_tests);
- else
+ if (failed_tests == 0) {
+ if (skipped_tests) {
+ pr_info("skipped %u tests\n", skipped_tests);
+ pr_info("remaining %u tests passed\n", total_tests);
+ } else
+ pr_info("all %u tests passed\n", total_tests);
+ } else
pr_warn("failed %u out of %u tests\n", failed_tests, total_tests);
return failed_tests ? -EINVAL : 0;
@@ -36,7 +42,7 @@ static int __init __module##_init(void) \
{ \
pr_info("loaded.\n"); \
selftest(); \
- return kstm_report(total_tests, failed_tests); \
+ return kstm_report(total_tests, failed_tests, skipped_tests); \
} \
static void __exit __module##_exit(void) \
{ \
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index ce8f4ad39684..7bd7e776c266 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -7,10 +7,14 @@
/x86_64/cr4_cpuid_sync_test
/x86_64/debug_regs
/x86_64/evmcs_test
+/x86_64/get_cpuid_test
+/x86_64/get_msr_index_features
/x86_64/kvm_pv_test
+/x86_64/hyperv_clock
/x86_64/hyperv_cpuid
/x86_64/mmio_warning_test
/x86_64/platform_info_test
+/x86_64/set_boot_cpu_id
/x86_64/set_sregs_test
/x86_64/smm_test
/x86_64/state_test
@@ -24,10 +28,16 @@
/x86_64/vmx_preemption_timer_test
/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
+/x86_64/xapic_ipi_test
+/x86_64/xen_shinfo_test
+/x86_64/xen_vmcall_test
/x86_64/xss_msr_test
+/x86_64/vmx_pmu_msrs_test
/demand_paging_test
/dirty_log_test
/dirty_log_perf_test
+/hardware_disable_test
/kvm_create_max_vcpus
+/memslot_modification_stress_test
/set_memory_region_test
/steal_time
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index fe41c6a0fa67..67eebb53235f 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -39,11 +39,15 @@ LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
+TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
@@ -56,13 +60,19 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
+TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
+TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
+TEST_GEN_PROGS_x86_64 += hardware_disable_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index cdad1eca72f7..5f7a229c3af1 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -64,7 +64,7 @@ static void *vcpu_worker(void *data)
exit_reason_str(run->exit_reason));
}
- ts_diff = timespec_diff_now(start);
+ ts_diff = timespec_elapsed(start);
PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
ts_diff.tv_sec, ts_diff.tv_nsec);
@@ -95,7 +95,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
return r;
}
- ts_diff = timespec_diff_now(start);
+ ts_diff = timespec_elapsed(start);
PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
timespec_to_ns(ts_diff));
@@ -190,7 +190,7 @@ static void *uffd_handler_thread_fn(void *arg)
pages++;
}
- ts_diff = timespec_diff_now(start);
+ ts_diff = timespec_elapsed(start);
PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
pages, ts_diff.tv_sec, ts_diff.tv_nsec,
pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
@@ -250,6 +250,7 @@ static int setup_demand_paging(struct kvm_vm *vm,
struct test_params {
bool use_uffd;
useconds_t uffd_delay;
+ bool partition_vcpu_memory_access;
};
static void run_test(enum vm_guest_mode mode, void *arg)
@@ -265,7 +266,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
int vcpu_id;
int r;
- vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+ vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+ VM_MEM_SRC_ANONYMOUS);
perf_test_args.wr_fract = 1;
@@ -277,7 +279,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
TEST_ASSERT(vcpu_threads, "Memory allocation failed");
- perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+ perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
+ p->partition_vcpu_memory_access);
if (p->use_uffd) {
uffd_handler_threads =
@@ -293,10 +296,19 @@ static void run_test(enum vm_guest_mode mode, void *arg)
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
vm_paddr_t vcpu_gpa;
void *vcpu_hva;
+ uint64_t vcpu_mem_size;
- vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size);
+
+ if (p->partition_vcpu_memory_access) {
+ vcpu_gpa = guest_test_phys_mem +
+ (vcpu_id * guest_percpu_mem_size);
+ vcpu_mem_size = guest_percpu_mem_size;
+ } else {
+ vcpu_gpa = guest_test_phys_mem;
+ vcpu_mem_size = guest_percpu_mem_size * nr_vcpus;
+ }
PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
- vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size);
+ vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size);
/* Cache the HVA pointer of the region */
vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
@@ -313,7 +325,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
&uffd_handler_threads[vcpu_id],
pipefds[vcpu_id * 2],
p->uffd_delay, &uffd_args[vcpu_id],
- vcpu_hva, guest_percpu_mem_size);
+ vcpu_hva, vcpu_mem_size);
if (r < 0)
exit(-r);
}
@@ -339,7 +351,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
}
- ts_diff = timespec_diff_now(start);
+ ts_diff = timespec_elapsed(start);
pr_info("All vCPU threads joined\n");
@@ -376,7 +388,7 @@ static void help(char *name)
{
puts("");
printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
- " [-b memory] [-v vcpus]\n", name);
+ " [-b memory] [-v vcpus] [-o]\n", name);
guest_modes_help();
printf(" -u: use User Fault FD to handle vCPU page\n"
" faults.\n");
@@ -387,6 +399,8 @@ static void help(char *name)
" demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n");
printf(" -v: specify the number of vCPUs to run.\n");
+ printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+ " them into a separate region of memory for each vCPU.\n");
puts("");
exit(0);
}
@@ -394,12 +408,14 @@ static void help(char *name)
int main(int argc, char *argv[])
{
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
- struct test_params p = {};
+ struct test_params p = {
+ .partition_vcpu_memory_access = true,
+ };
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:ud:b:v:o")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -419,6 +435,9 @@ int main(int argc, char *argv[])
TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
"Invalid number of vcpus, must be between 1 and %d", max_vcpus);
break;
+ case 'o':
+ p.partition_vcpu_memory_access = false;
+ break;
case 'h':
default:
help(argv[0]);
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 2283a0ec74a9..04a2641261be 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -28,8 +28,8 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
/* Host variables */
static u64 dirty_log_manual_caps;
static bool host_quit;
-static uint64_t iteration;
-static uint64_t vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+static int iteration;
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
static void *vcpu_worker(void *data)
{
@@ -48,11 +48,11 @@ static void *vcpu_worker(void *data)
run = vcpu_state(vm, vcpu_id);
while (!READ_ONCE(host_quit)) {
- uint64_t current_iteration = READ_ONCE(iteration);
+ int current_iteration = READ_ONCE(iteration);
clock_gettime(CLOCK_MONOTONIC, &start);
ret = _vcpu_run(vm, vcpu_id);
- ts_diff = timespec_diff_now(start);
+ ts_diff = timespec_elapsed(start);
TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC,
@@ -61,17 +61,17 @@ static void *vcpu_worker(void *data)
pr_debug("Got sync event from vCPU %d\n", vcpu_id);
vcpu_last_completed_iteration[vcpu_id] = current_iteration;
- pr_debug("vCPU %d updated last completed iteration to %lu\n",
+ pr_debug("vCPU %d updated last completed iteration to %d\n",
vcpu_id, vcpu_last_completed_iteration[vcpu_id]);
if (current_iteration) {
pages_count += vcpu_args->pages;
total = timespec_add(total, ts_diff);
- pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n",
+ pr_debug("vCPU %d iteration %d dirty memory time: %ld.%.9lds\n",
vcpu_id, current_iteration, ts_diff.tv_sec,
ts_diff.tv_nsec);
} else {
- pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n",
+ pr_debug("vCPU %d iteration %d populate memory time: %ld.%.9lds\n",
vcpu_id, current_iteration, ts_diff.tv_sec,
ts_diff.tv_nsec);
}
@@ -81,7 +81,7 @@ static void *vcpu_worker(void *data)
}
avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]);
- pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+ pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id],
total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
@@ -92,6 +92,8 @@ struct test_params {
unsigned long iterations;
uint64_t phys_offset;
int wr_fract;
+ bool partition_vcpu_memory_access;
+ enum vm_mem_backing_src_type backing_src;
};
static void run_test(enum vm_guest_mode mode, void *arg)
@@ -111,7 +113,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct kvm_enable_cap cap = {};
struct timespec clear_dirty_log_total = (struct timespec){0};
- vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+ vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+ p->backing_src);
perf_test_args.wr_fract = p->wr_fract;
@@ -129,7 +132,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
TEST_ASSERT(vcpu_threads, "Memory allocation failed");
- perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+ perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
+ p->partition_vcpu_memory_access);
sync_global_to_guest(vm, perf_test_args);
@@ -139,17 +143,21 @@ static void run_test(enum vm_guest_mode mode, void *arg)
clock_gettime(CLOCK_MONOTONIC, &start);
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ vcpu_last_completed_iteration[vcpu_id] = -1;
+
pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
&perf_test_args.vcpu_args[vcpu_id]);
}
- /* Allow the vCPU to populate memory */
- pr_debug("Starting iteration %lu - Populating\n", iteration);
- while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
- pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n",
- iteration);
+ /* Allow the vCPUs to populate memory */
+ pr_debug("Starting iteration %d - Populating\n", iteration);
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) !=
+ iteration)
+ ;
+ }
- ts_diff = timespec_diff_now(start);
+ ts_diff = timespec_elapsed(start);
pr_info("Populate memory time: %ld.%.9lds\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
@@ -157,7 +165,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
clock_gettime(CLOCK_MONOTONIC, &start);
vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX,
KVM_MEM_LOG_DIRTY_PAGES);
- ts_diff = timespec_diff_now(start);
+ ts_diff = timespec_elapsed(start);
pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
@@ -169,25 +177,25 @@ static void run_test(enum vm_guest_mode mode, void *arg)
clock_gettime(CLOCK_MONOTONIC, &start);
iteration++;
- pr_debug("Starting iteration %lu\n", iteration);
+ pr_debug("Starting iteration %d\n", iteration);
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
- pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n",
- vcpu_id, iteration);
+ while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id])
+ != iteration)
+ ;
}
- ts_diff = timespec_diff_now(start);
+ ts_diff = timespec_elapsed(start);
vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff);
- pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n",
+ pr_info("Iteration %d dirty memory time: %ld.%.9lds\n",
iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
clock_gettime(CLOCK_MONOTONIC, &start);
kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap);
- ts_diff = timespec_diff_now(start);
+ ts_diff = timespec_elapsed(start);
get_dirty_log_total = timespec_add(get_dirty_log_total,
ts_diff);
- pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n",
+ pr_info("Iteration %d get dirty log time: %ld.%.9lds\n",
iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
if (dirty_log_manual_caps) {
@@ -195,26 +203,26 @@ static void run_test(enum vm_guest_mode mode, void *arg)
kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0,
host_num_pages);
- ts_diff = timespec_diff_now(start);
+ ts_diff = timespec_elapsed(start);
clear_dirty_log_total = timespec_add(clear_dirty_log_total,
ts_diff);
- pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
+ pr_info("Iteration %d clear dirty log time: %ld.%.9lds\n",
iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
}
}
- /* Tell the vcpu thread to quit */
- host_quit = true;
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
- pthread_join(vcpu_threads[vcpu_id], NULL);
-
/* Disable dirty logging */
clock_gettime(CLOCK_MONOTONIC, &start);
vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0);
- ts_diff = timespec_diff_now(start);
+ ts_diff = timespec_elapsed(start);
pr_info("Disabling dirty logging time: %ld.%.9lds\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
+ /* Tell the vcpu thread to quit */
+ host_quit = true;
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
+ pthread_join(vcpu_threads[vcpu_id], NULL);
+
avg = timespec_div(get_dirty_log_total, p->iterations);
pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
p->iterations, get_dirty_log_total.tv_sec,
@@ -236,7 +244,7 @@ static void help(char *name)
{
puts("");
printf("usage: %s [-h] [-i iterations] [-p offset] "
- "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name);
+ "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]\n", name);
puts("");
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
TEST_HOST_LOOP_N);
@@ -251,6 +259,11 @@ static void help(char *name)
" 1/<fraction of pages to write>.\n"
" (default: 1 i.e. all pages are written to.)\n");
printf(" -v: specify the number of vCPUs to run.\n");
+ printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+ " them into a separate region of memory for each vCPU.\n");
+ printf(" -s: specify the type of memory that should be used to\n"
+ " back the guest data region.\n\n");
+ backing_src_help();
puts("");
exit(0);
}
@@ -261,6 +274,8 @@ int main(int argc, char *argv[])
struct test_params p = {
.iterations = TEST_HOST_LOOP_N,
.wr_fract = 1,
+ .partition_vcpu_memory_access = true,
+ .backing_src = VM_MEM_SRC_ANONYMOUS,
};
int opt;
@@ -271,10 +286,10 @@ int main(int argc, char *argv[])
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) {
+ while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:os:")) != -1) {
switch (opt) {
case 'i':
- p.iterations = strtol(optarg, NULL, 10);
+ p.iterations = atoi(optarg);
break;
case 'p':
p.phys_offset = strtoull(optarg, NULL, 0);
@@ -295,6 +310,11 @@ int main(int argc, char *argv[])
TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
"Invalid number of vcpus, must be between 1 and %d", max_vcpus);
break;
+ case 'o':
+ p.partition_vcpu_memory_access = false;
+ case 's':
+ p.backing_src = parse_backing_src_type(optarg);
+ break;
case 'h':
default:
help(argv[0]);
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
new file mode 100644
index 000000000000..2f2eeb8a1d86
--- /dev/null
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This test is intended to reproduce a crash that happens when
+ * kvm_arch_hardware_disable is called and it attempts to unregister the user
+ * return notifiers.
+ */
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include <test_util.h>
+
+#include "kvm_util.h"
+
+#define VCPU_NUM 4
+#define SLEEPING_THREAD_NUM (1 << 4)
+#define FORK_NUM (1ULL << 9)
+#define DELAY_US_MAX 2000
+#define GUEST_CODE_PIO_PORT 4
+
+sem_t *sem;
+
+/* Arguments for the pthreads */
+struct payload {
+ struct kvm_vm *vm;
+ uint32_t index;
+};
+
+static void guest_code(void)
+{
+ for (;;)
+ ; /* Some busy work */
+ printf("Should not be reached.\n");
+}
+
+static void *run_vcpu(void *arg)
+{
+ struct payload *payload = (struct payload *)arg;
+ struct kvm_run *state = vcpu_state(payload->vm, payload->index);
+
+ vcpu_run(payload->vm, payload->index);
+
+ TEST_ASSERT(false, "%s: exited with reason %d: %s\n",
+ __func__, state->exit_reason,
+ exit_reason_str(state->exit_reason));
+ pthread_exit(NULL);
+}
+
+static void *sleeping_thread(void *arg)
+{
+ int fd;
+
+ while (true) {
+ fd = open("/dev/null", O_RDWR);
+ close(fd);
+ }
+ TEST_ASSERT(false, "%s: exited\n", __func__);
+ pthread_exit(NULL);
+}
+
+static inline void check_create_thread(pthread_t *thread, pthread_attr_t *attr,
+ void *(*f)(void *), void *arg)
+{
+ int r;
+
+ r = pthread_create(thread, attr, f, arg);
+ TEST_ASSERT(r == 0, "%s: failed to create thread", __func__);
+}
+
+static inline void check_set_affinity(pthread_t thread, cpu_set_t *cpu_set)
+{
+ int r;
+
+ r = pthread_setaffinity_np(thread, sizeof(cpu_set_t), cpu_set);
+ TEST_ASSERT(r == 0, "%s: failed set affinity", __func__);
+}
+
+static inline void check_join(pthread_t thread, void **retval)
+{
+ int r;
+
+ r = pthread_join(thread, retval);
+ TEST_ASSERT(r == 0, "%s: failed to join thread", __func__);
+}
+
+static void run_test(uint32_t run)
+{
+ struct kvm_vm *vm;
+ cpu_set_t cpu_set;
+ pthread_t threads[VCPU_NUM];
+ pthread_t throw_away;
+ struct payload payloads[VCPU_NUM];
+ void *b;
+ uint32_t i, j;
+
+ CPU_ZERO(&cpu_set);
+ for (i = 0; i < VCPU_NUM; i++)
+ CPU_SET(i, &cpu_set);
+
+ vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ vm_create_irqchip(vm);
+
+ fprintf(stderr, "%s: [%d] start vcpus\n", __func__, run);
+ for (i = 0; i < VCPU_NUM; ++i) {
+ vm_vcpu_add_default(vm, i, guest_code);
+ payloads[i].vm = vm;
+ payloads[i].index = i;
+
+ check_create_thread(&threads[i], NULL, run_vcpu,
+ (void *)&payloads[i]);
+ check_set_affinity(threads[i], &cpu_set);
+
+ for (j = 0; j < SLEEPING_THREAD_NUM; ++j) {
+ check_create_thread(&throw_away, NULL, sleeping_thread,
+ (void *)NULL);
+ check_set_affinity(throw_away, &cpu_set);
+ }
+ }
+ fprintf(stderr, "%s: [%d] all threads launched\n", __func__, run);
+ sem_post(sem);
+ for (i = 0; i < VCPU_NUM; ++i)
+ check_join(threads[i], &b);
+ /* Should not be reached */
+ TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t i;
+ int s, r;
+ pid_t pid;
+
+ sem = sem_open("vm_sem", O_CREAT | O_EXCL, 0644, 0);
+ sem_unlink("vm_sem");
+
+ for (i = 0; i < FORK_NUM; ++i) {
+ pid = fork();
+ TEST_ASSERT(pid >= 0, "%s: unable to fork", __func__);
+ if (pid == 0)
+ run_test(i); /* This function always exits */
+
+ fprintf(stderr, "%s: [%d] waiting semaphore\n", __func__, i);
+ sem_wait(sem);
+ r = (rand() % DELAY_US_MAX) + 1;
+ fprintf(stderr, "%s: [%d] waiting %dus\n", __func__, i, r);
+ usleep(r);
+ r = waitpid(pid, &s, WNOHANG);
+ TEST_ASSERT(r != pid,
+ "%s: [%d] child exited unexpectedly status: [%d]",
+ __func__, i, s);
+ fprintf(stderr, "%s: [%d] killing child\n", __func__, i);
+ kill(pid, SIGKILL);
+ }
+
+ sem_destroy(sem);
+ exit(0);
+}
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 5cbb861525ed..0f4258eaa629 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -16,6 +16,7 @@
#include "sparsebit.h"
+#define KVM_DEV_PATH "/dev/kvm"
#define KVM_MAX_VCPUS 512
/*
@@ -79,12 +80,6 @@ struct vm_guest_mode_params {
};
extern const struct vm_guest_mode_params vm_guest_mode_params[];
-enum vm_mem_backing_src_type {
- VM_MEM_SRC_ANONYMOUS,
- VM_MEM_SRC_ANONYMOUS_THP,
- VM_MEM_SRC_ANONYMOUS_HUGETLB,
-};
-
int kvm_check_cap(long cap);
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
@@ -139,6 +134,7 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
void *arg);
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h
new file mode 100644
index 000000000000..b020547403fd
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/numaif.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/numaif.h
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Header file that provides access to NUMA API functions not explicitly
+ * exported to user space.
+ */
+
+#ifndef SELFTEST_KVM_NUMAIF_H
+#define SELFTEST_KVM_NUMAIF_H
+
+#define __NR_get_mempolicy 239
+#define __NR_migrate_pages 256
+
+/* System calls */
+long get_mempolicy(int *policy, const unsigned long *nmask,
+ unsigned long maxnode, void *addr, int flags)
+{
+ return syscall(__NR_get_mempolicy, policy, nmask,
+ maxnode, addr, flags);
+}
+
+long migrate_pages(int pid, unsigned long maxnode,
+ const unsigned long *frommask,
+ const unsigned long *tomask)
+{
+ return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask);
+}
+
+/* Policies */
+#define MPOL_DEFAULT 0
+#define MPOL_PREFERRED 1
+#define MPOL_BIND 2
+#define MPOL_INTERLEAVE 3
+
+#define MPOL_MAX MPOL_INTERLEAVE
+
+/* Flags for get_mem_policy */
+#define MPOL_F_NODE (1<<0) /* return next il node or node of address */
+ /* Warning: MPOL_F_NODE is unsupported and
+ * subject to change. Don't use.
+ */
+#define MPOL_F_ADDR (1<<1) /* look up vma using address */
+#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */
+
+/* Flags for mbind */
+#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
+#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
+#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
+
+#endif /* SELFTEST_KVM_NUMAIF_H */
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index b1188823c31b..005f2143adeb 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -44,8 +44,11 @@ extern struct perf_test_args perf_test_args;
extern uint64_t guest_test_phys_mem;
struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
- uint64_t vcpu_memory_bytes);
+ uint64_t vcpu_memory_bytes,
+ enum vm_mem_backing_src_type backing_src);
void perf_test_destroy_vm(struct kvm_vm *vm);
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes);
+void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
+ uint64_t vcpu_memory_bytes,
+ bool partition_vcpu_memory_access);
#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index ffffa560436b..b7f41399f22c 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -64,7 +64,21 @@ int64_t timespec_to_ns(struct timespec ts);
struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
-struct timespec timespec_diff_now(struct timespec start);
+struct timespec timespec_elapsed(struct timespec start);
struct timespec timespec_div(struct timespec ts, int divisor);
+enum vm_mem_backing_src_type {
+ VM_MEM_SRC_ANONYMOUS,
+ VM_MEM_SRC_ANONYMOUS_THP,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB,
+};
+
+struct vm_mem_backing_src_alias {
+ const char *name;
+ enum vm_mem_backing_src_type type;
+};
+
+void backing_src_help(void);
+enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
+
#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 90cd5984751b..0b30b4e15c38 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -263,6 +263,19 @@ static inline void outl(uint16_t port, uint32_t value)
__asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
}
+static inline void cpuid(uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx)
+ : "memory");
+}
+
#define SET_XMM(__var, __xmm) \
asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
@@ -338,8 +351,10 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_x86_state *state);
struct kvm_msr_list *kvm_get_msr_index_list(void);
-
+uint64_t kvm_get_feature_msr(uint64_t msr_index);
struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+
+struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_cpuid2 *cpuid);
@@ -391,6 +406,10 @@ bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent);
uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
uint64_t a3);
+struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
+void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+
/*
* Basic CPU control in CR0
*/
@@ -406,8 +425,27 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
+#define APIC_DEFAULT_GPA 0xfee00000ULL
+
+/* APIC base address MSR and fields */
+#define MSR_IA32_APICBASE 0x0000001b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_EXTD (1<<10)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+#define GET_APIC_BASE(x) (((x) >> 12) << 12)
+
#define APIC_BASE_MSR 0x800
#define X2APIC_ENABLE (1UL << 10)
+#define APIC_ID 0x20
+#define APIC_LVR 0x30
+#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
+#define APIC_TASKPRI 0x80
+#define APIC_PROCPRI 0xA0
+#define APIC_EOI 0xB0
+#define APIC_SPIV 0xF0
+#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
+#define APIC_SPIV_APIC_ENABLED (1 << 8)
#define APIC_ICR 0x300
#define APIC_DEST_SELF 0x40000
#define APIC_DEST_ALLINC 0x80000
@@ -432,6 +470,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
#define APIC_DM_EXTINT 0x00700
#define APIC_VECTOR_MASK 0x000FF
#define APIC_ICR2 0x310
+#define SET_APIC_DEST_FIELD(x) ((x) << 24)
/* VMX_EPT_VPID_CAP bits */
#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index fa5a90e6c6f0..b8849a1aca79 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -21,6 +21,8 @@
#define KVM_UTIL_PGS_PER_HUGEPG 512
#define KVM_UTIL_MIN_PFN 2
+static int vcpu_mmap_sz(void);
+
/* Aligns x up to the next multiple of size. Size must be a power of 2. */
static void *align(void *x, size_t size)
{
@@ -509,7 +511,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
vcpu->dirty_gfns = NULL;
}
- ret = munmap(vcpu->state, sizeof(*vcpu->state));
+ ret = munmap(vcpu->state, vcpu_mmap_sz());
TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
"errno: %i", ret, errno);
close(vcpu->fd);
@@ -978,7 +980,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
vcpu_mmap_sz(), sizeof(*vcpu->state));
- vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+ vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
"vcpu id: %u errno: %i", vcpuid, errno);
@@ -1695,11 +1697,16 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
int ret;
- ret = ioctl(vm->fd, cmd, arg);
+ ret = _vm_ioctl(vm, cmd, arg);
TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
cmd, ret, errno, strerror(errno));
}
+int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+ return ioctl(vm->fd, cmd, arg);
+}
+
/*
* KVM system ioctl
*
@@ -1801,6 +1808,7 @@ static struct exit_reason {
{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
{KVM_EXIT_X86_RDMSR, "RDMSR"},
{KVM_EXIT_X86_WRMSR, "WRMSR"},
+ {KVM_EXIT_XEN, "XEN"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index 34465dc562d8..91ce1b5d480b 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -10,8 +10,6 @@
#include "sparsebit.h"
-#define KVM_DEV_PATH "/dev/kvm"
-
struct userspace_mem_region {
struct kvm_userspace_memory_region region;
struct sparsebit *unused_phy_pages;
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 9be1944c2d1c..81490b9b4e32 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -49,7 +49,8 @@ static void guest_code(uint32_t vcpu_id)
}
struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
- uint64_t vcpu_memory_bytes)
+ uint64_t vcpu_memory_bytes,
+ enum vm_mem_backing_src_type backing_src)
{
struct kvm_vm *vm;
uint64_t guest_num_pages;
@@ -93,8 +94,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
/* Add an extra memory slot for testing */
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- guest_test_phys_mem,
+ vm_userspace_mem_region_add(vm, backing_src, guest_test_phys_mem,
PERF_TEST_MEM_SLOT_INDEX,
guest_num_pages, 0);
@@ -112,7 +112,9 @@ void perf_test_destroy_vm(struct kvm_vm *vm)
kvm_vm_free(vm);
}
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
+void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
+ uint64_t vcpu_memory_bytes,
+ bool partition_vcpu_memory_access)
{
vm_paddr_t vcpu_gpa;
struct perf_test_vcpu_args *vcpu_args;
@@ -122,13 +124,22 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_by
vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
vcpu_args->vcpu_id = vcpu_id;
- vcpu_args->gva = guest_test_virt_mem +
- (vcpu_id * vcpu_memory_bytes);
- vcpu_args->pages = vcpu_memory_bytes /
- perf_test_args.guest_page_size;
+ if (partition_vcpu_memory_access) {
+ vcpu_args->gva = guest_test_virt_mem +
+ (vcpu_id * vcpu_memory_bytes);
+ vcpu_args->pages = vcpu_memory_bytes /
+ perf_test_args.guest_page_size;
+ vcpu_gpa = guest_test_phys_mem +
+ (vcpu_id * vcpu_memory_bytes);
+ } else {
+ vcpu_args->gva = guest_test_virt_mem;
+ vcpu_args->pages = (vcpus * vcpu_memory_bytes) /
+ perf_test_args.guest_page_size;
+ vcpu_gpa = guest_test_phys_mem;
+ }
- vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
- vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+ vcpu_id, vcpu_gpa, vcpu_gpa +
+ (vcpu_args->pages * perf_test_args.guest_page_size));
}
}
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 8e04c0b1608e..906c955384e2 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -10,6 +10,7 @@
#include <limits.h>
#include <stdlib.h>
#include <time.h>
+#include "linux/kernel.h"
#include "test_util.h"
@@ -84,7 +85,7 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
return timespec_add_ns((struct timespec){0}, ns1 - ns2);
}
-struct timespec timespec_diff_now(struct timespec start)
+struct timespec timespec_elapsed(struct timespec start)
{
struct timespec end;
@@ -109,3 +110,31 @@ void print_skip(const char *fmt, ...)
va_end(ap);
puts(", skipping test");
}
+
+const struct vm_mem_backing_src_alias backing_src_aliases[] = {
+ {"anonymous", VM_MEM_SRC_ANONYMOUS,},
+ {"anonymous_thp", VM_MEM_SRC_ANONYMOUS_THP,},
+ {"anonymous_hugetlb", VM_MEM_SRC_ANONYMOUS_HUGETLB,},
+};
+
+void backing_src_help(void)
+{
+ int i;
+
+ printf("Available backing src types:\n");
+ for (i = 0; i < ARRAY_SIZE(backing_src_aliases); i++)
+ printf("\t%s\n", backing_src_aliases[i].name);
+}
+
+enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(backing_src_aliases); i++)
+ if (!strcmp(type_name, backing_src_aliases[i].name))
+ return backing_src_aliases[i].type;
+
+ backing_src_help();
+ TEST_FAIL("Unknown backing src type: %s", type_name);
+ return -1;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 95e1a757c629..a8906e60a108 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -670,6 +670,83 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
}
/*
+ * KVM Get MSR
+ *
+ * Input Args:
+ * msr_index - Index of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
+ *
+ * Get value of MSR for VCPU.
+ */
+uint64_t kvm_get_feature_msr(uint64_t msr_index)
+{
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r, kvm_fd;
+
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
+ " rc: %i errno: %i", r, errno);
+
+ close(kvm_fd);
+ return buffer.entry.data;
+}
+
+/*
+ * VM VCPU CPUID Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU id
+ *
+ * Output Args: None
+ *
+ * Return: KVM CPUID (KVM_GET_CPUID2)
+ *
+ * Set the VCPU's CPUID.
+ */
+struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct kvm_cpuid2 *cpuid;
+ int max_ent;
+ int rc = -1;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ cpuid = allocate_kvm_cpuid2();
+ max_ent = cpuid->nent;
+
+ for (cpuid->nent = 1; cpuid->nent <= max_ent; cpuid->nent++) {
+ rc = ioctl(vcpu->fd, KVM_GET_CPUID2, cpuid);
+ if (!rc)
+ break;
+
+ TEST_ASSERT(rc == -1 && errno == E2BIG,
+ "KVM_GET_CPUID2 should either succeed or give E2BIG: %d %d",
+ rc, errno);
+ }
+
+ TEST_ASSERT(rc == 0, "KVM_GET_CPUID2 failed, rc: %i errno: %i",
+ rc, errno);
+
+ return cpuid;
+}
+
+
+
+/*
* Locate a cpuid entry.
*
* Input Args:
@@ -1224,3 +1301,71 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
: "b"(a0), "c"(a1), "d"(a2), "S"(a3));
return r;
}
+
+struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
+{
+ static struct kvm_cpuid2 *cpuid;
+ int ret;
+ int kvm_fd;
+
+ if (cpuid)
+ return cpuid;
+
+ cpuid = allocate_kvm_cpuid2();
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+ TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_HV_CPUID failed %d %d\n",
+ ret, errno);
+
+ close(kvm_fd);
+ return cpuid;
+}
+
+void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ static struct kvm_cpuid2 *cpuid_full;
+ struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
+ int i, nent = 0;
+
+ if (!cpuid_full) {
+ cpuid_sys = kvm_get_supported_cpuid();
+ cpuid_hv = kvm_get_supported_hv_cpuid();
+
+ cpuid_full = malloc(sizeof(*cpuid_full) +
+ (cpuid_sys->nent + cpuid_hv->nent) *
+ sizeof(struct kvm_cpuid_entry2));
+ if (!cpuid_full) {
+ perror("malloc");
+ abort();
+ }
+
+ /* Need to skip KVM CPUID leaves 0x400000xx */
+ for (i = 0; i < cpuid_sys->nent; i++) {
+ if (cpuid_sys->entries[i].function >= 0x40000000 &&
+ cpuid_sys->entries[i].function < 0x40000100)
+ continue;
+ cpuid_full->entries[nent] = cpuid_sys->entries[i];
+ nent++;
+ }
+
+ memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
+ cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
+ cpuid_full->nent = nent + cpuid_hv->nent;
+ }
+
+ vcpu_set_cpuid(vm, vcpuid, cpuid_full);
+}
+
+struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ static struct kvm_cpuid2 *cpuid;
+
+ cpuid = allocate_kvm_cpuid2();
+
+ vcpu_ioctl(vm, vcpuid, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+ return cpuid;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 3a5c72ed2b79..827fe6028dd4 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -74,7 +74,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
memset(vmcb, 0, sizeof(*vmcb));
- asm volatile ("vmsave\n\t" : : "a" (vmcb_gpa) : "memory");
+ asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
@@ -131,19 +131,19 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
{
asm volatile (
- "vmload\n\t"
+ "vmload %[vmcb_gpa]\n\t"
"mov rflags, %%r15\n\t" // rflags
"mov %%r15, 0x170(%[vmcb])\n\t"
"mov guest_regs, %%r15\n\t" // rax
"mov %%r15, 0x1f8(%[vmcb])\n\t"
LOAD_GPR_C
- "vmrun\n\t"
+ "vmrun %[vmcb_gpa]\n\t"
SAVE_GPR_C
"mov 0x170(%[vmcb]), %%r15\n\t" // rflags
"mov %%r15, rflags\n\t"
"mov 0x1f8(%[vmcb]), %%r15\n\t" // rax
"mov %%r15, guest_regs\n\t"
- "vmsave\n\t"
+ "vmsave %[vmcb_gpa]\n\t"
: : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
: "r15", "memory");
}
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
new file mode 100644
index 000000000000..6096bf0a5b34
--- /dev/null
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM memslot modification stress test
+ * Adapted from demand_paging_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2020, Google, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <time.h>
+#include <poll.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/userfaultfd.h>
+
+#include "perf_test_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "guest_modes.h"
+
+#define DUMMY_MEMSLOT_INDEX 7
+
+#define DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS 10
+
+
+static int nr_vcpus = 1;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+static bool run_vcpus = true;
+
+static void *vcpu_worker(void *data)
+{
+ int ret;
+ struct perf_test_vcpu_args *vcpu_args =
+ (struct perf_test_vcpu_args *)data;
+ int vcpu_id = vcpu_args->vcpu_id;
+ struct kvm_vm *vm = perf_test_args.vm;
+ struct kvm_run *run;
+
+ vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+ run = vcpu_state(vm, vcpu_id);
+
+ /* Let the guest access its memory until a stop signal is received */
+ while (READ_ONCE(run_vcpus)) {
+ ret = _vcpu_run(vm, vcpu_id);
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+
+ if (get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC)
+ continue;
+
+ TEST_ASSERT(false,
+ "Invalid guest sync status: exit_reason=%s\n",
+ exit_reason_str(run->exit_reason));
+ }
+
+ return NULL;
+}
+
+struct memslot_antagonist_args {
+ struct kvm_vm *vm;
+ useconds_t delay;
+ uint64_t nr_modifications;
+};
+
+static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
+ uint64_t nr_modifications, uint64_t gpa)
+{
+ int i;
+
+ for (i = 0; i < nr_modifications; i++) {
+ usleep(delay);
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa,
+ DUMMY_MEMSLOT_INDEX, 1, 0);
+
+ vm_mem_region_delete(vm, DUMMY_MEMSLOT_INDEX);
+ }
+}
+
+struct test_params {
+ useconds_t memslot_modification_delay;
+ uint64_t nr_memslot_modifications;
+ bool partition_vcpu_memory_access;
+};
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ struct test_params *p = arg;
+ pthread_t *vcpu_threads;
+ struct kvm_vm *vm;
+ int vcpu_id;
+
+ vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+ VM_MEM_SRC_ANONYMOUS);
+
+ perf_test_args.wr_fract = 1;
+
+ vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
+ TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+
+ perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
+ p->partition_vcpu_memory_access);
+
+ /* Export the shared variables to the guest */
+ sync_global_to_guest(vm, perf_test_args);
+
+ pr_info("Finished creating vCPUs\n");
+
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
+ pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
+ &perf_test_args.vcpu_args[vcpu_id]);
+
+ pr_info("Started all vCPUs\n");
+
+ add_remove_memslot(vm, p->memslot_modification_delay,
+ p->nr_memslot_modifications,
+ guest_test_phys_mem +
+ (guest_percpu_mem_size * nr_vcpus) +
+ perf_test_args.host_page_size +
+ perf_test_args.guest_page_size);
+
+ run_vcpus = false;
+
+ /* Wait for the vcpu threads to quit */
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
+ pthread_join(vcpu_threads[vcpu_id], NULL);
+
+ pr_info("All vCPU threads joined\n");
+
+ ucall_uninit(vm);
+ kvm_vm_free(vm);
+
+ free(vcpu_threads);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-m mode] [-d delay_usec]\n"
+ " [-b memory] [-v vcpus] [-o] [-i iterations]\n", name);
+ guest_modes_help();
+ printf(" -d: add a delay between each iteration of adding and\n"
+ " deleting a memslot in usec.\n");
+ printf(" -b: specify the size of the memory region which should be\n"
+ " accessed by each vCPU. e.g. 10M or 3G.\n"
+ " Default: 1G\n");
+ printf(" -v: specify the number of vCPUs to run.\n");
+ printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+ " them into a separate region of memory for each vCPU.\n");
+ printf(" -i: specify the number of iterations of adding and removing\n"
+ " a memslot.\n"
+ " Default: %d\n", DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS);
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ int opt;
+ struct test_params p = {
+ .memslot_modification_delay = 0,
+ .nr_memslot_modifications =
+ DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS,
+ .partition_vcpu_memory_access = true
+ };
+
+ guest_modes_append_default();
+
+ while ((opt = getopt(argc, argv, "hm:d:b:v:oi:")) != -1) {
+ switch (opt) {
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 'd':
+ p.memslot_modification_delay = strtoul(optarg, NULL, 0);
+ TEST_ASSERT(p.memslot_modification_delay >= 0,
+ "A negative delay is not supported.");
+ break;
+ case 'b':
+ guest_percpu_mem_size = parse_size(optarg);
+ break;
+ case 'v':
+ nr_vcpus = atoi(optarg);
+ TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+ "Invalid number of vcpus, must be between 1 and %d",
+ max_vcpus);
+ break;
+ case 'o':
+ p.partition_vcpu_memory_access = false;
+ break;
+ case 'i':
+ p.nr_memslot_modifications = atoi(optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ for_each_guest_mode(run_test, &p);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/settings b/tools/testing/selftests/kvm/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/kvm/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 37b8a78f6b74..ca22ee6d19cb 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -99,6 +99,7 @@ int main(int argc, char *argv[])
exit(KSFT_SKIP);
}
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
vcpu_enable_evmcs(vm, VCPU_ID);
run = vcpu_state(vm, VCPU_ID);
@@ -142,7 +143,7 @@ int main(int argc, char *argv[])
/* Restore state in a new VM. */
kvm_vm_restart(vm, O_RDWR);
vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
vcpu_enable_evmcs(vm, VCPU_ID);
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
new file mode 100644
index 000000000000..9b78e8889638
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat Inc.
+ *
+ * Generic tests for KVM CPUID set/get ioctls
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 0
+
+/* CPUIDs known to differ */
+struct {
+ u32 function;
+ u32 index;
+} mangled_cpuids[] = {
+ {.function = 0xd, .index = 0},
+};
+
+static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
+{
+ int i;
+ u32 eax, ebx, ecx, edx;
+
+ for (i = 0; i < guest_cpuid->nent; i++) {
+ eax = guest_cpuid->entries[i].function;
+ ecx = guest_cpuid->entries[i].index;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+
+ GUEST_ASSERT(eax == guest_cpuid->entries[i].eax &&
+ ebx == guest_cpuid->entries[i].ebx &&
+ ecx == guest_cpuid->entries[i].ecx &&
+ edx == guest_cpuid->entries[i].edx);
+ }
+
+}
+
+static void test_cpuid_40000000(struct kvm_cpuid2 *guest_cpuid)
+{
+ u32 eax = 0x40000000, ebx, ecx = 0, edx;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+
+ GUEST_ASSERT(eax == 0x40000001);
+}
+
+static void guest_main(struct kvm_cpuid2 *guest_cpuid)
+{
+ GUEST_SYNC(1);
+
+ test_guest_cpuids(guest_cpuid);
+
+ GUEST_SYNC(2);
+
+ test_cpuid_40000000(guest_cpuid);
+
+ GUEST_DONE();
+}
+
+static bool is_cpuid_mangled(struct kvm_cpuid_entry2 *entrie)
+{
+ int i;
+
+ for (i = 0; i < sizeof(mangled_cpuids); i++) {
+ if (mangled_cpuids[i].function == entrie->function &&
+ mangled_cpuids[i].index == entrie->index)
+ return true;
+ }
+
+ return false;
+}
+
+static void check_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *entrie)
+{
+ int i;
+
+ for (i = 0; i < cpuid->nent; i++) {
+ if (cpuid->entries[i].function == entrie->function &&
+ cpuid->entries[i].index == entrie->index) {
+ if (is_cpuid_mangled(entrie))
+ return;
+
+ TEST_ASSERT(cpuid->entries[i].eax == entrie->eax &&
+ cpuid->entries[i].ebx == entrie->ebx &&
+ cpuid->entries[i].ecx == entrie->ecx &&
+ cpuid->entries[i].edx == entrie->edx,
+ "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
+ entrie->function, entrie->index,
+ cpuid->entries[i].eax, cpuid->entries[i].ebx,
+ cpuid->entries[i].ecx, cpuid->entries[i].edx,
+ entrie->eax, entrie->ebx, entrie->ecx, entrie->edx);
+ return;
+ }
+ }
+
+ TEST_ASSERT(false, "CPUID 0x%x.%x not found", entrie->function, entrie->index);
+}
+
+static void compare_cpuids(struct kvm_cpuid2 *cpuid1, struct kvm_cpuid2 *cpuid2)
+{
+ int i;
+
+ for (i = 0; i < cpuid1->nent; i++)
+ check_cpuid(cpuid2, &cpuid1->entries[i]);
+
+ for (i = 0; i < cpuid2->nent; i++)
+ check_cpuid(cpuid1, &cpuid2->entries[i]);
+}
+
+static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
+{
+ struct ucall uc;
+
+ _vcpu_run(vm, vcpuid);
+
+ switch (get_ucall(vm, vcpuid, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ return;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx", (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+ }
+}
+
+struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
+{
+ int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
+ vm_vaddr_t gva = vm_vaddr_alloc(vm, size,
+ getpagesize(), 0, 0);
+ struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
+
+ memcpy(guest_cpuids, cpuid, size);
+
+ *p_gva = gva;
+ return guest_cpuids;
+}
+
+int main(void)
+{
+ struct kvm_cpuid2 *supp_cpuid, *cpuid2;
+ vm_vaddr_t cpuid_gva;
+ struct kvm_vm *vm;
+ int stage;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_main);
+
+ supp_cpuid = kvm_get_supported_cpuid();
+ cpuid2 = vcpu_get_cpuid(vm, VCPU_ID);
+
+ compare_cpuids(supp_cpuid, cpuid2);
+
+ vcpu_alloc_cpuid(vm, &cpuid_gva, cpuid2);
+
+ vcpu_args_set(vm, VCPU_ID, 1, cpuid_gva);
+
+ for (stage = 0; stage < 3; stage++)
+ run_vcpu(vm, VCPU_ID, stage);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
new file mode 100644
index 000000000000..cb953df4d7d0
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_GET_MSR_INDEX_LIST and
+ * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+static int kvm_num_index_msrs(int kvm_fd, int nmsrs)
+{
+ struct kvm_msr_list *list;
+ int r;
+
+ list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+ list->nmsrs = nmsrs;
+ r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+ TEST_ASSERT(r == -1 && errno == E2BIG,
+ "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
+ r);
+
+ r = list->nmsrs;
+ free(list);
+ return r;
+}
+
+static void test_get_msr_index(void)
+{
+ int old_res, res, kvm_fd, r;
+ struct kvm_msr_list *list;
+
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ old_res = kvm_num_index_msrs(kvm_fd, 0);
+ TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
+
+ if (old_res != 1) {
+ res = kvm_num_index_msrs(kvm_fd, 1);
+ TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1");
+ TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical");
+ }
+
+ list = malloc(sizeof(*list) + old_res * sizeof(list->indices[0]));
+ list->nmsrs = old_res;
+ r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+
+ TEST_ASSERT(r == 0,
+ "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i",
+ r);
+ TEST_ASSERT(list->nmsrs == old_res, "Expecting nmsrs to be identical");
+ free(list);
+
+ close(kvm_fd);
+}
+
+static int kvm_num_feature_msrs(int kvm_fd, int nmsrs)
+{
+ struct kvm_msr_list *list;
+ int r;
+
+ list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+ list->nmsrs = nmsrs;
+ r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+ TEST_ASSERT(r == -1 && errno == E2BIG,
+ "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST probe, r: %i",
+ r);
+
+ r = list->nmsrs;
+ free(list);
+ return r;
+}
+
+struct kvm_msr_list *kvm_get_msr_feature_list(int kvm_fd, int nmsrs)
+{
+ struct kvm_msr_list *list;
+ int r;
+
+ list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+ list->nmsrs = nmsrs;
+ r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+
+ TEST_ASSERT(r == 0,
+ "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i",
+ r);
+
+ return list;
+}
+
+static void test_get_msr_feature(void)
+{
+ int res, old_res, i, kvm_fd;
+ struct kvm_msr_list *feature_list;
+
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ old_res = kvm_num_feature_msrs(kvm_fd, 0);
+ TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
+
+ if (old_res != 1) {
+ res = kvm_num_feature_msrs(kvm_fd, 1);
+ TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1");
+ TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical");
+ }
+
+ feature_list = kvm_get_msr_feature_list(kvm_fd, old_res);
+ TEST_ASSERT(old_res == feature_list->nmsrs,
+ "Unmatching number of msr indexes");
+
+ for (i = 0; i < feature_list->nmsrs; i++)
+ kvm_get_feature_msr(feature_list->indices[i]);
+
+ free(feature_list);
+ close(kvm_fd);
+}
+
+int main(int argc, char *argv[])
+{
+ if (kvm_check_cap(KVM_CAP_GET_MSR_FEATURES))
+ test_get_msr_feature();
+
+ test_get_msr_index();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
new file mode 100644
index 000000000000..ffbc4555c6e2
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V clocksources
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct ms_hyperv_tsc_page {
+ volatile u32 tsc_sequence;
+ u32 reserved1;
+ volatile u64 tsc_scale;
+ volatile s64 tsc_offset;
+} __packed;
+
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
+#define HV_X64_MSR_REFERENCE_TSC 0x40000021
+#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+
+/* Simplified mul_u64_u64_shr() */
+static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } rm, rn, rh, a0, b0;
+ u64 c;
+
+ a0.ll = a;
+ b0.ll = b;
+
+ rm.ll = (u64)a0.l.low * b0.l.high;
+ rn.ll = (u64)a0.l.high * b0.l.low;
+ rh.ll = (u64)a0.l.high * b0.l.high;
+
+ rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
+ rh.l.high = (c >> 32) + rh.l.high;
+
+ return rh.ll;
+}
+
+static inline void nop_loop(void)
+{
+ int i;
+
+ for (i = 0; i < 1000000; i++)
+ asm volatile("nop");
+}
+
+static inline void check_tsc_msr_rdtsc(void)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+ GUEST_ASSERT(tsc_freq > 0);
+
+ /* First, check MSR-based clocksource */
+ r1 = rdtsc();
+ t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ nop_loop();
+ r2 = rdtsc();
+ t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+ GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
+}
+
+static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
+{
+ u64 r1, r2, t1, t2;
+
+ /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
+ t1 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+ r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+ /* 10 ms tolerance */
+ GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
+ nop_loop();
+
+ t2 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+ r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
+}
+
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+{
+ u64 tsc_scale, tsc_offset;
+
+ /* Set Guest OS id to enable Hyper-V emulation */
+ GUEST_SYNC(1);
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48);
+ GUEST_SYNC(2);
+
+ check_tsc_msr_rdtsc();
+
+ GUEST_SYNC(3);
+
+ /* Set up TSC page is disabled state, check that it's clean */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+
+ GUEST_SYNC(4);
+
+ /* Set up TSC page is enabled state */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
+ GUEST_ASSERT(tsc_page->tsc_sequence != 0);
+
+ GUEST_SYNC(5);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ GUEST_SYNC(6);
+
+ tsc_offset = tsc_page->tsc_offset;
+ /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
+ GUEST_SYNC(7);
+ GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
+
+ nop_loop();
+
+ /*
+ * Enable Re-enlightenment and check that TSC page stays constant across
+ * KVM_SET_CLOCK.
+ */
+ wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
+ wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
+ tsc_offset = tsc_page->tsc_offset;
+ tsc_scale = tsc_page->tsc_scale;
+ GUEST_SYNC(8);
+ GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
+ GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
+
+ GUEST_SYNC(9);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ /*
+ * Disable re-enlightenment and TSC page, check that KVM doesn't update
+ * it anymore.
+ */
+ wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
+ wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
+ memset(tsc_page, 0, sizeof(*tsc_page));
+
+ GUEST_SYNC(10);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+
+ GUEST_DONE();
+}
+
+#define VCPU_ID 0
+
+static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
+ TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+ /* First, check MSR-based clocksource */
+ r1 = rdtsc();
+ t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+ nop_loop();
+ r2 = rdtsc();
+ t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+
+ TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
+ "Elapsed time does not match (MSR=%ld, TSC=%ld)",
+ (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
+}
+
+int main(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct ucall uc;
+ vm_vaddr_t tsc_page_gva;
+ int stage;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_main);
+ run = vcpu_state(vm, VCPU_ID);
+
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+ tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
+ TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
+ "TSC page has to be page aligned\n");
+ vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
+
+ host_check_tsc_msr_rdtsc(vm);
+
+ for (stage = 1;; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Stage %d: unexpected exit reason: %u (%s),\n",
+ stage, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ /* Keep in sync with guest_main() */
+ TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n",
+ stage);
+ goto out;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ /* Reset kvmclock triggering TSC page update */
+ if (stage == 7 || stage == 8 || stage == 10) {
+ struct kvm_clock_data clock = {0};
+
+ vm_ioctl(vm, KVM_SET_CLOCK, &clock);
+ }
+ }
+
+out:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 88a595b7fbdd..7e2d2d17d2ed 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -125,30 +125,6 @@ void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system)
" it should have: %d %d", system ? "KVM" : "vCPU", ret, errno);
}
-
-struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm, bool system)
-{
- int nent = 20; /* should be enough */
- static struct kvm_cpuid2 *cpuid;
-
- cpuid = malloc(sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2));
-
- if (!cpuid) {
- perror("malloc");
- abort();
- }
-
- cpuid->nent = nent;
-
- if (!system)
- vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
- else
- kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-
- return cpuid;
-}
-
-
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
@@ -167,7 +143,7 @@ int main(int argc, char *argv[])
/* Test vCPU ioctl version */
test_hv_cpuid_e2big(vm, false);
- hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false);
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vm, VCPU_ID);
test_hv_cpuid(hv_cpuid_entries, false);
free(hv_cpuid_entries);
@@ -177,7 +153,7 @@ int main(int argc, char *argv[])
goto do_sys;
}
vcpu_enable_evmcs(vm, VCPU_ID);
- hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false);
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vm, VCPU_ID);
test_hv_cpuid(hv_cpuid_entries, true);
free(hv_cpuid_entries);
@@ -190,9 +166,8 @@ do_sys:
test_hv_cpuid_e2big(vm, true);
- hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, true);
+ hv_cpuid_entries = kvm_get_supported_hv_cpuid();
test_hv_cpuid(hv_cpuid_entries, nested_vmx_supported());
- free(hv_cpuid_entries);
out:
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
new file mode 100644
index 000000000000..12c558fc8074
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_SET_BOOT_CPU_ID works as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#define _GNU_SOURCE /* for program_invocation_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define N_VCPU 2
+#define VCPU_ID0 0
+#define VCPU_ID1 1
+
+static uint32_t get_bsp_flag(void)
+{
+ return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
+}
+
+static void guest_bsp_vcpu(void *arg)
+{
+ GUEST_SYNC(1);
+
+ GUEST_ASSERT(get_bsp_flag() != 0);
+
+ GUEST_DONE();
+}
+
+static void guest_not_bsp_vcpu(void *arg)
+{
+ GUEST_SYNC(1);
+
+ GUEST_ASSERT(get_bsp_flag() == 0);
+
+ GUEST_DONE();
+}
+
+static void test_set_boot_busy(struct kvm_vm *vm)
+{
+ int res;
+
+ res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID0);
+ TEST_ASSERT(res == -1 && errno == EBUSY,
+ "KVM_SET_BOOT_CPU_ID set while running vm");
+}
+
+static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct ucall uc;
+ int stage;
+
+ for (stage = 0; stage < 2; stage++) {
+
+ vcpu_run(vm, vcpuid);
+
+ switch (get_ucall(vm, vcpuid, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ test_set_boot_busy(vm);
+ break;
+ case UCALL_DONE:
+ TEST_ASSERT(stage == 1,
+ "Expected GUEST_DONE in stage 2, got stage %d",
+ stage);
+ break;
+ case UCALL_ABORT:
+ TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx",
+ (const char *)uc.args[0], __FILE__,
+ uc.args[1], uc.args[2], uc.args[3]);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+ }
+ }
+}
+
+static struct kvm_vm *create_vm(void)
+{
+ struct kvm_vm *vm;
+ uint64_t vcpu_pages = (DEFAULT_STACK_PGS) * 2;
+ uint64_t extra_pg_pages = vcpu_pages / PTES_PER_MIN_PAGE * N_VCPU;
+ uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
+
+ pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages);
+ vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR);
+
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ vm_create_irqchip(vm);
+
+ return vm;
+}
+
+static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code)
+{
+ if (bsp_code)
+ vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu);
+ else
+ vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu);
+
+ vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
+}
+
+static void run_vm_bsp(uint32_t bsp_vcpu)
+{
+ struct kvm_vm *vm;
+ bool is_bsp_vcpu1 = bsp_vcpu == VCPU_ID1;
+
+ vm = create_vm();
+
+ if (is_bsp_vcpu1)
+ vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
+
+ add_x86_vcpu(vm, VCPU_ID0, !is_bsp_vcpu1);
+ add_x86_vcpu(vm, VCPU_ID1, is_bsp_vcpu1);
+
+ run_vcpu(vm, VCPU_ID0);
+ run_vcpu(vm, VCPU_ID1);
+
+ kvm_vm_free(vm);
+}
+
+static void check_set_bsp_busy(void)
+{
+ struct kvm_vm *vm;
+ int res;
+
+ vm = create_vm();
+
+ add_x86_vcpu(vm, VCPU_ID0, true);
+ add_x86_vcpu(vm, VCPU_ID1, false);
+
+ res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
+ TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set after adding vcpu");
+
+ run_vcpu(vm, VCPU_ID0);
+ run_vcpu(vm, VCPU_ID1);
+
+ res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
+ TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set to a terminated vcpu");
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ if (!kvm_check_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
+ print_skip("set_boot_cpu_id not available");
+ return 0;
+ }
+
+ run_vm_bsp(VCPU_ID0);
+ run_vm_bsp(VCPU_ID1);
+ run_vm_bsp(VCPU_ID0);
+
+ check_set_bsp_busy();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c
new file mode 100644
index 000000000000..23051d84b907
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * VMX-pmu related msrs test
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Test to check the effect of various CPUID settings
+ * on the MSR_IA32_PERF_CAPABILITIES MSR, and check that
+ * whatever we write with KVM_SET_MSR is _not_ modified
+ * in the guest and test it can be retrieved with KVM_GET_MSR.
+ *
+ * Test to check that invalid LBR formats are rejected.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define VCPU_ID 0
+
+#define X86_FEATURE_PDCM (1<<15)
+#define PMU_CAP_FW_WRITES (1ULL << 13)
+#define PMU_CAP_LBR_FMT 0x3f
+
+union cpuid10_eax {
+ struct {
+ unsigned int version_id:8;
+ unsigned int num_counters:8;
+ unsigned int bit_width:8;
+ unsigned int mask_length:8;
+ } split;
+ unsigned int full;
+};
+
+union perf_capabilities {
+ struct {
+ u64 lbr_format:6;
+ u64 pebs_trap:1;
+ u64 pebs_arch_reg:1;
+ u64 pebs_format:4;
+ u64 smm_freeze:1;
+ u64 full_width_write:1;
+ u64 pebs_baseline:1;
+ u64 perf_metrics:1;
+ u64 pebs_output_pt_available:1;
+ u64 anythread_deprecated:1;
+ };
+ u64 capabilities;
+};
+
+static void guest_code(void)
+{
+ wrmsr(MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_cpuid2 *cpuid;
+ struct kvm_cpuid_entry2 *entry_1_0;
+ struct kvm_cpuid_entry2 *entry_a_0;
+ bool pdcm_supported = false;
+ struct kvm_vm *vm;
+ int ret;
+ union cpuid10_eax eax;
+ union perf_capabilities host_cap;
+
+ host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+ host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ cpuid = kvm_get_supported_cpuid();
+
+ if (kvm_get_cpuid_max_basic() >= 0xa) {
+ entry_1_0 = kvm_get_supported_cpuid_index(1, 0);
+ entry_a_0 = kvm_get_supported_cpuid_index(0xa, 0);
+ pdcm_supported = entry_1_0 && !!(entry_1_0->ecx & X86_FEATURE_PDCM);
+ eax.full = entry_a_0->eax;
+ }
+ if (!pdcm_supported) {
+ print_skip("MSR_IA32_PERF_CAPABILITIES is not supported by the vCPU");
+ exit(KSFT_SKIP);
+ }
+ if (!eax.split.version_id) {
+ print_skip("PMU is not supported by the vCPU");
+ exit(KSFT_SKIP);
+ }
+
+ /* testcase 1, set capabilities when we have PDCM bit */
+ vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
+
+ /* check capabilities can be retrieved with KVM_GET_MSR */
+ ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+
+ /* check whatever we write with KVM_SET_MSR is _not_ modified */
+ vcpu_run(vm, VCPU_ID);
+ ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+
+ /* testcase 2, check valid LBR formats are accepted */
+ vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0);
+ ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0);
+
+ vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
+ ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
+
+ /* testcase 3, check invalid LBR format is rejected */
+ ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
+ TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
+
+ /* testcase 4, set capabilities when we don't have PDCM bit */
+ entry_1_0->ecx &= ~X86_FEATURE_PDCM;
+ vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
+
+ /* testcase 5, set capabilities when we don't have PMU version bits */
+ entry_1_0->ecx |= X86_FEATURE_PDCM;
+ eax.split.version_id = 0;
+ entry_1_0->ecx = eax.full;
+ vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
+ TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
+
+ vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0);
+ ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
new file mode 100644
index 000000000000..2f964cdc273c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -0,0 +1,544 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * xapic_ipi_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
+ * another vCPU that is halted when KVM's backing page for the APIC access
+ * address has been moved by mm.
+ *
+ * The test starts two vCPUs: one that sends IPIs and one that continually
+ * executes HLT. The sender checks that the halter has woken from the HLT and
+ * has reentered HLT before sending the next IPI. While the vCPUs are running,
+ * the host continually calls migrate_pages to move all of the process' pages
+ * amongst the available numa nodes on the machine.
+ *
+ * Migration is a command line option. When used on non-numa machines will
+ * exit with error. Test is still usefull on non-numa for testing IPIs.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <getopt.h>
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util.h"
+#include "numaif.h"
+#include "processor.h"
+#include "test_util.h"
+#include "vmx.h"
+
+/* Default running time for the test */
+#define DEFAULT_RUN_SECS 3
+
+/* Default delay between migrate_pages calls (microseconds) */
+#define DEFAULT_DELAY_USECS 500000
+
+#define HALTER_VCPU_ID 0
+#define SENDER_VCPU_ID 1
+
+volatile uint32_t *apic_base = (volatile uint32_t *)APIC_DEFAULT_GPA;
+
+/*
+ * Vector for IPI from sender vCPU to halting vCPU.
+ * Value is arbitrary and was chosen for the alternating bit pattern. Any
+ * value should work.
+ */
+#define IPI_VECTOR 0xa5
+
+/*
+ * Incremented in the IPI handler. Provides evidence to the sender that the IPI
+ * arrived at the destination
+ */
+static volatile uint64_t ipis_rcvd;
+
+/* Data struct shared between host main thread and vCPUs */
+struct test_data_page {
+ uint32_t halter_apic_id;
+ volatile uint64_t hlt_count;
+ volatile uint64_t wake_count;
+ uint64_t ipis_sent;
+ uint64_t migrations_attempted;
+ uint64_t migrations_completed;
+ uint32_t icr;
+ uint32_t icr2;
+ uint32_t halter_tpr;
+ uint32_t halter_ppr;
+
+ /*
+ * Record local version register as a cross-check that APIC access
+ * worked. Value should match what KVM reports (APIC_VERSION in
+ * arch/x86/kvm/lapic.c). If test is failing, check that values match
+ * to determine whether APIC access exits are working.
+ */
+ uint32_t halter_lvr;
+};
+
+struct thread_params {
+ struct test_data_page *data;
+ struct kvm_vm *vm;
+ uint32_t vcpu_id;
+ uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
+};
+
+uint32_t read_apic_reg(uint reg)
+{
+ return apic_base[reg >> 2];
+}
+
+void write_apic_reg(uint reg, uint32_t val)
+{
+ apic_base[reg >> 2] = val;
+}
+
+void disable_apic(void)
+{
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) &
+ ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
+}
+
+void enable_xapic(void)
+{
+ uint64_t val = rdmsr(MSR_IA32_APICBASE);
+
+ /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
+ if (val & MSR_IA32_APICBASE_EXTD) {
+ disable_apic();
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
+ } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+ wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
+ }
+
+ /*
+ * Per SDM: reset value of spurious interrupt vector register has the
+ * APIC software enabled bit=0. It must be enabled in addition to the
+ * enable bit in the MSR.
+ */
+ val = read_apic_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
+ write_apic_reg(APIC_SPIV, val);
+}
+
+void verify_apic_base_addr(void)
+{
+ uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+ uint64_t base = GET_APIC_BASE(msr);
+
+ GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void halter_guest_code(struct test_data_page *data)
+{
+ verify_apic_base_addr();
+ enable_xapic();
+
+ data->halter_apic_id = GET_APIC_ID_FIELD(read_apic_reg(APIC_ID));
+ data->halter_lvr = read_apic_reg(APIC_LVR);
+
+ /*
+ * Loop forever HLTing and recording halts & wakes. Disable interrupts
+ * each time around to minimize window between signaling the pending
+ * halt to the sender vCPU and executing the halt. No need to disable on
+ * first run as this vCPU executes first and the host waits for it to
+ * signal going into first halt before starting the sender vCPU. Record
+ * TPR and PPR for diagnostic purposes in case the test fails.
+ */
+ for (;;) {
+ data->halter_tpr = read_apic_reg(APIC_TASKPRI);
+ data->halter_ppr = read_apic_reg(APIC_PROCPRI);
+ data->hlt_count++;
+ asm volatile("sti; hlt; cli");
+ data->wake_count++;
+ }
+}
+
+/*
+ * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
+ * enable diagnosing errant writes to the APIC access address backing page in
+ * case of test failure.
+ */
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+ ipis_rcvd++;
+ write_apic_reg(APIC_EOI, 77);
+}
+
+static void sender_guest_code(struct test_data_page *data)
+{
+ uint64_t last_wake_count;
+ uint64_t last_hlt_count;
+ uint64_t last_ipis_rcvd_count;
+ uint32_t icr_val;
+ uint32_t icr2_val;
+ uint64_t tsc_start;
+
+ verify_apic_base_addr();
+ enable_xapic();
+
+ /*
+ * Init interrupt command register for sending IPIs
+ *
+ * Delivery mode=fixed, per SDM:
+ * "Delivers the interrupt specified in the vector field to the target
+ * processor."
+ *
+ * Destination mode=physical i.e. specify target by its local APIC
+ * ID. This vCPU assumes that the halter vCPU has already started and
+ * set data->halter_apic_id.
+ */
+ icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
+ icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
+ data->icr = icr_val;
+ data->icr2 = icr2_val;
+
+ last_wake_count = data->wake_count;
+ last_hlt_count = data->hlt_count;
+ last_ipis_rcvd_count = ipis_rcvd;
+ for (;;) {
+ /*
+ * Send IPI to halter vCPU.
+ * First IPI can be sent unconditionally because halter vCPU
+ * starts earlier.
+ */
+ write_apic_reg(APIC_ICR2, icr2_val);
+ write_apic_reg(APIC_ICR, icr_val);
+ data->ipis_sent++;
+
+ /*
+ * Wait up to ~1 sec for halter to indicate that it has:
+ * 1. Received the IPI
+ * 2. Woken up from the halt
+ * 3. Gone back into halt
+ * Current CPUs typically run at 2.x Ghz which is ~2
+ * billion ticks per second.
+ */
+ tsc_start = rdtsc();
+ while (rdtsc() - tsc_start < 2000000000) {
+ if ((ipis_rcvd != last_ipis_rcvd_count) &&
+ (data->wake_count != last_wake_count) &&
+ (data->hlt_count != last_hlt_count))
+ break;
+ }
+
+ GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
+ (data->wake_count != last_wake_count) &&
+ (data->hlt_count != last_hlt_count));
+
+ last_wake_count = data->wake_count;
+ last_hlt_count = data->hlt_count;
+ last_ipis_rcvd_count = ipis_rcvd;
+ }
+}
+
+static void *vcpu_thread(void *arg)
+{
+ struct thread_params *params = (struct thread_params *)arg;
+ struct ucall uc;
+ int old;
+ int r;
+ unsigned int exit_reason;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(r == 0,
+ "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+ params->vcpu_id, r);
+
+ fprintf(stderr, "vCPU thread running vCPU %u\n", params->vcpu_id);
+ vcpu_run(params->vm, params->vcpu_id);
+ exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason;
+
+ TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+ "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
+ params->vcpu_id, exit_reason, exit_reason_str(exit_reason));
+
+ if (get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_ABORT) {
+ TEST_ASSERT(false,
+ "vCPU %u exited with error: %s.\n"
+ "Sending vCPU sent %lu IPIs to halting vCPU\n"
+ "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+ "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+ "Migrations attempted: %lu\n"
+ "Migrations completed: %lu\n",
+ params->vcpu_id, (const char *)uc.args[0],
+ params->data->ipis_sent, params->data->hlt_count,
+ params->data->wake_count,
+ *params->pipis_rcvd, params->data->halter_tpr,
+ params->data->halter_ppr, params->data->halter_lvr,
+ params->data->migrations_attempted,
+ params->data->migrations_completed);
+ }
+
+ return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id)
+{
+ void *retval;
+ int r;
+
+ r = pthread_cancel(thread);
+ TEST_ASSERT(r == 0,
+ "pthread_cancel on vcpu_id=%d failed with errno=%d",
+ vcpu_id, r);
+
+ r = pthread_join(thread, &retval);
+ TEST_ASSERT(r == 0,
+ "pthread_join on vcpu_id=%d failed with errno=%d",
+ vcpu_id, r);
+ TEST_ASSERT(retval == PTHREAD_CANCELED,
+ "expected retval=%p, got %p", PTHREAD_CANCELED,
+ retval);
+}
+
+void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
+ uint64_t *pipis_rcvd)
+{
+ long pages_not_moved;
+ unsigned long nodemask = 0;
+ unsigned long nodemasks[sizeof(nodemask) * 8];
+ int nodes = 0;
+ time_t start_time, last_update, now;
+ time_t interval_secs = 1;
+ int i, r;
+ int from, to;
+ unsigned long bit;
+ uint64_t hlt_count;
+ uint64_t wake_count;
+ uint64_t ipis_sent;
+
+ fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
+ delay_usecs);
+
+ /* Get set of first 64 numa nodes available */
+ r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+ 0, MPOL_F_MEMS_ALLOWED);
+ TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
+
+ fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
+ "(each 1-bit indicates node is present): %#lx\n",
+ sizeof(nodemask) * 8, nodemask);
+
+ /* Init array of masks containing a single-bit in each, one for each
+ * available node. migrate_pages called below requires specifying nodes
+ * as bit masks.
+ */
+ for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
+ if (nodemask & bit) {
+ nodemasks[nodes] = nodemask & bit;
+ nodes++;
+ }
+ }
+
+ TEST_ASSERT(nodes > 1,
+ "Did not find at least 2 numa nodes. Can't do migration\n");
+
+ fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
+
+ from = 0;
+ to = 1;
+ start_time = time(NULL);
+ last_update = start_time;
+
+ ipis_sent = data->ipis_sent;
+ hlt_count = data->hlt_count;
+ wake_count = data->wake_count;
+
+ while ((int)(time(NULL) - start_time) < run_secs) {
+ data->migrations_attempted++;
+
+ /*
+ * migrate_pages with PID=0 will migrate all pages of this
+ * process between the nodes specified as bitmasks. The page
+ * backing the APIC access address belongs to this process
+ * because it is allocated by KVM in the context of the
+ * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
+ * test may break or give a false positive signal.
+ */
+ pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
+ &nodemasks[from],
+ &nodemasks[to]);
+ if (pages_not_moved < 0)
+ fprintf(stderr,
+ "migrate_pages failed, errno=%d\n", errno);
+ else if (pages_not_moved > 0)
+ fprintf(stderr,
+ "migrate_pages could not move %ld pages\n",
+ pages_not_moved);
+ else
+ data->migrations_completed++;
+
+ from = to;
+ to++;
+ if (to == nodes)
+ to = 0;
+
+ now = time(NULL);
+ if (((now - start_time) % interval_secs == 0) &&
+ (now != last_update)) {
+ last_update = now;
+ fprintf(stderr,
+ "%lu seconds: Migrations attempted=%lu completed=%lu, "
+ "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
+ now - start_time, data->migrations_attempted,
+ data->migrations_completed,
+ data->ipis_sent, *pipis_rcvd,
+ data->hlt_count, data->wake_count);
+
+ TEST_ASSERT(ipis_sent != data->ipis_sent &&
+ hlt_count != data->hlt_count &&
+ wake_count != data->wake_count,
+ "IPI, HLT and wake count have not increased "
+ "in the last %lu seconds. "
+ "HLTer is likely hung.\n", interval_secs);
+
+ ipis_sent = data->ipis_sent;
+ hlt_count = data->hlt_count;
+ wake_count = data->wake_count;
+ }
+ usleep(delay_usecs);
+ }
+}
+
+void get_cmdline_args(int argc, char *argv[], int *run_secs,
+ bool *migrate, int *delay_usecs)
+{
+ for (;;) {
+ int opt = getopt(argc, argv, "s:d:m");
+
+ if (opt == -1)
+ break;
+ switch (opt) {
+ case 's':
+ *run_secs = parse_size(optarg);
+ break;
+ case 'm':
+ *migrate = true;
+ break;
+ case 'd':
+ *delay_usecs = parse_size(optarg);
+ break;
+ default:
+ TEST_ASSERT(false,
+ "Usage: -s <runtime seconds>. Default is %d seconds.\n"
+ "-m adds calls to migrate_pages while vCPUs are running."
+ " Default is no migrations.\n"
+ "-d <delay microseconds> - delay between migrate_pages() calls."
+ " Default is %d microseconds.\n",
+ DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int r;
+ int wait_secs;
+ const int max_halter_wait = 10;
+ int run_secs = 0;
+ int delay_usecs = 0;
+ struct test_data_page *data;
+ vm_vaddr_t test_data_page_vaddr;
+ bool migrate = false;
+ pthread_t threads[2];
+ struct thread_params params[2];
+ struct kvm_vm *vm;
+ uint64_t *pipis_rcvd;
+
+ get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
+ if (run_secs <= 0)
+ run_secs = DEFAULT_RUN_SECS;
+ if (delay_usecs <= 0)
+ delay_usecs = DEFAULT_DELAY_USECS;
+
+ vm = vm_create_default(HALTER_VCPU_ID, 0, halter_guest_code);
+ params[0].vm = vm;
+ params[1].vm = vm;
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
+ vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0);
+
+ vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);
+
+ test_data_page_vaddr = vm_vaddr_alloc(vm, 0x1000, 0x1000, 0, 0);
+ data =
+ (struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr);
+ memset(data, 0, sizeof(*data));
+ params[0].data = data;
+ params[1].data = data;
+
+ vcpu_args_set(vm, HALTER_VCPU_ID, 1, test_data_page_vaddr);
+ vcpu_args_set(vm, SENDER_VCPU_ID, 1, test_data_page_vaddr);
+
+ pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
+ params[0].pipis_rcvd = pipis_rcvd;
+ params[1].pipis_rcvd = pipis_rcvd;
+
+ /* Start halter vCPU thread and wait for it to execute first HLT. */
+ params[0].vcpu_id = HALTER_VCPU_ID;
+ r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
+ TEST_ASSERT(r == 0,
+ "pthread_create halter failed errno=%d", errno);
+ fprintf(stderr, "Halter vCPU thread started\n");
+
+ wait_secs = 0;
+ while ((wait_secs < max_halter_wait) && !data->hlt_count) {
+ sleep(1);
+ wait_secs++;
+ }
+
+ TEST_ASSERT(data->hlt_count,
+ "Halter vCPU did not execute first HLT within %d seconds",
+ max_halter_wait);
+
+ fprintf(stderr,
+ "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
+ data->halter_apic_id, wait_secs);
+
+ params[1].vcpu_id = SENDER_VCPU_ID;
+ r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
+ TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
+
+ fprintf(stderr,
+ "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
+ run_secs);
+
+ if (!migrate)
+ sleep(run_secs);
+ else
+ do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
+
+ /*
+ * Cancel threads and wait for them to stop.
+ */
+ cancel_join_vcpu_thread(threads[0], HALTER_VCPU_ID);
+ cancel_join_vcpu_thread(threads[1], SENDER_VCPU_ID);
+
+ fprintf(stderr,
+ "Test successful after running for %d seconds.\n"
+ "Sending vCPU sent %lu IPIs to halting vCPU\n"
+ "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+ "Halter APIC ID=%#x\n"
+ "Sender ICR value=%#x ICR2 value=%#x\n"
+ "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+ "Migrations attempted: %lu\n"
+ "Migrations completed: %lu\n",
+ run_secs, data->ipis_sent,
+ data->hlt_count, data->wake_count, *pipis_rcvd,
+ data->halter_apic_id,
+ data->icr, data->icr2,
+ data->halter_tpr, data->halter_ppr, data->halter_lvr,
+ data->migrations_attempted, data->migrations_completed);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
new file mode 100644
index 000000000000..804ff5ff022d
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_vmcall_test
+ *
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ *
+ * Xen shared_info / pvclock testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <sys/syscall.h>
+
+#define VCPU_ID 5
+
+#define SHINFO_REGION_GVA 0xc0000000ULL
+#define SHINFO_REGION_GPA 0xc0000000ULL
+#define SHINFO_REGION_SLOT 10
+#define PAGE_SIZE 4096
+
+#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
+
+#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
+
+static struct kvm_vm *vm;
+
+#define XEN_HYPERCALL_MSR 0x40000000
+
+#define MIN_STEAL_TIME 50000
+
+struct pvclock_vcpu_time_info {
+ u32 version;
+ u32 pad0;
+ u64 tsc_timestamp;
+ u64 system_time;
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+ u8 flags;
+ u8 pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+ u32 version;
+ u32 sec;
+ u32 nsec;
+} __attribute__((__packed__));
+
+struct vcpu_runstate_info {
+ uint32_t state;
+ uint64_t state_entry_time;
+ uint64_t time[4];
+};
+
+#define RUNSTATE_running 0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked 2
+#define RUNSTATE_offline 3
+
+static void guest_code(void)
+{
+ struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+
+ /* Test having the host set runstates manually */
+ GUEST_SYNC(RUNSTATE_runnable);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(RUNSTATE_blocked);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(RUNSTATE_offline);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ /* Test runstate time adjust */
+ GUEST_SYNC(4);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+ /* Test runstate time set */
+ GUEST_SYNC(5);
+ GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+ /* sched_yield() should result in some 'runnable' time */
+ GUEST_SYNC(6);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
+ GUEST_DONE();
+}
+
+static long get_run_delay(void)
+{
+ char path[64];
+ long val[2];
+ FILE *fp;
+
+ sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+ fp = fopen(path, "r");
+ fscanf(fp, "%ld %ld ", &val[0], &val[1]);
+ fclose(fp);
+
+ return val[1];
+}
+
+static int cmp_timespec(struct timespec *a, struct timespec *b)
+{
+ if (a->tv_sec > b->tv_sec)
+ return 1;
+ else if (a->tv_sec < b->tv_sec)
+ return -1;
+ else if (a->tv_nsec > b->tv_nsec)
+ return 1;
+ else if (a->tv_nsec < b->tv_nsec)
+ return -1;
+ else
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ struct timespec min_ts, max_ts, vm_ts;
+
+ int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+ if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
+ print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
+ exit(KSFT_SKIP);
+ }
+
+ bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+
+ clock_gettime(CLOCK_REALTIME, &min_ts);
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ /* Map a region for the shared_info page */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
+ virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0);
+
+ struct kvm_xen_hvm_config hvmc = {
+ .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+ .msr = XEN_HYPERCALL_MSR,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+ struct kvm_xen_hvm_attr lm = {
+ .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
+ .u.long_mode = 1,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ struct kvm_xen_hvm_attr ha = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
+
+ struct kvm_xen_vcpu_attr vi = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+ .u.gpa = SHINFO_REGION_GPA + 0x40,
+ };
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);
+
+ struct kvm_xen_vcpu_attr pvclock = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
+ .u.gpa = PVTIME_ADDR,
+ };
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+
+ if (do_runstate_tests) {
+ struct kvm_xen_vcpu_attr st = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+ .u.gpa = RUNSTATE_ADDR,
+ };
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
+ }
+
+ struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);;
+ rs->state = 0x5a;
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s", (const char *)uc.args[0]);
+ /* NOT REACHED */
+ case UCALL_SYNC: {
+ struct kvm_xen_vcpu_attr rst;
+ long rundelay;
+
+ /* If no runstate support, bail out early */
+ if (!do_runstate_tests)
+ goto done;
+
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+
+ switch (uc.args[1]) {
+ case RUNSTATE_running...RUNSTATE_offline:
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+ rst.u.runstate.state = uc.args[1];
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+ case 4:
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = (uint64_t)-1;
+ rst.u.runstate.time_blocked =
+ 0x5a - rs->time[RUNSTATE_blocked];
+ rst.u.runstate.time_offline =
+ 0x6b6b - rs->time[RUNSTATE_offline];
+ rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+ rst.u.runstate.time_offline;
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case 5:
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = RUNSTATE_running;
+ rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+ rst.u.runstate.time_blocked = 0x6b6b;
+ rst.u.runstate.time_offline = 0x5a;
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+ case 6:
+ /* Yield until scheduler delay exceeds target */
+ rundelay = get_run_delay() + MIN_STEAL_TIME;
+ do {
+ sched_yield();
+ } while (get_run_delay() < rundelay);
+ break;
+ }
+ break;
+ }
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+
+ done:
+ clock_gettime(CLOCK_REALTIME, &max_ts);
+
+ /*
+ * Just a *really* basic check that things are being put in the
+ * right place. The actual calculations are much the same for
+ * Xen as they are for the KVM variants, so no need to check.
+ */
+ struct pvclock_wall_clock *wc;
+ struct pvclock_vcpu_time_info *ti, *ti2;
+
+ wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
+ ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
+ ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
+
+ vm_ts.tv_sec = wc->sec;
+ vm_ts.tv_nsec = wc->nsec;
+ TEST_ASSERT(wc->version && !(wc->version & 1),
+ "Bad wallclock version %x", wc->version);
+ TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
+ TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
+
+ TEST_ASSERT(ti->version && !(ti->version & 1),
+ "Bad time_info version %x", ti->version);
+ TEST_ASSERT(ti2->version && !(ti2->version & 1),
+ "Bad time_info version %x", ti->version);
+
+ if (do_runstate_tests) {
+ /*
+ * Fetch runstate and check sanity. Strictly speaking in the
+ * general case we might not expect the numbers to be identical
+ * but in this case we know we aren't running the vCPU any more.
+ */
+ struct kvm_xen_vcpu_attr rst = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+ };
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+ TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+ TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+ "State entry time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+ "Running time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+ "Runnable time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+ "Blocked time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+ "Offline time mismatch");
+
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+ }
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
new file mode 100644
index 000000000000..8389e0bfd711
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xen_vmcall_test
+ *
+ * Copyright © 2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Userspace hypercall testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 5
+
+#define HCALL_REGION_GPA 0xc0000000ULL
+#define HCALL_REGION_SLOT 10
+#define PAGE_SIZE 4096
+
+static struct kvm_vm *vm;
+
+#define INPUTVALUE 17
+#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
+#define RETVALUE 0xcafef00dfbfbffffUL
+
+#define XEN_HYPERCALL_MSR 0x40000200
+#define HV_GUEST_OS_ID_MSR 0x40000000
+#define HV_HYPERCALL_MSR 0x40000001
+
+#define HVCALL_SIGNAL_EVENT 0x005d
+#define HV_STATUS_INVALID_ALIGNMENT 4
+
+static void guest_code(void)
+{
+ unsigned long rax = INPUTVALUE;
+ unsigned long rdi = ARGVALUE(1);
+ unsigned long rsi = ARGVALUE(2);
+ unsigned long rdx = ARGVALUE(3);
+ unsigned long rcx;
+ register unsigned long r10 __asm__("r10") = ARGVALUE(4);
+ register unsigned long r8 __asm__("r8") = ARGVALUE(5);
+ register unsigned long r9 __asm__("r9") = ARGVALUE(6);
+
+ /* First a direct invocation of 'vmcall' */
+ __asm__ __volatile__("vmcall" :
+ "=a"(rax) :
+ "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+ "r"(r10), "r"(r8), "r"(r9));
+ GUEST_ASSERT(rax == RETVALUE);
+
+ /* Fill in the Xen hypercall page */
+ __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
+ "a" (HCALL_REGION_GPA & 0xffffffff),
+ "d" (HCALL_REGION_GPA >> 32));
+
+ /* Set Hyper-V Guest OS ID */
+ __asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR),
+ "a" (0x5a), "d" (0));
+
+ /* Hyper-V hypercall page */
+ u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1;
+ __asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR),
+ "a" (msrval & 0xffffffff),
+ "d" (msrval >> 32));
+
+ /* Invoke a Xen hypercall */
+ __asm__ __volatile__("call *%1" : "=a"(rax) :
+ "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
+ "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+ "r"(r10), "r"(r8), "r"(r9));
+ GUEST_ASSERT(rax == RETVALUE);
+
+ /* Invoke a Hyper-V hypercall */
+ rax = 0;
+ rcx = HVCALL_SIGNAL_EVENT; /* code */
+ rdx = 0x5a5a5a5a; /* ingpa (badly aligned) */
+ __asm__ __volatile__("call *%1" : "=a"(rax) :
+ "r"(HCALL_REGION_GPA + PAGE_SIZE),
+ "a"(rax), "c"(rcx), "d"(rdx),
+ "r"(r8));
+ GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ if (!(kvm_check_cap(KVM_CAP_XEN_HVM) &
+ KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) ) {
+ print_skip("KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL not available");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+ struct kvm_xen_hvm_config hvmc = {
+ .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+ .msr = XEN_HYPERCALL_MSR,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+ /* Map a region for the hypercall pages */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
+ virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2, 0);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+
+ if (run->exit_reason == KVM_EXIT_XEN) {
+ ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+ ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+ ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+ ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+ ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+ ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+ ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+ ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+ ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+ ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+ run->xen.u.hcall.result = RETVALUE;
+ continue;
+ }
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s", (const char *)uc.args[0]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/mount_setattr/.gitignore b/tools/testing/selftests/mount_setattr/.gitignore
new file mode 100644
index 000000000000..5f74d8488472
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/.gitignore
@@ -0,0 +1 @@
+mount_setattr_test
diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile
new file mode 100644
index 000000000000..2250f7dcb81e
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -g -I../../../../usr/include/ -Wall -O2 -pthread
+
+TEST_GEN_FILES += mount_setattr_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mount_setattr/config b/tools/testing/selftests/mount_setattr/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
new file mode 100644
index 000000000000..4e94e566e040
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -0,0 +1,1424 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/sysinfo.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef CLONE_NEWNS
+#define CLONE_NEWNS 0x00020000
+#endif
+
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+
+#ifndef MS_RELATIME
+#define MS_RELATIME (1 << 21)
+#endif
+
+#ifndef MS_STRICTATIME
+#define MS_STRICTATIME (1 << 24)
+#endif
+
+#ifndef MOUNT_ATTR_RDONLY
+#define MOUNT_ATTR_RDONLY 0x00000001
+#endif
+
+#ifndef MOUNT_ATTR_NOSUID
+#define MOUNT_ATTR_NOSUID 0x00000002
+#endif
+
+#ifndef MOUNT_ATTR_NOEXEC
+#define MOUNT_ATTR_NOEXEC 0x00000008
+#endif
+
+#ifndef MOUNT_ATTR_NODIRATIME
+#define MOUNT_ATTR_NODIRATIME 0x00000080
+#endif
+
+#ifndef MOUNT_ATTR__ATIME
+#define MOUNT_ATTR__ATIME 0x00000070
+#endif
+
+#ifndef MOUNT_ATTR_RELATIME
+#define MOUNT_ATTR_RELATIME 0x00000000
+#endif
+
+#ifndef MOUNT_ATTR_NOATIME
+#define MOUNT_ATTR_NOATIME 0x00000010
+#endif
+
+#ifndef MOUNT_ATTR_STRICTATIME
+#define MOUNT_ATTR_STRICTATIME 0x00000020
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1 << 20)
+#endif
+
+#define DEFAULT_THREADS 4
+#define ptr_to_int(p) ((int)((intptr_t)(p)))
+#define int_to_ptr(u) ((void *)((intptr_t)(u)))
+
+#ifndef __NR_mount_setattr
+ #if defined __alpha__
+ #define __NR_mount_setattr 552
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_mount_setattr (442 + 4000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_mount_setattr (442 + 6000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_mount_setattr (442 + 5000)
+ #endif
+ #elif defined __ia64__
+ #define __NR_mount_setattr (442 + 1024)
+ #else
+ #define __NR_mount_setattr 442
+ #endif
+
+struct mount_attr {
+ __u64 attr_set;
+ __u64 attr_clr;
+ __u64 propagation;
+ __u64 userns_fd;
+};
+#endif
+
+#ifndef __NR_open_tree
+ #if defined __alpha__
+ #define __NR_open_tree 538
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_open_tree 4428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_open_tree 6428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_open_tree 5428
+ #endif
+ #elif defined __ia64__
+ #define __NR_open_tree (428 + 1024)
+ #else
+ #define __NR_open_tree 428
+ #endif
+#endif
+
+#ifndef MOUNT_ATTR_IDMAP
+#define MOUNT_ATTR_IDMAP 0x00100000
+#endif
+
+static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
+ struct mount_attr *attr, size_t size)
+{
+ return syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
+}
+
+#ifndef OPEN_TREE_CLONE
+#define OPEN_TREE_CLONE 1
+#endif
+
+#ifndef OPEN_TREE_CLOEXEC
+#define OPEN_TREE_CLOEXEC O_CLOEXEC
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+#endif
+
+static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+{
+ return syscall(__NR_open_tree, dfd, filename, flags);
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+static int prepare_unpriv_mountns(void)
+{
+ if (create_and_enter_userns())
+ return -1;
+
+ if (unshare(CLONE_NEWNS))
+ return -1;
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ return -1;
+
+ return 0;
+}
+
+static int read_mnt_flags(const char *path)
+{
+ int ret;
+ struct statvfs stat;
+ unsigned int mnt_flags;
+
+ ret = statvfs(path, &stat);
+ if (ret != 0)
+ return -EINVAL;
+
+ if (stat.f_flag &
+ ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | ST_NOATIME |
+ ST_NODIRATIME | ST_RELATIME | ST_SYNCHRONOUS | ST_MANDLOCK))
+ return -EINVAL;
+
+ mnt_flags = 0;
+ if (stat.f_flag & ST_RDONLY)
+ mnt_flags |= MS_RDONLY;
+ if (stat.f_flag & ST_NOSUID)
+ mnt_flags |= MS_NOSUID;
+ if (stat.f_flag & ST_NODEV)
+ mnt_flags |= MS_NODEV;
+ if (stat.f_flag & ST_NOEXEC)
+ mnt_flags |= MS_NOEXEC;
+ if (stat.f_flag & ST_NOATIME)
+ mnt_flags |= MS_NOATIME;
+ if (stat.f_flag & ST_NODIRATIME)
+ mnt_flags |= MS_NODIRATIME;
+ if (stat.f_flag & ST_RELATIME)
+ mnt_flags |= MS_RELATIME;
+ if (stat.f_flag & ST_SYNCHRONOUS)
+ mnt_flags |= MS_SYNCHRONOUS;
+ if (stat.f_flag & ST_MANDLOCK)
+ mnt_flags |= ST_MANDLOCK;
+
+ return mnt_flags;
+}
+
+static char *get_field(char *src, int nfields)
+{
+ int i;
+ char *p = src;
+
+ for (i = 0; i < nfields; i++) {
+ while (*p && *p != ' ' && *p != '\t')
+ p++;
+
+ if (!*p)
+ break;
+
+ p++;
+ }
+
+ return p;
+}
+
+static void null_endofword(char *word)
+{
+ while (*word && *word != ' ' && *word != '\t')
+ word++;
+ *word = '\0';
+}
+
+static bool is_shared_mount(const char *path)
+{
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f = NULL;
+
+ f = fopen("/proc/self/mountinfo", "re");
+ if (!f)
+ return false;
+
+ while (getline(&line, &len, f) != -1) {
+ char *opts, *target;
+
+ target = get_field(line, 4);
+ if (!target)
+ continue;
+
+ opts = get_field(target, 2);
+ if (!opts)
+ continue;
+
+ null_endofword(target);
+
+ if (strcmp(target, path) != 0)
+ continue;
+
+ null_endofword(opts);
+ if (strstr(opts, "shared:"))
+ return true;
+ }
+
+ free(line);
+ fclose(f);
+
+ return false;
+}
+
+static void *mount_setattr_thread(void *data)
+{
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
+ .attr_clr = 0,
+ .propagation = MS_SHARED,
+ };
+
+ if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
+ pthread_exit(int_to_ptr(-1));
+
+ pthread_exit(int_to_ptr(0));
+}
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
+static bool mount_setattr_supported(void)
+{
+ int ret;
+
+ ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
+ if (ret < 0 && errno == ENOSYS)
+ return false;
+
+ return true;
+}
+
+FIXTURE(mount_setattr) {
+};
+
+FIXTURE_SETUP(mount_setattr)
+{
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_EQ(prepare_unpriv_mountns(), 0);
+
+ (void)umount2("/mnt", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
+
+ ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
+ MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
+ MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+}
+
+FIXTURE_TEARDOWN(mount_setattr)
+{
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ (void)umount2("/mnt/A", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+}
+
+TEST_F(mount_setattr, invalid_attributes)
+{
+ struct mount_attr invalid_attr = {
+ .attr_set = (1U << 31),
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ invalid_attr.attr_set = 0;
+ invalid_attr.attr_clr = (1U << 31);
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ invalid_attr.attr_clr = 0;
+ invalid_attr.propagation = (1U << 31);
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ invalid_attr.attr_set = (1U << 31);
+ invalid_attr.attr_clr = (1U << 31);
+ invalid_attr.propagation = (1U << 31);
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+}
+
+TEST_F(mount_setattr, extensibility)
+{
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ char *s = "dummy";
+ struct mount_attr invalid_attr = {};
+ struct mount_attr_large {
+ struct mount_attr attr1;
+ struct mount_attr attr2;
+ struct mount_attr attr3;
+ } large_attr = {};
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
+ sizeof(invalid_attr)), 0);
+ ASSERT_EQ(errno, EFAULT);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
+ sizeof(invalid_attr)), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr) / 2), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr) / 2), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+ (void *)&large_attr, sizeof(large_attr)), 0);
+
+ large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+ (void *)&large_attr, sizeof(large_attr)), 0);
+
+ large_attr.attr3.attr_set = 0;
+ large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+ (void *)&large_attr, sizeof(large_attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, basic)
+{
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+ .attr_clr = MOUNT_ATTR__ATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+ expected_flags |= MS_NOEXEC;
+ expected_flags &= ~MS_NOATIME;
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, old_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, old_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, old_flags);
+}
+
+TEST_F(mount_setattr, basic_recursive)
+{
+ int fd;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+ .attr_clr = MOUNT_ATTR__ATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+ expected_flags |= MS_NOEXEC;
+ expected_flags &= ~MS_NOATIME;
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_clr = MOUNT_ATTR_RDONLY;
+ attr.propagation = MS_SHARED;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_RDONLY;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+
+ fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
+ ASSERT_GE(fd, 0);
+
+ /*
+ * We're holding a fd open for writing so this needs to fail somewhere
+ * in the middle and the mount options need to be unchanged.
+ */
+ attr.attr_set = MOUNT_ATTR_RDONLY;
+ ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+
+ EXPECT_EQ(close(fd), 0);
+}
+
+TEST_F(mount_setattr, mount_has_writers)
+{
+ int fd, dfd;
+ unsigned int old_flags = 0, new_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+ .attr_clr = MOUNT_ATTR__ATIME,
+ .propagation = MS_SHARED,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
+ ASSERT_GE(fd, 0);
+
+ /*
+ * We're holding a fd open to a mount somwhere in the middle so this
+ * needs to fail somewhere in the middle. After this the mount options
+ * need to be unchanged.
+ */
+ ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), false);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
+
+ dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
+ ASSERT_GE(dfd, 0);
+ EXPECT_EQ(fsync(dfd), 0);
+ EXPECT_EQ(close(dfd), 0);
+
+ EXPECT_EQ(fsync(fd), 0);
+ EXPECT_EQ(close(fd), 0);
+
+ /* All writers are gone so this should succeed. */
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+}
+
+TEST_F(mount_setattr, mixed_mount_options)
+{
+ unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
+ .attr_set = MOUNT_ATTR_RELATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags1 = read_mnt_flags("/mnt/B");
+ ASSERT_GT(old_flags1, 0);
+
+ old_flags2 = read_mnt_flags("/mnt/B/BB");
+ ASSERT_GT(old_flags2, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags2;
+ expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ expected_flags = old_flags2;
+ expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, time_changes)
+{
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = MOUNT_ATTR_STRICTATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
+ attr.attr_clr = MOUNT_ATTR__ATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = 0;
+ attr.attr_clr = MOUNT_ATTR_STRICTATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_clr = MOUNT_ATTR_NOATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
+ attr.attr_clr = MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_NOATIME;
+ expected_flags |= MS_NODIRATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_set &= ~MOUNT_ATTR_NOATIME;
+ attr.attr_set |= MOUNT_ATTR_RELATIME;
+ attr.attr_clr |= MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_NOATIME;
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_set &= ~MOUNT_ATTR_RELATIME;
+ attr.attr_set |= MOUNT_ATTR_STRICTATIME;
+ attr.attr_clr |= MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
+ attr.attr_set |= MOUNT_ATTR_NOATIME;
+ attr.attr_clr |= MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags |= MS_NOATIME;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_clr = MOUNT_ATTR_NODIRATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_NODIRATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, multi_threaded)
+{
+ int i, j, nthreads, ret = 0;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ pthread_attr_t pattr;
+ pthread_t threads[DEFAULT_THREADS];
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ /* Try to change mount options from multiple threads. */
+ nthreads = get_nprocs_conf();
+ if (nthreads > DEFAULT_THREADS)
+ nthreads = DEFAULT_THREADS;
+
+ pthread_attr_init(&pattr);
+ for (i = 0; i < nthreads; i++)
+ ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
+
+ for (j = 0; j < i; j++) {
+ void *retptr = NULL;
+
+ EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
+
+ ret += ptr_to_int(retptr);
+ EXPECT_EQ(ret, 0);
+ }
+ pthread_attr_destroy(&pattr);
+
+ ASSERT_EQ(ret, 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+ expected_flags |= MS_NOSUID;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+}
+
+TEST_F(mount_setattr, wrong_user_namespace)
+{
+ int ret;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ EXPECT_EQ(create_and_enter_userns(), 0);
+ ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EPERM);
+}
+
+TEST_F(mount_setattr, wrong_mount_namespace)
+{
+ int fd, ret;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+FIXTURE(mount_setattr_idmapped) {
+};
+
+FIXTURE_SETUP(mount_setattr_idmapped)
+{
+ int img_fd = -EBADF;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);
+
+ (void)umount2("/mnt", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
+ ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
+ ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
+ ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
+ ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
+
+ ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
+ MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
+ MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+
+ ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
+ ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
+ img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
+ ASSERT_GE(img_fd, 0);
+ ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
+ ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
+ ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
+ ASSERT_EQ(close(img_fd), 0);
+}
+
+FIXTURE_TEARDOWN(mount_setattr_idmapped)
+{
+ (void)umount2("/mnt/A", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+}
+
+/**
+ * Validate that negative fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_negative)
+{
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ .userns_fd = -EBADF,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+ TH_LOG("failure: created idmapped mount with negative fd");
+ }
+}
+
+/**
+ * Validate that excessively large fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_large)
+{
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ .userns_fd = INT64_MAX,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+ TH_LOG("failure: created idmapped mount with too large fd value");
+ }
+}
+
+/**
+ * Validate that closed fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_closed)
+{
+ int fd;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+ ASSERT_GE(close(fd), 0);
+
+ attr.userns_fd = fd;
+ ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+ TH_LOG("failure: created idmapped mount with closed fd");
+ }
+}
+
+/**
+ * Validate that the initial user namespace is rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(errno, EPERM);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
+ unsigned long range)
+{
+ char map[100], procfile[256];
+
+ snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid);
+ snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
+ if (write_file(procfile, map, strlen(map)))
+ return -1;
+
+
+ snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid);
+ snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
+ if (write_file(procfile, map, strlen(map)))
+ return -1;
+
+ return 0;
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int get_userns_fd_cb(void *data)
+{
+ return kill(getpid(), SIGSTOP);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
+{
+ int ret;
+ pid_t pid;
+ char path[256];
+
+ pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
+ if (pid < 0)
+ return -errno;
+
+ ret = map_ids(pid, nsid, hostid, range);
+ if (ret < 0)
+ return ret;
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ ret = open(path, O_RDONLY | O_CLOEXEC);
+ kill(pid, SIGKILL);
+ wait_for_pid(pid);
+ return ret;
+}
+
+/**
+ * Validate that an attached mount in our mount namespace can be idmapped.
+ * (The kernel enforces that the mount's mount namespace and the caller's mount
+ * namespace match.)
+ */
+TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that idmapping a mount is rejected if the mount's mount namespace
+ * and our mount namespace don't match.
+ * (The kernel enforces that the mount's mount namespace and the caller's mount
+ * namespace match.)
+ */
+TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC);
+ ASSERT_GE(open_tree_fd, 0);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
+ sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that an attached mount in our mount namespace can be idmapped.
+ */
+TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ /* Changing mount properties on a detached mount. */
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+ AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that a detached mount not in our mount namespace can be idmapped.
+ */
+TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ /* Changing mount properties on a detached mount. */
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+ AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that currently changing the idmapping of an idmapped mount fails.
+ */
+TEST_F(mount_setattr_idmapped, change_idmapping)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+ AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+
+ /* Change idmapping on a detached mount that is already idmapped. */
+ attr.userns_fd = get_userns_fd(0, 20000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+static bool expected_uid_gid(int dfd, const char *path, int flags,
+ uid_t expected_uid, gid_t expected_gid)
+{
+ int ret;
+ struct stat st;
+
+ ret = fstatat(dfd, path, &st, flags);
+ if (ret < 0)
+ return false;
+
+ return st.st_uid == expected_uid && st.st_gid == expected_gid;
+}
+
+TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
+ AT_RECURSIVE |
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 25f198bec0b2..2d71b283dde3 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -23,6 +23,7 @@ TEST_PROGS += drop_monitor_tests.sh
TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS += bareudp.sh
TEST_PROGS += unicast_extensions.sh
+TEST_PROGS += udpgro_fwd.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 4c7d33618437..56dd0c6f2e96 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,10 +19,39 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture"
-
-ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
+IPV4_TESTS="
+ ipv4_fcnal
+ ipv4_grp_fcnal
+ ipv4_res_grp_fcnal
+ ipv4_withv6_fcnal
+ ipv4_fcnal_runtime
+ ipv4_large_grp
+ ipv4_large_res_grp
+ ipv4_compat_mode
+ ipv4_fdb_grp_fcnal
+ ipv4_torture
+ ipv4_res_torture
+"
+
+IPV6_TESTS="
+ ipv6_fcnal
+ ipv6_grp_fcnal
+ ipv6_res_grp_fcnal
+ ipv6_fcnal_runtime
+ ipv6_large_grp
+ ipv6_large_res_grp
+ ipv6_compat_mode
+ ipv6_fdb_grp_fcnal
+ ipv6_torture
+ ipv6_res_torture
+"
+
+ALL_TESTS="
+ basic
+ basic_res
+ ${IPV4_TESTS}
+ ${IPV6_TESTS}
+"
TESTS="${ALL_TESTS}"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -232,6 +261,19 @@ check_nexthop()
check_output "${out}" "${expected}"
}
+check_nexthop_bucket()
+{
+ local nharg="$1"
+ local expected="$2"
+ local out
+
+ # remove the idle time since we cannot match it
+ out=$($IP nexthop bucket ${nharg} \
+ | sed s/idle_time\ [0-9.]*\ // 2>/dev/null)
+
+ check_output "${out}" "${expected}"
+}
+
check_route()
{
local pfx="$1"
@@ -308,6 +350,25 @@ check_large_grp()
log_test $? 0 "Dump large (x$ecmp) ecmp groups"
}
+check_large_res_grp()
+{
+ local ipv=$1
+ local buckets=$2
+ local ipstr=""
+
+ if [ $ipv -eq 4 ]; then
+ ipstr="172.16.1.2"
+ else
+ ipstr="2001:db8:91::2"
+ fi
+
+ # create a resilient group with $buckets buckets and dump them
+ run_cmd "$IP nexthop add id 100 via $ipstr dev veth1"
+ run_cmd "$IP nexthop add id 1000 group 100 type resilient buckets $buckets"
+ run_cmd "$IP nexthop bucket list"
+ log_test $? 0 "Dump large (x$buckets) nexthop buckets"
+}
+
start_ip_monitor()
{
local mtype=$1
@@ -344,6 +405,15 @@ check_nexthop_fdb_support()
fi
}
+check_nexthop_res_support()
+{
+ $IP nexthop help 2>&1 | grep -q resilient
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing resilient nexthop group support"
+ return $ksft_skip
+ fi
+}
+
ipv6_fdb_grp_fcnal()
{
local rc
@@ -666,6 +736,70 @@ ipv6_grp_fcnal()
log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
}
+ipv6_res_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv6 resilient groups functional"
+ echo "--------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ #
+ # migration of nexthop buckets - equal weights
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 63"
+ check_nexthop "id 102" \
+ "id 102 group 62 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 62 id 102 index 1 nhid 62"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 62/63 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 63 id 102 index 1 nhid 62"
+ log_test $? 0 "Nexthop buckets updated after replace"
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ #
+ # migration of nexthop buckets - unequal weights
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 63"
+ check_nexthop "id 102" \
+ "id 102 group 62,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 62 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 62,3/63 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+ log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+}
+
ipv6_fcnal_runtime()
{
local rc
@@ -824,6 +958,22 @@ ipv6_large_grp()
$IP nexthop flush >/dev/null 2>&1
}
+ipv6_large_res_grp()
+{
+ echo
+ echo "IPv6 large resilient group (128k buckets)"
+ echo "-----------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ check_large_res_grp 6 $((128 * 1024))
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
ipv6_del_add_loop1()
{
while :; do
@@ -874,11 +1024,67 @@ ipv6_torture()
sleep 300
kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
# if we did not crash, success
log_test 0 0 "IPv6 torture test"
}
+ipv6_res_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101 type resilient
+ done >/dev/null 2>&1
+}
+
+ipv6_res_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv6 runtime resilient nexthop group torture"
+ echo "--------------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+ run_cmd "$IP route add 2001:db8:101::1 nhid 102"
+ run_cmd "$IP route add 2001:db8:101::2 nhid 102"
+
+ ipv6_del_add_loop1 &
+ pid1=$!
+ ipv6_res_grp_replace_loop &
+ pid2=$!
+ ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec me mausezahn -6 veth1 \
+ -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \
+ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+ # if we did not crash, success
+ log_test 0 0 "IPv6 resilient nexthop group torture test"
+}
ipv4_fcnal()
{
@@ -1038,6 +1244,70 @@ ipv4_grp_fcnal()
log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
}
+ipv4_res_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv4 resilient groups functional"
+ echo "--------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ #
+ # migration of nexthop buckets - equal weights
+ #
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 13"
+ check_nexthop "id 102" \
+ "id 102 group 12 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 12 id 102 index 1 nhid 12"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 12/13 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 13 id 102 index 1 nhid 12"
+ log_test $? 0 "Nexthop buckets updated after replace"
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ #
+ # migration of nexthop buckets - unequal weights
+ #
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 13"
+ check_nexthop "id 102" \
+ "id 102 group 12,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 12 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 12,3/13 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 13 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+ log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+}
+
ipv4_withv6_fcnal()
{
local lladdr
@@ -1259,6 +1529,22 @@ ipv4_large_grp()
$IP nexthop flush >/dev/null 2>&1
}
+ipv4_large_res_grp()
+{
+ echo
+ echo "IPv4 large resilient group (128k buckets)"
+ echo "-----------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ check_large_res_grp 4 $((128 * 1024))
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
sysctl_nexthop_compat_mode_check()
{
local sysctlname="net.ipv4.nexthop_compat_mode"
@@ -1476,11 +1762,68 @@ ipv4_torture()
sleep 300
kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
# if we did not crash, success
log_test 0 0 "IPv4 torture test"
}
+ipv4_res_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101 type resilient
+ done >/dev/null 2>&1
+}
+
+ipv4_res_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv4 runtime resilient nexthop group torture"
+ echo "--------------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+ run_cmd "$IP route add 172.16.101.1 nhid 102"
+ run_cmd "$IP route add 172.16.101.2 nhid 102"
+
+ ipv4_del_add_loop1 &
+ pid1=$!
+ ipv4_res_grp_replace_loop &
+ pid2=$!
+ ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec me mausezahn veth1 \
+ -B 172.16.101.2 -A 172.16.1.1 -c 0 \
+ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+ # if we did not crash, success
+ log_test 0 0 "IPv4 resilient nexthop group torture test"
+}
+
basic()
{
echo
@@ -1524,6 +1867,14 @@ basic()
run_cmd "$IP nexthop replace id 2 blackhole dev veth1"
log_test $? 2 "Blackhole nexthop with other attributes"
+ # blackhole nexthop should not be affected by the state of the loopback
+ # device
+ run_cmd "$IP link set dev lo down"
+ check_nexthop "id 2" "id 2 blackhole"
+ log_test $? 0 "Blackhole nexthop with loopback device down"
+
+ run_cmd "$IP link set dev lo up"
+
#
# groups
#
@@ -1584,6 +1935,204 @@ basic()
$IP nexthop flush >/dev/null 2>&1
}
+check_nexthop_buckets_balance()
+{
+ local nharg=$1; shift
+ local ret
+
+ while (($# > 0)); do
+ local selector=$1; shift
+ local condition=$1; shift
+ local count
+
+ count=$($IP -j nexthop bucket ${nharg} ${selector} | jq length)
+ (( $count $condition ))
+ ret=$?
+ if ((ret != 0)); then
+ return $ret
+ fi
+ done
+
+ return 0
+}
+
+basic_res()
+{
+ echo
+ echo "Basic resilient nexthop group functional tests"
+ echo "----------------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ run_cmd "$IP nexthop add id 1 dev veth1"
+
+ #
+ # resilient nexthop group addition
+ #
+
+ run_cmd "$IP nexthop add id 101 group 1 type resilient buckets 8"
+ log_test $? 0 "Add a nexthop group with default parameters"
+
+ run_cmd "$IP nexthop get id 101"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 120 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group with default parameters"
+
+ run_cmd "$IP nexthop add id 102 group 1 type resilient
+ buckets 4 idle_timer 100 unbalanced_timer 5"
+ run_cmd "$IP nexthop get id 102"
+ check_nexthop "id 102" \
+ "id 102 group 1 type resilient buckets 4 idle_timer 100 unbalanced_timer 5 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group with non-default parameters"
+
+ run_cmd "$IP nexthop add id 103 group 1 type resilient buckets 0"
+ log_test $? 2 "Add a nexthop group with 0 buckets"
+
+ #
+ # resilient nexthop group replacement
+ #
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient
+ buckets 8 idle_timer 240 unbalanced_timer 80"
+ log_test $? 0 "Replace nexthop group parameters"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 240 unbalanced_timer 80 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing parameters"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient idle_timer 512"
+ log_test $? 0 "Replace idle timer"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 80 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing idle timer"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient unbalanced_timer 256"
+ log_test $? 0 "Replace unbalanced timer"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing unbalanced timer"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient"
+ log_test $? 0 "Replace with no parameters"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing no parameters"
+
+ run_cmd "$IP nexthop replace id 101 group 1"
+ log_test $? 2 "Replace nexthop group type - implicit"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type mpath"
+ log_test $? 2 "Replace nexthop group type - explicit"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient buckets 1024"
+ log_test $? 2 "Replace number of nexthop buckets"
+
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing with invalid parameters"
+
+ #
+ # resilient nexthop buckets dump
+ #
+
+ $IP nexthop flush >/dev/null 2>&1
+ run_cmd "$IP nexthop add id 1 dev veth1"
+ run_cmd "$IP nexthop add id 2 dev veth3"
+ run_cmd "$IP nexthop add id 101 group 1/2 type resilient buckets 4"
+ run_cmd "$IP nexthop add id 201 group 1/2"
+
+ check_nexthop_bucket "" \
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets"
+
+ check_nexthop_bucket "list id 101" \
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets in a group"
+
+ (( $($IP -j nexthop bucket list id 101 |
+ jq '[.[] | select(.bucket.idle_time > 0 and
+ .bucket.idle_time < 2)] | length') == 4 ))
+ log_test $? 0 "All nexthop buckets report a positive near-zero idle time"
+
+ check_nexthop_bucket "list dev veth1" \
+ "id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets with a specific nexthop device"
+
+ check_nexthop_bucket "list nhid 2" \
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2"
+ log_test $? 0 "Dump all nexthop buckets with a specific nexthop identifier"
+
+ run_cmd "$IP nexthop bucket list id 111"
+ log_test $? 2 "Dump all nexthop buckets in a non-existent group"
+
+ run_cmd "$IP nexthop bucket list id 201"
+ log_test $? 2 "Dump all nexthop buckets in a non-resilient group"
+
+ run_cmd "$IP nexthop bucket list dev bla"
+ log_test $? 255 "Dump all nexthop buckets using a non-existent device"
+
+ run_cmd "$IP nexthop bucket list groups"
+ log_test $? 255 "Dump all nexthop buckets with invalid 'groups' keyword"
+
+ run_cmd "$IP nexthop bucket list fdb"
+ log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+
+ #
+ # resilient nexthop buckets get requests
+ #
+
+ check_nexthop_bucket "get id 101 index 0" "id 101 index 0 nhid 2"
+ log_test $? 0 "Get a valid nexthop bucket"
+
+ run_cmd "$IP nexthop bucket get id 101 index 999"
+ log_test $? 2 "Get a nexthop bucket with valid group, but invalid index"
+
+ run_cmd "$IP nexthop bucket get id 201 index 0"
+ log_test $? 2 "Get a nexthop bucket from a non-resilient group"
+
+ run_cmd "$IP nexthop bucket get id 999 index 0"
+ log_test $? 2 "Get a nexthop bucket from a non-existent group"
+
+ #
+ # tests for bucket migration
+ #
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ run_cmd "$IP nexthop add id 1 dev veth1"
+ run_cmd "$IP nexthop add id 2 dev veth3"
+ run_cmd "$IP nexthop add id 101
+ group 1/2 type resilient buckets 10
+ idle_timer 1 unbalanced_timer 20"
+
+ check_nexthop_buckets_balance "list id 101" \
+ "nhid 1" "== 5" \
+ "nhid 2" "== 5"
+ log_test $? 0 "Initial bucket allocation"
+
+ run_cmd "$IP nexthop replace id 101
+ group 1,2/2,3 type resilient"
+ check_nexthop_buckets_balance "list id 101" \
+ "nhid 1" "== 4" \
+ "nhid 2" "== 6"
+ log_test $? 0 "Bucket allocation after replace"
+
+ # Check that increase in idle timer does not make buckets appear busy.
+ run_cmd "$IP nexthop replace id 101
+ group 1,2/2,3 type resilient
+ idle_timer 10"
+ run_cmd "$IP nexthop replace id 101
+ group 1/2 type resilient"
+ check_nexthop_buckets_balance "list id 101" \
+ "nhid 1" "== 5" \
+ "nhid 2" "== 5"
+ log_test $? 0 "Buckets migrated after idle timer change"
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
################################################################################
# usage
diff --git a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
new file mode 100755
index 000000000000..5148d97a5df8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + h1.10 | | + h2.20 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | | | | |
+# | + $h1 | | + $h2 |
+# | | | | | |
+# +----|---------------+ +--|-------------------+
+# | |
+# +----|--------------------------------------------------|--------------------+
+# | SW | | |
+# | +--|-------------------------------+ +----------------|------------------+ |
+# | | + $swp1 BR1 (802.1ad) | | BR2 (802.1d) + $swp2 | |
+# | | vid 100 pvid untagged | | | | |
+# | | | | + $swp2.20 | |
+# | | | | | |
+# | | + vx100 (vxlan) | | + vx200 (vxlan) | |
+# | | local 192.0.2.17 | | local 192.0.2.17 | |
+# | | remote 192.0.2.34 | | remote 192.0.2.50 | |
+# | | id 1000 dstport $VXPORT | | id 2000 dstport $VXPORT | |
+# | | vid 100 pvid untagged | | | |
+# | +--------------------------------- + +-----------------------------------+ |
+# | |
+# | 192.0.2.32/28 via 192.0.2.18 |
+# | 192.0.2.48/28 via 192.0.2.18 |
+# | |
+# | + $rp1 |
+# | | 192.0.2.17/28 |
+# +----|-----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | VRP2 (vrf) |
+# | + $rp2 |
+# | 192.0.2.18/28 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | + v1 (veth) + v3 (veth) |
+# | | 192.0.2.33/28 | 192.0.2.49/28 |
+# +----|---------------------------------------|----------------+
+# | |
+# +----|------------------------------+ +----|------------------------------+
+# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) |
+# | 192.0.2.34/28 | | 192.0.2.50/28 |
+# | | | |
+# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 |
+# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 |
+# | | | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# | | BR3 (802.1ad) | | | | BR3 (802.1d) | |
+# | | + vx100 (vxlan) | | | | + vx200 (vxlan) | |
+# | | local 192.0.2.34 | | | | local 192.0.2.50 | |
+# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | |
+# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | |
+# | | id 1000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | |
+# | | vid 100 pvid untagged | | | | | |
+# | | | | | | + w1.20 | |
+# | | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 100 pvid untagged | | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | VW2 (vrf) | | | | | VW2 (vrf) | |
+# | | + w2 (veth) | | | | + w2 (veth) | |
+# | | | | | | | | | |
+# | | | | | | | | | |
+# | | + w2.10 | | | | + w2.20 | |
+# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# +-----------------------------------+ +-----------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+ ping_ipv4
+ "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ tc qdisc add dev $h1 clsact
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 10
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ tc qdisc add dev $h2 clsact
+ vlan_create $h2 20 v$h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 20
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+ ip address add dev $rp1 192.0.2.17/28
+
+ ip route add 192.0.2.32/28 nexthop via 192.0.2.18
+ ip route add 192.0.2.48/28 nexthop via 192.0.2.18
+}
+
+rp1_unset_addr()
+{
+ ip route del 192.0.2.48/28 nexthop via 192.0.2.18
+ ip route del 192.0.2.32/28 nexthop via 192.0.2.18
+
+ ip address del dev $rp1 192.0.2.17/28
+}
+
+switch_create()
+{
+ #### BR1 ####
+ ip link add name br1 type bridge vlan_filtering 1 \
+ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ #### BR2 ####
+ ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br2 address $(mac_get $swp2)
+ ip link set dev br2 up
+
+ ip link set dev $rp1 up
+ rp1_set_addr
+
+ #### VX100 ####
+ ip link add name vx100 type vxlan id 1000 local 192.0.2.17 \
+ dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+ ip link set dev vx100 up
+
+ ip link set dev vx100 master br1
+ bridge vlan add vid 100 dev vx100 pvid untagged
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ bridge vlan add vid 100 dev $swp1 pvid untagged
+
+ #### VX200 ####
+ ip link add name vx200 type vxlan id 2000 local 192.0.2.17 \
+ dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+ ip link set dev vx200 up
+
+ ip link set dev vx200 master br2
+
+ ip link set dev $swp2 up
+ ip link add name $swp2.20 link $swp2 type vlan id 20
+ ip link set dev $swp2.20 master br2
+ ip link set dev $swp2.20 up
+
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+ bridge fdb append dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self
+}
+
+switch_destroy()
+{
+ bridge fdb del dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self
+ bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+
+ ip link set dev vx200 nomaster
+ ip link set dev vx200 down
+ ip link del dev vx200
+
+ ip link del dev $swp2.20
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 100 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev vx100 nomaster
+ ip link set dev vx100 down
+ ip link del dev vx100
+
+ rp1_unset_addr
+ ip link set dev $rp1 down
+
+ ip link set dev br2 down
+ ip link del dev br2
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 192.0.2.18/28
+ __simple_if_init v1 v$rp2 192.0.2.33/28
+ __simple_if_init v3 v$rp2 192.0.2.49/28
+ tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+ tc qdisc del dev v1 clsact
+ __simple_if_fini v3 192.0.2.49/28
+ __simple_if_fini v1 192.0.2.33/28
+ simple_if_fini $rp2 192.0.2.18/28
+}
+
+ns_init_common()
+{
+ local in_if=$1; shift
+ local in_addr=$1; shift
+ local other_in_addr=$1; shift
+ local vxlan_name=$1; shift
+ local vxlan_id=$1; shift
+ local vlan_id=$1; shift
+ local host_addr=$1; shift
+ local nh_addr=$1; shift
+
+ ip link set dev $in_if up
+ ip address add dev $in_if $in_addr/28
+ tc qdisc add dev $in_if clsact
+
+ ip link add name br3 type bridge vlan_filtering 0
+ ip link set dev br3 up
+
+ ip link add name w1 type veth peer name w2
+
+ ip link set dev w1 master br3
+ ip link set dev w1 up
+
+ ip link add name $vxlan_name type vxlan id $vxlan_id local $in_addr \
+ dstport "$VXPORT"
+ ip link set dev $vxlan_name up
+ bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst 192.0.2.17 self
+ bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst $other_in_addr self
+
+ ip link set dev $vxlan_name master br3
+ tc qdisc add dev $vxlan_name clsact
+
+ simple_if_init w2
+ vlan_create w2 $vlan_id vw2 $host_addr/28
+
+ ip route add 192.0.2.16/28 nexthop via $nh_addr
+ ip route add $other_in_addr/32 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 \
+ ns_init_common v2 192.0.2.34 192.0.2.50 vx100 1000 10 192.0.2.3 \
+ 192.0.2.33
+
+ in_ns ns1 bridge vlan add vid 100 dev vx100 pvid untagged
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+ns2_create()
+{
+ ip netns add ns2
+ ip link set dev v4 netns ns2
+ in_ns ns2 \
+ ns_init_common v4 192.0.2.50 192.0.2.34 vx200 2000 20 192.0.2.4 \
+ 192.0.2.49
+
+ in_ns ns2 ip link add name w1.20 link w1 type vlan id 20
+ in_ns ns2 ip link set dev w1.20 master br3
+ in_ns ns2 ip link set dev w1.20 up
+}
+
+ns2_destroy()
+{
+ ip netns exec ns2 ip link set dev v4 netns 1
+ ip netns del ns2
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ ip link add name v3 type veth peer name v4
+ vrp2_create
+ ns1_create
+ ns2_create
+
+ r1_mac=$(in_ns ns1 mac_get w2)
+ r2_mac=$(in_ns ns2 mac_get w2)
+ h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns2_destroy
+ ns1_destroy
+ vrp2_destroy
+ ip link del dev v3
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.3 ": local->remote 1 through VxLAN with an 802.1ad bridge"
+ ping_test $h2 192.0.2.4 ": local->remote 2 through VxLAN with an 802.1d bridge"
+}
+
+test_all()
+{
+ echo "Running tests with UDP port $VXPORT"
+ tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
index 66496659bea7..e134a5f529c9 100644
--- a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
+++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
@@ -224,7 +224,7 @@ fib_ipv4_plen_test()
ip -n $ns link set dev dummy1 up
# Add two routes with the same key and different prefix length and
- # make sure both are in hardware. It can be verfied that both are
+ # make sure both are in hardware. It can be verified that both are
# sharing the same leaf by checking the /proc/net/fib_trie
ip -n $ns route add 192.0.2.0/24 dev dummy1
ip -n $ns route add 192.0.2.0/25 dev dummy1
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
new file mode 100755
index 000000000000..088b65e64d66
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
@@ -0,0 +1,361 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.1/28 | |
+# | 2001:db8:1::1/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|------------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 192.0.2.2/28 |
+# | 2001:db8:1::2/64 |
+# | |
+# | + g1a (gre) + g1b (gre) |
+# | loc=192.0.2.65 loc=192.0.2.81 |
+# | rem=192.0.2.66 --. rem=192.0.2.82 --. |
+# | tos=inherit | tos=inherit | |
+# | .------------------' | |
+# | | .------------------' |
+# | v v |
+# | + $ul1.111 (vlan) + $ul1.222 (vlan) |
+# | | 192.0.2.129/28 | 192.0.2.145/28 |
+# | \ / |
+# | \________________/ |
+# | | |
+# | + $ul1 |
+# +------------|-------------------------------+
+# |
+# +------------|-------------------------------+
+# | SW2 + $ul2 |
+# | _______|________ |
+# | / \ |
+# | / \ |
+# | + $ul2.111 (vlan) + $ul2.222 (vlan) |
+# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 |
+# | | | |
+# | | '------------------. |
+# | '------------------. | |
+# | + g2a (gre) | + g2b (gre) | |
+# | loc=192.0.2.66 | loc=192.0.2.82 | |
+# | rem=192.0.2.65 --' rem=192.0.2.81 --' |
+# | tos=inherit tos=inherit |
+# | |
+# | $ol2 + |
+# | 192.0.2.17/28 | |
+# | 2001:db8:2::1/64 | |
+# +-------------------|------------------------+
+# |
+# +-------------------|-----+
+# | H2 | |
+# | $h2 + |
+# | 192.0.2.18/28 |
+# | 2001:db8:2::2/64 |
+# +-------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ multipath_ipv4
+ multipath_ipv6
+ multipath_ipv6_l4
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+ ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+ ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64
+ __simple_if_init $ul1 v$ol1
+ vlan_create $ul1 111 v$ol1 192.0.2.129/28
+ vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+ tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+ __simple_if_init g1a v$ol1 192.0.2.65/32
+ ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+ tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+ __simple_if_init g1b v$ol1 192.0.2.81/32
+ ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+ ip -6 nexthop add id 101 dev g1a
+ ip -6 nexthop add id 102 dev g1b
+ ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+ idle_timer 0
+
+ ip route add vrf v$ol1 192.0.2.16/28 nhid 103
+ ip route add vrf v$ol1 2001:db8:2::/64 nhid 103
+}
+
+sw1_destroy()
+{
+ ip route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 192.0.2.16/28
+
+ ip nexthop del id 103
+ ip -6 nexthop del id 102
+ ip -6 nexthop del id 101
+
+ ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+ __simple_if_fini g1b 192.0.2.81/32
+ tunnel_destroy g1b
+
+ ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+ __simple_if_fini g1a 192.0.2.65/32
+ tunnel_destroy g1a
+
+ vlan_destroy $ul1 222
+ vlan_destroy $ul1 111
+ __simple_if_fini $ul1
+ simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64
+}
+
+sw2_create()
+{
+ simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64
+ __simple_if_init $ul2 v$ol2
+ vlan_create $ul2 111 v$ol2 192.0.2.130/28
+ vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+ tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+ __simple_if_init g2a v$ol2 192.0.2.66/32
+ ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+ tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+ __simple_if_init g2b v$ol2 192.0.2.82/32
+ ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+ ip -6 nexthop add id 201 dev g2a
+ ip -6 nexthop add id 202 dev g2b
+ ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+ idle_timer 0
+
+ ip route add vrf v$ol2 192.0.2.0/28 nhid 203
+ ip route add vrf v$ol2 2001:db8:1::/64 nhid 203
+
+ tc qdisc add dev $ul2 clsact
+ tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+ tc qdisc del dev $ul2 clsact
+
+ ip route del vrf v$ol2 2001:db8:1::/64
+ ip route del vrf v$ol2 192.0.2.0/28
+
+ ip nexthop del id 203
+ ip -6 nexthop del id 202
+ ip -6 nexthop del id 201
+
+ ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+ __simple_if_fini g2b 192.0.2.82/32
+ tunnel_destroy g2b
+
+ ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+ __simple_if_fini g2a 192.0.2.66/32
+ tunnel_destroy g2a
+
+ vlan_destroy $ul2 222
+ vlan_destroy $ul2 111
+ __simple_if_fini $ul2
+ simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64
+ ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+ ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+multipath4_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+ type resilient
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ ip vrf exec v$h1 \
+ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102 type resilient
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 0
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+ type resilient
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ # Generate 16384 echo requests, each with a random flow label.
+ for ((i=0; i < 16384; ++i)); do
+ ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+ done
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102 type resilient
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+ type resilient
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ ip vrf exec v$h1 \
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102 type resilient
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+multipath_ipv4()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6()
+{
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6_l4()
+{
+ log_info "Running IPv6 L4 hash multipath tests"
+ multipath6_l4_test "ECMP" 1 1
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index be71012b8fc5..42e28c983d41 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -353,6 +353,11 @@ wait_for_offload()
"$@" | grep -q offload
}
+wait_for_trap()
+{
+ "$@" | grep -q trap
+}
+
until_counter_is()
{
local expr=$1; shift
@@ -767,6 +772,15 @@ rate()
echo $((8 * (t1 - t0) / interval))
}
+packets_rate()
+{
+ local t0=$1; shift
+ local t1=$1; shift
+ local interval=$1; shift
+
+ echo $(((t1 - t0) / interval))
+}
+
mac_get()
{
local if_name=$1
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
index 197e769c2ed1..f8cda822c1ce 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -86,11 +86,20 @@ test_ip6gretap()
test_gretap_stp()
{
+ # Sometimes after mirror installation, the neighbor's state is not valid.
+ # The reason is that there is no SW datapath activity related to the
+ # neighbor for the remote GRE address. Therefore whether the corresponding
+ # neighbor will be valid is a matter of luck, and the test is thus racy.
+ # Set the neighbor's state to permanent, so it would be always valid.
+ ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+ nud permanent dev br2
full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
}
test_ip6gretap_stp()
{
+ ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+ nud permanent dev br2
full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
new file mode 100755
index 000000000000..4898dd4118f1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -0,0 +1,400 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ multipath_test
+"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip nexthop del id 103
+ ip nexthop del id 101
+ ip nexthop del id 102
+ ip nexthop del id 106
+ ip nexthop del id 104
+ ip nexthop del id 105
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip nexthop del id 201
+ ip nexthop del id 202
+ ip nexthop del id 204
+ ip nexthop del id 205
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+routing_nh_obj()
+{
+ ip nexthop add id 101 via 169.254.2.22 dev $rp12
+ ip nexthop add id 102 via 169.254.3.23 dev $rp13
+ ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103
+
+ ip nexthop add id 104 via fe80:2::22 dev $rp12
+ ip nexthop add id 105 via fe80:3::23 dev $rp13
+ ip nexthop add id 106 group 104/105 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106
+
+ ip nexthop add id 201 via 169.254.2.12 dev $rp22
+ ip nexthop add id 202 via 169.254.3.13 dev $rp23
+ ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203
+
+ ip nexthop add id 204 via fe80:2::12 dev $rp22
+ ip nexthop add id 205 via fe80:3::13 dev $rp23
+ ip nexthop add id 206 group 204/205 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206
+}
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the provided weights.
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the provided weights.
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath_test()
+{
+ # Without an idle timer, weight replacement should happen immediately.
+ log_info "Running multipath tests without an idle timer"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 0
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 0
+
+ log_info "Running IPv4 multipath tests"
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+ multipath4_test "ECMP" 1 1
+ ip nexthop replace id 103 group 101,2/102,1 type resilient
+ multipath4_test "Weighted MP 2:1" 2 1
+ ip nexthop replace id 103 group 101,11/102,45 type resilient
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+ multipath6_l4_test "ECMP" 1 1
+ ip nexthop replace id 106 group 104,2/105,1 type resilient
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ ip nexthop replace id 106 group 104,11/105,45 type resilient
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+ # With an idle timer, weight replacement should not happen, so the
+ # expected ratio should always be the initial one (1:1).
+ log_info "Running multipath tests with an idle timer of 120 seconds"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 120
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 120
+
+ log_info "Running IPv4 multipath tests"
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+ multipath4_test "ECMP" 1 1
+ ip nexthop replace id 103 group 101,2/102,1 type resilient
+ multipath4_test "Weighted MP 2:1" 1 1
+ ip nexthop replace id 103 group 101,11/102,45 type resilient
+ multipath4_test "Weighted MP 11:45" 1 1
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+ multipath6_l4_test "ECMP" 1 1
+ ip nexthop replace id 106 group 104,2/105,1 type resilient
+ multipath6_l4_test "Weighted MP 2:1" 1 1
+ ip nexthop replace id 106 group 104,11/105,45 type resilient
+ multipath6_l4_test "Weighted MP 11:45" 1 1
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+ # With a short idle timer and enough idle time, weight replacement
+ # should happen.
+ log_info "Running multipath tests with an idle timer of 5 seconds"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 5
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 5
+
+ log_info "Running IPv4 multipath tests"
+ sleep 10
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+ multipath4_test "ECMP" 1 1
+ sleep 10
+ ip nexthop replace id 103 group 101,2/102,1 type resilient
+ multipath4_test "Weighted MP 2:1" 2 1
+ sleep 10
+ ip nexthop replace id 103 group 101,11/102,45 type resilient
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ sleep 10
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+ multipath6_l4_test "ECMP" 1 1
+ sleep 10
+ ip nexthop replace id 106 group 104,2/105,1 type resilient
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ sleep 10
+ ip nexthop replace id 106 group 104,11/105,45 type resilient
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+ip nexthop ls >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+ echo "Nexthop objects not supported; skipping tests"
+ exit 0
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
index 160f9cccdfb7..4f9f17cb45d6 100755
--- a/tools/testing/selftests/net/forwarding/tc_police.sh
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -35,6 +35,8 @@ ALL_TESTS="
police_shared_test
police_rx_mirror_test
police_tx_mirror_test
+ police_pps_rx_test
+ police_pps_tx_test
"
NUM_NETIFS=6
source tc_common.sh
@@ -290,6 +292,60 @@ police_tx_mirror_test()
police_mirror_common_test $rp2 egress "police tx and mirror"
}
+police_pps_common_test()
+{
+ local test_name=$1; shift
+
+ RET=0
+
+ # Rule to measure bandwidth on ingress of $h2
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action drop
+
+ mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+ -t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+ local t0=$(tc_rule_stats_get $h2 1 ingress .packets)
+ sleep 10
+ local t1=$(tc_rule_stats_get $h2 1 ingress .packets)
+
+ local er=$((2000))
+ local nr=$(packets_rate $t0 $t1 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((-10 <= nr_pct && nr_pct <= 10))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+ log_test "$test_name"
+
+ { kill %% && wait %%; } 2>/dev/null
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_pps_rx_test()
+{
+ # Rule to police traffic destined to $h2 on ingress of $rp1
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok
+
+ police_pps_common_test "police pps on rx"
+
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_pps_tx_test()
+{
+ # Rule to police traffic destined to $h2 on egress of $rp2
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok
+
+ police_pps_common_test "police pps on tx"
+
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index ce6bea9675c0..0ccb1dda099a 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -658,7 +658,7 @@ test_ecn_decap()
# In accordance with INET_ECN_decapsulate()
__test_ecn_decap 00 00 0x00
__test_ecn_decap 01 01 0x01
- __test_ecn_decap 02 01 0x02
+ __test_ecn_decap 02 01 0x01
__test_ecn_decap 01 03 0x03
__test_ecn_decap 02 03 0x03
test_ecn_decap_error
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index 17ced7d6ce25..f23438d512c5 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -1785,7 +1785,7 @@ static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf,
break;
default:
printk("got unknown msg type %d", msg->type);
- };
+ }
}
static int grand_child_f(unsigned int nr, int cmd_fd, void *buf)
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 39edce4f541c..2674ba20d524 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -5,8 +5,9 @@ rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
ns="ns1-$rndh"
ksft_skip=4
test_cnt=1
+timeout_poll=100
+timeout_test=$((timeout_poll * 2 + 1))
ret=0
-pids=()
flush_pids()
{
@@ -14,18 +15,14 @@ flush_pids()
# give it some time
sleep 1.1
- for pid in ${pids[@]}; do
- [ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1
- done
- pids=()
+ ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null
}
cleanup()
{
+ ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
+
ip netns del $ns
- for pid in ${pids[@]}; do
- [ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1
- done
}
ip -Version > /dev/null 2>&1
@@ -79,39 +76,57 @@ trap cleanup EXIT
ip netns add $ns
ip -n $ns link set dev lo up
-echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null &
+echo "a" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p 10000 -l -t ${timeout_poll} \
+ 0.0.0.0 >/dev/null &
sleep 0.1
-pids[0]=$!
chk_msk_nr 0 "no msk on netns creation"
-echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null &
+echo "b" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p 10000 -j -t ${timeout_poll} \
+ 127.0.0.1 >/dev/null &
sleep 0.1
-pids[1]=$!
chk_msk_nr 2 "after MPC handshake "
chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
flush_pids
-echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null &
-pids[0]=$!
+echo "a" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \
+ 0.0.0.0 >/dev/null &
sleep 0.1
-echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null &
-pids[1]=$!
+echo "b" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p 10001 -j -t ${timeout_poll} \
+ 127.0.0.1 >/dev/null &
sleep 0.1
chk_msk_fallback_nr 1 "check fallback"
flush_pids
NR_CLIENTS=100
for I in `seq 1 $NR_CLIENTS`; do
- echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null &
- pids[$((I*2))]=$!
+ echo "a" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p $((I+10001)) -l -w 10 \
+ -t ${timeout_poll} 0.0.0.0 >/dev/null &
done
sleep 0.1
for I in `seq 1 $NR_CLIENTS`; do
- echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null &
- pids[$((I*2 + 1))]=$!
+ echo "b" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p $((I+10001)) -w 10 \
+ -t ${timeout_poll} 127.0.0.1 >/dev/null &
done
sleep 1.5
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 77bb62feb872..69d89b5d666f 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -55,6 +55,7 @@ static int cfg_sndbuf;
static int cfg_rcvbuf;
static bool cfg_join;
static bool cfg_remove;
+static unsigned int cfg_do_w;
static int cfg_wait;
static void die_usage(void)
@@ -272,8 +273,8 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
if (cfg_join && first && do_w > 100)
do_w = 100;
- if (cfg_remove && do_w > 50)
- do_w = 50;
+ if (cfg_remove && do_w > cfg_do_w)
+ do_w = cfg_do_w;
bw = write(fd, buf, do_w);
if (bw < 0)
@@ -829,7 +830,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) {
+ while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
@@ -840,6 +841,9 @@ static void parse_opts(int argc, char **argv)
cfg_remove = true;
cfg_mode = CFG_MODE_POLL;
cfg_wait = 400000;
+ cfg_do_w = atoi(optarg);
+ if (cfg_do_w <= 0)
+ cfg_do_w = 50;
break;
case 'l':
listen_mode = true;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 10a030b53b23..385cdc98aed8 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -11,7 +11,8 @@ cin=""
cout=""
ksft_skip=4
capture=false
-timeout=30
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
ipv6=true
ethtool_random_on=true
tc_delay="$((RANDOM%50))"
@@ -273,7 +274,7 @@ check_mptcp_disabled()
ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
local err=0
- LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
+ LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
grep -q "^socket: Protocol not available$" && err=1
ip netns delete ${disabled_ns}
@@ -425,19 +426,32 @@ do_transfer()
sleep 1
fi
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat -n
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat -n
+ fi
+
local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" &
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ $extra_args $local_addr < "$sin" > "$sout" &
local spid=$!
wait_local_port_listen "${listener_ns}" "${port}"
local start
start=$(date +%s%3N)
- ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" &
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_args $connect_addr < "$cin" > "$cout" &
local cpid=$!
wait $cpid
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 964db9ed544f..fd99485cf2a4 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -8,9 +8,11 @@ cin=""
cinsent=""
cout=""
ksft_skip=4
-timeout=30
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
mptcp_connect=""
capture=0
+do_all_tests=1
TEST_COUNT=0
@@ -76,6 +78,7 @@ cleanup_partial()
for netns in "$ns1" "$ns2"; do
ip netns del $netns
+ rm -f /tmp/$netns.{nstat,out}
done
}
@@ -121,12 +124,6 @@ reset_with_add_addr_timeout()
-j DROP
}
-for arg in "$@"; do
- if [ "$arg" = "-c" ]; then
- capture=1
- fi
-done
-
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
@@ -237,10 +234,17 @@ do_transfer()
sleep 1
fi
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat -n
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat -n
+
if [ $speed = "fast" ]; then
mptcp_connect="./mptcp_connect -j"
- else
- mptcp_connect="./mptcp_connect -r"
+ elif [ $speed = "slow" ]; then
+ mptcp_connect="./mptcp_connect -r 50"
+ elif [ $speed = "least" ]; then
+ mptcp_connect="./mptcp_connect -r 10"
fi
local local_addr
@@ -250,17 +254,26 @@ do_transfer()
local_addr="0.0.0.0"
fi
- ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port \
- -s ${srv_proto} ${local_addr} < "$sin" > "$sout" &
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ ${local_addr} < "$sin" > "$sout" &
spid=$!
sleep 1
if [ "$test_link_fail" -eq 0 ];then
- ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $connect_addr < "$cin" > "$cout" &
else
- ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | tee "$cinsent" | \
- ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr > "$cout" &
+ ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \
+ tee "$cinsent" | \
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $connect_addr > "$cout" &
fi
cpid=$!
@@ -284,17 +297,25 @@ do_transfer()
let rm_nr_ns1=-addr_nr_ns1
if [ $rm_nr_ns1 -lt 8 ]; then
counter=1
- sleep 1
-
- while [ $counter -le $rm_nr_ns1 ]
- do
- ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
+ dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
+ if [ ${#dump[@]} -gt 0 ]; then
+ id=${dump[1]}
sleep 1
- let counter+=1
- done
- else
+
+ while [ $counter -le $rm_nr_ns1 ]
+ do
+ ip netns exec ${listener_ns} ./pm_nl_ctl del $id
+ sleep 1
+ let counter+=1
+ let id+=1
+ done
+ fi
+ elif [ $rm_nr_ns1 -eq 8 ]; then
sleep 1
ip netns exec ${listener_ns} ./pm_nl_ctl flush
+ elif [ $rm_nr_ns1 -eq 9 ]; then
+ sleep 1
+ ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr}
fi
fi
@@ -318,17 +339,31 @@ do_transfer()
let rm_nr_ns2=-addr_nr_ns2
if [ $rm_nr_ns2 -lt 8 ]; then
counter=1
- sleep 1
-
- while [ $counter -le $rm_nr_ns2 ]
- do
- ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
+ dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
+ if [ ${#dump[@]} -gt 0 ]; then
+ id=${dump[1]}
sleep 1
- let counter+=1
- done
- else
+
+ while [ $counter -le $rm_nr_ns2 ]
+ do
+ ip netns exec ${connector_ns} ./pm_nl_ctl del $id
+ sleep 1
+ let counter+=1
+ let id+=1
+ done
+ fi
+ elif [ $rm_nr_ns2 -eq 8 ]; then
sleep 1
ip netns exec ${connector_ns} ./pm_nl_ctl flush
+ elif [ $rm_nr_ns2 -eq 9 ]; then
+ local addr
+ if is_v6 "${connect_addr}"; then
+ addr="dead:beef:1::2"
+ else
+ addr="10.0.1.2"
+ fi
+ sleep 1
+ ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr
fi
fi
@@ -354,12 +389,19 @@ do_transfer()
kill $cappid
fi
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat | grep Tcp > /tmp/${listener_ns}.out
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat | grep Tcp > /tmp/${connector_ns}.out
+
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
echo " client exit code $retc, server $rets" 1>&2
echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
+ ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
+ cat /tmp/${listener_ns}.out
echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
+ ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+ cat /tmp/${connector_ns}.out
cat "$capout"
ret=1
@@ -610,11 +652,22 @@ chk_rm_nr()
{
local rm_addr_nr=$1
local rm_subflow_nr=$2
+ local invert=${3:-""}
local count
local dump_stats
+ local addr_ns
+ local subflow_ns
+
+ if [ -z $invert ]; then
+ addr_ns=$ns1
+ subflow_ns=$ns2
+ elif [ $invert = "invert" ]; then
+ addr_ns=$ns2
+ subflow_ns=$ns1
+ fi
printf "%-39s %s" " " "rm "
- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
+ count=`ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
[ -z "$count" ] && count=0
if [ "$count" != "$rm_addr_nr" ]; then
echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
@@ -625,7 +678,7 @@ chk_rm_nr()
fi
echo -n " - sf "
- count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
+ count=`ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
[ -z "$count" ] && count=0
if [ "$count" != "$rm_subflow_nr" ]; then
echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
@@ -724,6 +777,14 @@ subflows_tests()
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows, limited by server" 2 2 1
+
+ # single subflow, dev
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow dev ns2eth3
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "single subflow, dev" 1 1 1
}
signal_address_tests()
@@ -767,6 +828,28 @@ signal_address_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows and signal" 3 3 3
chk_add_nr 1 1
+
+ # signal addresses
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "signal addresses" 3 3 3
+ chk_add_nr 3 3
+
+ # signal invalid addresses
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "signal invalid addresses" 1 1 1
+ chk_add_nr 3 3
}
link_failure_tests()
@@ -802,6 +885,26 @@ add_addr_timeout_tests()
run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1
chk_add_nr 4 0
+
+ # signal addresses timeout
+ reset_with_add_addr_timeout
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 least
+ chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2
+ chk_add_nr 8 0
+
+ # signal invalid addresses timeout
+ reset_with_add_addr_timeout
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 least
+ chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1
+ chk_add_nr 8 0
}
remove_tests()
@@ -833,7 +936,7 @@ remove_tests()
run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
chk_join_nr "remove single address" 1 1 1
chk_add_nr 1 1
- chk_rm_nr 0 0
+ chk_rm_nr 1 1 invert
# subflow and signal, remove
reset
@@ -858,6 +961,30 @@ remove_tests()
chk_add_nr 1 1
chk_rm_nr 2 2
+ # addresses remove
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
+ chk_join_nr "remove addresses" 3 3 3
+ chk_add_nr 3 3
+ chk_rm_nr 3 3 invert
+
+ # invalid addresses remove
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
+ chk_join_nr "remove invalid addresses" 1 1 1
+ chk_add_nr 3 3
+ chk_rm_nr 3 1 invert
+
# subflows and signal, flush
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 3
@@ -869,6 +996,60 @@ remove_tests()
chk_join_nr "flush subflows and signal" 3 3 3
chk_add_nr 1 1
chk_rm_nr 2 2
+
+ # subflows flush
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow id 150
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+ chk_join_nr "flush subflows" 3 3 3
+ chk_rm_nr 3 3
+
+ # addresses flush
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+ chk_join_nr "flush addresses" 3 3 3
+ chk_add_nr 3 3
+ chk_rm_nr 3 3 invert
+
+ # invalid addresses flush
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow
+ chk_join_nr "flush invalid addresses" 1 1 1
+ chk_add_nr 3 3
+ chk_rm_nr 3 1 invert
+
+ # remove id 0 subflow
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow
+ chk_join_nr "remove id 0 subflow" 1 1 1
+ chk_rm_nr 1 1
+
+ # remove id 0 address
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow
+ chk_join_nr "remove id 0 address" 1 1 1
+ chk_add_nr 1 1
+ chk_rm_nr 1 1 invert
}
add_tests()
@@ -945,7 +1126,7 @@ ipv6_tests()
run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow
chk_join_nr "remove single address IPv6" 1 1 1
chk_add_nr 1 1
- chk_rm_nr 0 0
+ chk_rm_nr 1 1 invert
# subflow and signal IPv6, remove
reset
@@ -1088,7 +1269,7 @@ add_addr_ports_tests()
run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
chk_join_nr "remove single address with port" 1 1 1
chk_add_nr 1 1 1
- chk_rm_nr 0 0
+ chk_rm_nr 1 1 invert
# subflow and signal with port, remove
reset
@@ -1221,7 +1402,8 @@ usage()
echo " -4 v4mapped_tests"
echo " -b backup_tests"
echo " -p add_addr_ports_tests"
- echo " -c syncookies_tests"
+ echo " -k syncookies_tests"
+ echo " -c capture pcap files"
echo " -h help"
}
@@ -1235,12 +1417,24 @@ make_file "$cin" "client" 1
make_file "$sin" "server" 1
trap cleanup EXIT
-if [ -z $1 ]; then
+for arg in "$@"; do
+ # check for "capture" arg before launching tests
+ if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then
+ capture=1
+ fi
+
+ # exception for the capture option, the rest means: a part of the tests
+ if [ "${arg}" != "-c" ]; then
+ do_all_tests=0
+ fi
+done
+
+if [ $do_all_tests -eq 1 ]; then
all_tests
exit $ret
fi
-while getopts 'fsltra64bpch' opt; do
+while getopts 'fsltra64bpkch' opt; do
case $opt in
f)
subflows_tests
@@ -1272,9 +1466,11 @@ while getopts 'fsltra64bpch' opt; do
p)
add_addr_ports_tests
;;
- c)
+ k)
syncookies_tests
;;
+ c)
+ ;;
h | *)
usage
;;
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index a617e293734c..3c741abe034e 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -100,12 +100,12 @@ done
check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
-for i in `seq 9 256`; do
+ip netns exec $ns1 ./pm_nl_ctl del 9
+for i in `seq 10 255`; do
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i
ip netns exec $ns1 ./pm_nl_ctl del $i
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $((i+1))
done
check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
-id 2 flags 10.0.0.9
id 3 flags signal,backup 10.0.1.3
id 4 flags signal 10.0.1.4
id 5 flags signal 10.0.1.5
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 7b4167f3f9a2..115decfdc1ef 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -26,7 +26,7 @@ static void syntax(char *argv[])
{
fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
- fprintf(stderr, "\tdel <id>\n");
+ fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
fprintf(stderr, "\tflush\n");
@@ -301,6 +301,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
1024];
struct rtattr *rta, *nest;
struct nlmsghdr *nh;
+ u_int16_t family;
int nest_start;
u_int8_t id;
int off = 0;
@@ -310,11 +311,14 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR,
MPTCP_PM_VER);
- /* the only argument is the address id */
- if (argc != 3)
+ /* the only argument is the address id (nonzero) */
+ if (argc != 3 && argc != 4)
syntax(argv);
id = atoi(argv[2]);
+ /* zero id with the IP address */
+ if (!id && argc != 4)
+ syntax(argv);
nest_start = off;
nest = (void *)(data + off);
@@ -328,6 +332,30 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
rta->rta_len = RTA_LENGTH(1);
memcpy(RTA_DATA(rta), &id, 1);
off += NLMSG_ALIGN(rta->rta_len);
+
+ if (!id) {
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, argv[3], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, argv[3], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else {
+ error(1, errno, "can't parse ip %s", argv[3]);
+ }
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
nest->rta_len = off - nest_start;
do_nl_req(fd, nh, off, 0);
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index f039ee57eb3c..3aeef3bcb101 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -7,7 +7,8 @@ ns2="ns2-$rndh"
ns3="ns3-$rndh"
capture=false
ksft_skip=4
-timeout=30
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
test_cnt=1
ret=0
bail=0
@@ -157,14 +158,20 @@ do_transfer()
sleep 1
fi
- ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" &
+ timeout ${timeout_test} \
+ ip netns exec ${ns3} \
+ ./mptcp_connect -jt ${timeout_poll} -l -p $port \
+ 0.0.0.0 < "$sin" > "$sout" &
local spid=$!
wait_local_port_listen "${ns3}" "${port}"
local start
start=$(date +%s%3N)
- ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" &
+ timeout ${timeout_test} \
+ ip netns exec ${ns1} \
+ ./mptcp_connect -jt ${timeout_poll} -p $port \
+ 10.0.3.3 < "$cin" > "$cout" &
local cpid=$!
wait $cpid
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
index 7b01b7c2ec10..066efd30e294 100644
--- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -30,25 +30,25 @@ struct reuse_opts {
};
struct reuse_opts unreusable_opts[12] = {
- {0, 0, 0, 0},
- {0, 0, 0, 1},
- {0, 0, 1, 0},
- {0, 0, 1, 1},
- {0, 1, 0, 0},
- {0, 1, 0, 1},
- {0, 1, 1, 0},
- {0, 1, 1, 1},
- {1, 0, 0, 0},
- {1, 0, 0, 1},
- {1, 0, 1, 0},
- {1, 0, 1, 1},
+ {{0, 0}, {0, 0}},
+ {{0, 0}, {0, 1}},
+ {{0, 0}, {1, 0}},
+ {{0, 0}, {1, 1}},
+ {{0, 1}, {0, 0}},
+ {{0, 1}, {0, 1}},
+ {{0, 1}, {1, 0}},
+ {{0, 1}, {1, 1}},
+ {{1, 0}, {0, 0}},
+ {{1, 0}, {0, 1}},
+ {{1, 0}, {1, 0}},
+ {{1, 0}, {1, 1}},
};
struct reuse_opts reusable_opts[4] = {
- {1, 1, 0, 0},
- {1, 1, 0, 1},
- {1, 1, 1, 0},
- {1, 1, 1, 1},
+ {{1, 1}, {0, 0}},
+ {{1, 1}, {0, 1}},
+ {{1, 1}, {1, 0}},
+ {{1, 1}, {1, 1}},
};
int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport)
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index b4cca382d125..59067f64b775 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -2,9 +2,12 @@
/*
* Test the SO_TXTIME API
*
- * Takes two streams of { payload, delivery time }[], one input and one output.
- * Sends the input stream and verifies arrival matches the output stream.
- * The two streams can differ due to out-of-order delivery and drops.
+ * Takes a stream of { payload, delivery time }[], to be sent across two
+ * processes. Start this program on two separate network namespaces or
+ * connected hosts, one instance in transmit mode and the other in receive
+ * mode using the '-r' option. Receiver will compare arrival timestamps to
+ * the expected stream. Sender will read transmit timestamps from the error
+ * queue. The streams can differ due to out-of-order delivery and drops.
*/
#define _GNU_SOURCE
@@ -28,14 +31,17 @@
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
+#include <poll.h>
static int cfg_clockid = CLOCK_TAI;
-static bool cfg_do_ipv4;
-static bool cfg_do_ipv6;
static uint16_t cfg_port = 8000;
static int cfg_variance_us = 4000;
+static uint64_t cfg_start_time_ns;
+static int cfg_mark;
+static bool cfg_rx;
static uint64_t glob_tstart;
+static uint64_t tdeliver_max;
/* encode one timed transmission (of a 1B payload) */
struct timed_send {
@@ -44,18 +50,21 @@ struct timed_send {
};
#define MAX_NUM_PKT 8
-static struct timed_send cfg_in[MAX_NUM_PKT];
-static struct timed_send cfg_out[MAX_NUM_PKT];
+static struct timed_send cfg_buf[MAX_NUM_PKT];
static int cfg_num_pkt;
static int cfg_errq_level;
static int cfg_errq_type;
-static uint64_t gettime_ns(void)
+static struct sockaddr_storage cfg_dst_addr;
+static struct sockaddr_storage cfg_src_addr;
+static socklen_t cfg_alen;
+
+static uint64_t gettime_ns(clockid_t clock)
{
struct timespec ts;
- if (clock_gettime(cfg_clockid, &ts))
+ if (clock_gettime(clock, &ts))
error(1, errno, "gettime");
return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec;
@@ -75,6 +84,8 @@ static void do_send_one(int fdt, struct timed_send *ts)
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
+ msg.msg_name = (struct sockaddr *)&cfg_dst_addr;
+ msg.msg_namelen = cfg_alen;
if (ts->delay_us >= 0) {
memset(control, 0, sizeof(control));
@@ -82,6 +93,8 @@ static void do_send_one(int fdt, struct timed_send *ts)
msg.msg_controllen = sizeof(control);
tdeliver = glob_tstart + ts->delay_us * 1000;
+ tdeliver_max = tdeliver_max > tdeliver ?
+ tdeliver_max : tdeliver;
cm = CMSG_FIRSTHDR(&msg);
cm->cmsg_level = SOL_SOCKET;
@@ -98,7 +111,7 @@ static void do_send_one(int fdt, struct timed_send *ts)
}
-static bool do_recv_one(int fdr, struct timed_send *ts)
+static void do_recv_one(int fdr, struct timed_send *ts)
{
int64_t tstop, texpect;
char rbuf[2];
@@ -106,13 +119,13 @@ static bool do_recv_one(int fdr, struct timed_send *ts)
ret = recv(fdr, rbuf, sizeof(rbuf), 0);
if (ret == -1 && errno == EAGAIN)
- return true;
+ error(1, EAGAIN, "recv: timeout");
if (ret == -1)
error(1, errno, "read");
if (ret != 1)
error(1, 0, "read: %dB", ret);
- tstop = (gettime_ns() - glob_tstart) / 1000;
+ tstop = (gettime_ns(cfg_clockid) - glob_tstart) / 1000;
texpect = ts->delay_us >= 0 ? ts->delay_us : 0;
fprintf(stderr, "payload:%c delay:%lld expected:%lld (us)\n",
@@ -123,8 +136,6 @@ static bool do_recv_one(int fdr, struct timed_send *ts)
if (llabs(tstop - texpect) > cfg_variance_us)
error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
-
- return false;
}
static void do_recv_verify_empty(int fdr)
@@ -137,18 +148,18 @@ static void do_recv_verify_empty(int fdr)
error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno);
}
-static void do_recv_errqueue_timeout(int fdt)
+static int do_recv_errqueue_timeout(int fdt)
{
char control[CMSG_SPACE(sizeof(struct sock_extended_err)) +
CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0};
char data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) +
sizeof(struct udphdr) + 1];
struct sock_extended_err *err;
+ int ret, num_tstamp = 0;
struct msghdr msg = {0};
struct iovec iov = {0};
struct cmsghdr *cm;
int64_t tstamp = 0;
- int ret;
iov.iov_base = data;
iov.iov_len = sizeof(data);
@@ -206,9 +217,47 @@ static void do_recv_errqueue_timeout(int fdt)
msg.msg_flags = 0;
msg.msg_controllen = sizeof(control);
+ num_tstamp++;
}
- error(1, 0, "recv: timeout");
+ return num_tstamp;
+}
+
+static void recv_errqueue_msgs(int fdt)
+{
+ struct pollfd pfd = { .fd = fdt, .events = POLLERR };
+ const int timeout_ms = 10;
+ int ret, num_tstamp = 0;
+
+ do {
+ ret = poll(&pfd, 1, timeout_ms);
+ if (ret == -1)
+ error(1, errno, "poll");
+
+ if (ret && (pfd.revents & POLLERR))
+ num_tstamp += do_recv_errqueue_timeout(fdt);
+
+ if (num_tstamp == cfg_num_pkt)
+ break;
+
+ } while (gettime_ns(cfg_clockid) < tdeliver_max);
+}
+
+static void start_time_wait(void)
+{
+ uint64_t now;
+ int err;
+
+ if (!cfg_start_time_ns)
+ return;
+
+ now = gettime_ns(CLOCK_REALTIME);
+ if (cfg_start_time_ns < now)
+ return;
+
+ err = usleep((cfg_start_time_ns - now) / 1000);
+ if (err)
+ error(1, errno, "usleep");
}
static void setsockopt_txtime(int fd)
@@ -245,6 +294,10 @@ static int setup_tx(struct sockaddr *addr, socklen_t alen)
setsockopt_txtime(fd);
+ if (cfg_mark &&
+ setsockopt(fd, SOL_SOCKET, SO_MARK, &cfg_mark, sizeof(cfg_mark)))
+ error(1, errno, "setsockopt mark");
+
return fd;
}
@@ -266,31 +319,70 @@ static int setup_rx(struct sockaddr *addr, socklen_t alen)
return fd;
}
-static void do_test(struct sockaddr *addr, socklen_t alen)
+static void do_test_tx(struct sockaddr *addr, socklen_t alen)
{
- int fdt, fdr, i;
+ int fdt, i;
fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n",
addr->sa_family == PF_INET ? '4' : '6',
cfg_clockid == CLOCK_TAI ? "tai" : "monotonic");
fdt = setup_tx(addr, alen);
- fdr = setup_rx(addr, alen);
- glob_tstart = gettime_ns();
+ start_time_wait();
+ glob_tstart = gettime_ns(cfg_clockid);
for (i = 0; i < cfg_num_pkt; i++)
- do_send_one(fdt, &cfg_in[i]);
+ do_send_one(fdt, &cfg_buf[i]);
+
+ recv_errqueue_msgs(fdt);
+
+ if (close(fdt))
+ error(1, errno, "close t");
+}
+
+static void do_test_rx(struct sockaddr *addr, socklen_t alen)
+{
+ int fdr, i;
+
+ fdr = setup_rx(addr, alen);
+
+ start_time_wait();
+ glob_tstart = gettime_ns(cfg_clockid);
+
for (i = 0; i < cfg_num_pkt; i++)
- if (do_recv_one(fdr, &cfg_out[i]))
- do_recv_errqueue_timeout(fdt);
+ do_recv_one(fdr, &cfg_buf[i]);
do_recv_verify_empty(fdr);
if (close(fdr))
error(1, errno, "close r");
- if (close(fdt))
- error(1, errno, "close t");
+}
+
+static void setup_sockaddr(int domain, const char *str_addr,
+ struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ }
}
static int parse_io(const char *optarg, struct timed_send *array)
@@ -323,17 +415,46 @@ static int parse_io(const char *optarg, struct timed_send *array)
return aoff / 2;
}
+static void usage(const char *progname)
+{
+ fprintf(stderr, "\nUsage: %s [options] <payload>\n"
+ "Options:\n"
+ " -4 only IPv4\n"
+ " -6 only IPv6\n"
+ " -c <clock> monotonic (default) or tai\n"
+ " -D <addr> destination IP address (server)\n"
+ " -S <addr> source IP address (client)\n"
+ " -r run rx mode\n"
+ " -t <nsec> start time (UTC nanoseconds)\n"
+ " -m <mark> socket mark\n"
+ "\n",
+ progname);
+ exit(1);
+}
+
static void parse_opts(int argc, char **argv)
{
- int c, ilen, olen;
+ char *daddr = NULL, *saddr = NULL;
+ int domain = PF_UNSPEC;
+ int c;
- while ((c = getopt(argc, argv, "46c:")) != -1) {
+ while ((c = getopt(argc, argv, "46c:S:D:rt:m:")) != -1) {
switch (c) {
case '4':
- cfg_do_ipv4 = true;
+ if (domain != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ domain = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ cfg_errq_level = SOL_IP;
+ cfg_errq_type = IP_RECVERR;
break;
case '6':
- cfg_do_ipv6 = true;
+ if (domain != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ domain = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ cfg_errq_level = SOL_IPV6;
+ cfg_errq_type = IPV6_RECVERR;
break;
case 'c':
if (!strcmp(optarg, "tai"))
@@ -344,50 +465,50 @@ static void parse_opts(int argc, char **argv)
else
error(1, 0, "unknown clock id %s", optarg);
break;
+ case 'S':
+ saddr = optarg;
+ break;
+ case 'D':
+ daddr = optarg;
+ break;
+ case 'r':
+ cfg_rx = true;
+ break;
+ case 't':
+ cfg_start_time_ns = strtol(optarg, NULL, 0);
+ break;
+ case 'm':
+ cfg_mark = strtol(optarg, NULL, 0);
+ break;
default:
- error(1, 0, "parse error at %d", optind);
+ usage(argv[0]);
}
}
- if (argc - optind != 2)
- error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]);
+ if (argc - optind != 1)
+ usage(argv[0]);
+
+ if (domain == PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ if (!daddr)
+ error(1, 0, "-D <server addr> required\n");
+ if (!cfg_rx && !saddr)
+ error(1, 0, "-S <client addr> required\n");
- ilen = parse_io(argv[optind], cfg_in);
- olen = parse_io(argv[optind + 1], cfg_out);
- if (ilen != olen)
- error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen);
- cfg_num_pkt = ilen;
+ setup_sockaddr(domain, daddr, &cfg_dst_addr);
+ setup_sockaddr(domain, saddr, &cfg_src_addr);
+
+ cfg_num_pkt = parse_io(argv[optind], cfg_buf);
}
int main(int argc, char **argv)
{
parse_opts(argc, argv);
- if (cfg_do_ipv6) {
- struct sockaddr_in6 addr6 = {0};
-
- addr6.sin6_family = AF_INET6;
- addr6.sin6_port = htons(cfg_port);
- addr6.sin6_addr = in6addr_loopback;
-
- cfg_errq_level = SOL_IPV6;
- cfg_errq_type = IPV6_RECVERR;
-
- do_test((void *)&addr6, sizeof(addr6));
- }
-
- if (cfg_do_ipv4) {
- struct sockaddr_in addr4 = {0};
-
- addr4.sin_family = AF_INET;
- addr4.sin_port = htons(cfg_port);
- addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-
- cfg_errq_level = SOL_IP;
- cfg_errq_type = IP_RECVERR;
-
- do_test((void *)&addr4, sizeof(addr4));
- }
+ if (cfg_rx)
+ do_test_rx((void *)&cfg_dst_addr, cfg_alen);
+ else
+ do_test_tx((void *)&cfg_src_addr, cfg_alen);
return 0;
}
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
index 3f7800eaecb1..3f06f4d286a9 100755
--- a/tools/testing/selftests/net/so_txtime.sh
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -3,32 +3,85 @@
#
# Regression tests for the SO_TXTIME interface
-# Run in network namespace
-if [[ $# -eq 0 ]]; then
- if ! ./in_netns.sh $0 __subprocess; then
- # test is time sensitive, can be flaky
- echo "test failed: retry once"
- ./in_netns.sh $0 __subprocess
+set -e
+
+readonly DEV="veth0"
+readonly BIN="./so_txtime"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+cleanup() {
+ ip netns del "${NS2}"
+ ip netns del "${NS1}"
+}
+
+trap cleanup EXIT
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" netns "${NS1}" type veth \
+ peer name "${DEV}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+
+do_test() {
+ local readonly IP="$1"
+ local readonly CLOCK="$2"
+ local readonly TXARGS="$3"
+ local readonly RXARGS="$4"
+
+ if [[ "${IP}" == "4" ]]; then
+ local readonly SADDR="${SADDR4}"
+ local readonly DADDR="${DADDR4}"
+ elif [[ "${IP}" == "6" ]]; then
+ local readonly SADDR="${SADDR6}"
+ local readonly DADDR="${DADDR6}"
+ else
+ echo "Invalid IP version ${IP}"
+ exit 1
fi
- exit $?
-fi
+ local readonly START="$(date +%s%N --date="+ 0.1 seconds")"
+ ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r &
+ ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}"
+ wait "$!"
+}
-set -e
+ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq
+do_test 4 mono a,-1 a,-1
+do_test 6 mono a,0 a,0
+do_test 6 mono a,10 a,10
+do_test 4 mono a,10,b,20 a,10,b,20
+do_test 6 mono a,20,b,10 b,20,a,20
-tc qdisc add dev lo root fq
-./so_txtime -4 -6 -c mono a,-1 a,-1
-./so_txtime -4 -6 -c mono a,0 a,0
-./so_txtime -4 -6 -c mono a,10 a,10
-./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20
-./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20
-
-if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then
- ! ./so_txtime -4 -6 -c tai a,-1 a,-1
- ! ./so_txtime -4 -6 -c tai a,0 a,0
- ./so_txtime -4 -6 -c tai a,10 a,10
- ./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20
- ./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20
+if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then
+ ! do_test 4 tai a,-1 a,-1
+ ! do_test 6 tai a,0 a,0
+ do_test 6 tai a,10 a,10
+ do_test 4 tai a,10,b,20 a,10,b,20
+ do_test 6 tai a,20,b,10 b,10,a,20
else
echo "tc ($(tc -V)) does not support qdisc etf. skipping"
fi
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
new file mode 100755
index 000000000000..a8fa64136282
--- /dev/null
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -0,0 +1,251 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+readonly BASE="ns-$(mktemp -u XXXXXX)"
+readonly SRC=2
+readonly DST=1
+readonly DST_NAT=100
+readonly NS_SRC=$BASE$SRC
+readonly NS_DST=$BASE$DST
+
+# "baremetal" network used for raw UDP traffic
+readonly BM_NET_V4=192.168.1.
+readonly BM_NET_V6=2001:db8::
+
+# "overlay" network used for UDP over UDP tunnel traffic
+readonly OL_NET_V4=172.16.1.
+readonly OL_NET_V6=2001:db8:1::
+readonly NPROCS=`nproc`
+
+cleanup() {
+ local ns
+ local -r jobs="$(jobs -p)"
+ [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
+
+ for ns in $NS_SRC $NS_DST; do
+ ip netns del $ns 2>/dev/null
+ done
+}
+
+trap cleanup EXIT
+
+create_ns() {
+ local net
+ local ns
+
+ for ns in $NS_SRC $NS_DST; do
+ ip netns add $ns
+ ip -n $ns link set dev lo up
+ done
+
+ ip link add name veth$SRC type veth peer name veth$DST
+
+ for ns in $SRC $DST; do
+ ip link set dev veth$ns netns $BASE$ns
+ ip -n $BASE$ns link set dev veth$ns up
+ ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
+ ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
+ done
+ ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
+}
+
+create_vxlan_endpoint() {
+ local -r netns=$1
+ local -r bm_dev=$2
+ local -r bm_rem_addr=$3
+ local -r vxlan_dev=$4
+ local -r vxlan_id=$5
+ local -r vxlan_port=4789
+
+ ip -n $netns link set dev $bm_dev up
+ ip -n $netns link add dev $vxlan_dev type vxlan id $vxlan_id \
+ dstport $vxlan_port remote $bm_rem_addr
+ ip -n $netns link set dev $vxlan_dev up
+}
+
+create_vxlan_pair() {
+ local ns
+
+ create_ns
+
+ for ns in $SRC $DST; do
+ # note that 3 - $SRC == $DST and 3 - $DST == $SRC
+ create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V4$((3 - $ns)) vxlan$ns 4
+ ip -n $BASE$ns addr add dev vxlan$ns $OL_NET_V4$ns/24
+ done
+ for ns in $SRC $DST; do
+ create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6
+ ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad
+ done
+}
+
+is_ipv6() {
+ if [[ $1 =~ .*:.* ]]; then
+ return 0
+ fi
+ return 1
+}
+
+run_test() {
+ local -r msg=$1
+ local -r dst=$2
+ local -r pkts=$3
+ local -r vxpkts=$4
+ local bind=$5
+ local rx_args=""
+ local rx_family="-4"
+ local family=-4
+ local filter=IpInReceives
+ local ipt=iptables
+
+ printf "%-40s" "$msg"
+
+ if is_ipv6 $dst; then
+ # rx program does not support '-6' and implies ipv6 usage by default
+ rx_family=""
+ family=-6
+ filter=Ip6InReceives
+ ipt=ip6tables
+ fi
+
+ rx_args="$rx_family"
+ [ -n "$bind" ] && rx_args="$rx_args -b $bind"
+
+ # send a single GSO packet, segmented in 10 UDP frames.
+ # Always expect 10 UDP frames on RX side as rx socket does
+ # not enable GRO
+ ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789
+ ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000
+ ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args &
+ local spid=$!
+ sleep 0.1
+ ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst
+ local retc=$?
+ wait $spid
+ local rets=$?
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ echo " fail client exit code $retc, server $rets"
+ ret=1
+ return
+ fi
+
+ local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \
+ sed -e 's/\[//' -e 's/:.*//'`
+ if [ $rcv != $pkts ]; then
+ echo " fail - received $rvs packets, expected $pkts"
+ ret=1
+ return
+ fi
+
+ local vxrcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 4789' | \
+ sed -e 's/\[//' -e 's/:.*//'`
+
+ # upper net can generate a little noise, allow some tolerance
+ if [ $vxrcv -lt $vxpkts -o $vxrcv -gt $((vxpkts + 3)) ]; then
+ echo " fail - received $vxrcv vxlan packets, expected $vxpkts"
+ ret=1
+ return
+ fi
+ echo " ok"
+}
+
+run_bench() {
+ local -r msg=$1
+ local -r dst=$2
+ local family=-4
+
+ printf "%-40s" "$msg"
+ if [ $NPROCS -lt 2 ]; then
+ echo " skip - needed 2 CPUs found $NPROCS"
+ return
+ fi
+
+ is_ipv6 $dst && family=-6
+
+ # bind the sender and the receiver to different CPUs to try
+ # get reproducible results
+ ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus"
+ ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 &
+ local spid=$!
+ sleep 0.1
+ ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst
+ local retc=$?
+ wait $spid
+ local rets=$?
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ echo " fail client exit code $retc, server $rets"
+ ret=1
+ return
+ fi
+}
+
+for family in 4 6; do
+ BM_NET=$BM_NET_V4
+ OL_NET=$OL_NET_V4
+ IPT=iptables
+ SUFFIX=24
+ VXDEV=vxlan
+
+ if [ $family = 6 ]; then
+ BM_NET=$BM_NET_V6
+ OL_NET=$OL_NET_V6
+ SUFFIX="64 nodad"
+ VXDEV=vxlan6
+ IPT=ip6tables
+ fi
+
+ echo "IPv$family"
+
+ create_ns
+ run_test "No GRO" $BM_NET$DST 10 0
+ cleanup
+
+ create_ns
+ ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
+ run_test "GRO frag list" $BM_NET$DST 1 0
+ cleanup
+
+ # UDP GRO fwd skips aggregation when find an udp socket with the GRO option
+ # if there is an UDP tunnel in the running system, such lookup happen
+ # take place.
+ # use NAT to circumvent GRO FWD check
+ create_ns
+ ip -n $NS_DST addr add dev veth$DST $BM_NET$DST_NAT/$SUFFIX
+ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+ ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $BM_NET$DST_NAT \
+ -j DNAT --to-destination $BM_NET$DST
+ run_test "GRO fwd" $BM_NET$DST_NAT 1 0 $BM_NET$DST
+ cleanup
+
+ create_ns
+ run_bench "UDP fwd perf" $BM_NET$DST
+ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+ run_bench "UDP GRO fwd perf" $BM_NET$DST
+ cleanup
+
+ create_vxlan_pair
+ ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
+ run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
+ cleanup
+
+ # use NAT to circumvent GRO FWD check
+ create_vxlan_pair
+ ip -n $NS_DST addr add dev $VXDEV$DST $OL_NET$DST_NAT/$SUFFIX
+ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+ ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $OL_NET$DST_NAT \
+ -j DNAT --to-destination $OL_NET$DST
+
+ # load arp cache before running the test to reduce the amount of
+ # stray traffic on top of the UDP tunnel
+ ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null
+ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
+ cleanup
+
+ create_vxlan_pair
+ run_bench "UDP tunnel fwd perf" $OL_NET$DST
+ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+ run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
+ cleanup
+done
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 3006a8e5b41a..3171069a6b46 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -4,7 +4,7 @@
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
- nft_queue.sh nft_meta.sh \
+ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
ipip-conntrack-mtu.sh
LDLIBS = -lmnl
diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
new file mode 100755
index 000000000000..cfee3b65be0f
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test NAT source port clash resolution
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+
+cleanup()
+{
+ ip netns del $ns1
+ ip netns del $ns2
+}
+
+iperf3 -v > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without iperf3"
+ exit $ksft_skip
+fi
+
+iptables --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without iptables"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add "$ns1"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $ns1"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+ip netns add $ns2
+
+# Connect the namespaces using a veth pair
+ip link add name veth2 type veth peer name veth1
+ip link set netns $ns1 dev veth1
+ip link set netns $ns2 dev veth2
+
+ip netns exec $ns1 ip link set up dev lo
+ip netns exec $ns1 ip link set up dev veth1
+ip netns exec $ns1 ip addr add 192.168.1.1/24 dev veth1
+
+ip netns exec $ns2 ip link set up dev lo
+ip netns exec $ns2 ip link set up dev veth2
+ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
+
+# Create a server in one namespace
+ip netns exec $ns1 iperf3 -s > /dev/null 2>&1 &
+iperfs=$!
+
+# Restrict source port to just one so we don't have to exhaust
+# all others.
+ip netns exec $ns2 sysctl -q net.ipv4.ip_local_port_range="10000 10000"
+
+# add a virtual IP using DNAT
+ip netns exec $ns2 iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+
+# ... and route it to the other namespace
+ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1
+
+sleep 1
+
+# add a persistent connection from the other namespace
+ip netns exec $ns2 nc -q 10 -w 10 192.168.1.1 5201 > /dev/null &
+
+sleep 1
+
+# ip daddr:dport will be rewritten to 192.168.1.1 5201
+# NAT must reallocate source port 10000 because
+# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
+echo test | ip netns exec $ns2 nc -w 3 -q 3 10.96.0.1 443 >/dev/null
+ret=$?
+
+kill $iperfs
+
+# Check nc can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
+if [ $ret -eq 0 ]; then
+ echo "PASS: nc can connect via NAT'd address"
+else
+ echo "FAIL: nc cannot connect via NAT'd address"
+ exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index 431296c0f91c..427d94816f2d 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -371,6 +371,88 @@ else
ip netns exec nsr1 nft list ruleset
fi
+# Another test:
+# Add bridge interface br0 to Router1, with NAT enabled.
+ip -net nsr1 link add name br0 type bridge
+ip -net nsr1 addr flush dev veth0
+ip -net nsr1 link set up dev veth0
+ip -net nsr1 link set veth0 master br0
+ip -net nsr1 addr add 10.0.1.1/24 dev br0
+ip -net nsr1 addr add dead:1::1/64 dev br0
+ip -net nsr1 link set up dev br0
+
+ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+
+# br0 with NAT enabled.
+ip netns exec nsr1 nft -f - <<EOF
+flush table ip nat
+table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+ }
+
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oifname "veth1" counter masquerade
+ }
+}
+EOF
+
+if test_tcp_forwarding_nat ns1 ns2; then
+ echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
+else
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
+ ip netns exec nsr1 nft list ruleset
+ ret=1
+fi
+
+# Another test:
+# Add bridge interface br0 to Router1, with NAT and VLAN.
+ip -net nsr1 link set veth0 nomaster
+ip -net nsr1 link set down dev veth0
+ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10
+ip -net nsr1 link set up dev veth0
+ip -net nsr1 link set up dev veth0.10
+ip -net nsr1 link set veth0.10 master br0
+
+ip -net ns1 addr flush dev eth0
+ip -net ns1 link add link eth0 name eth0.10 type vlan id 10
+ip -net ns1 link set eth0 up
+ip -net ns1 link set eth0.10 up
+ip -net ns1 addr add 10.0.1.99/24 dev eth0.10
+ip -net ns1 route add default via 10.0.1.1
+ip -net ns1 addr add dead:1::99/64 dev eth0.10
+
+if test_tcp_forwarding_nat ns1 ns2; then
+ echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
+else
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
+ ip netns exec nsr1 nft list ruleset
+ ret=1
+fi
+
+# restore test topology (remove bridge and VLAN)
+ip -net nsr1 link set veth0 nomaster
+ip -net nsr1 link set veth0 down
+ip -net nsr1 link set veth0.10 down
+ip -net nsr1 link delete veth0.10 type vlan
+ip -net nsr1 link delete br0 type bridge
+ip -net ns1 addr flush dev eth0.10
+ip -net ns1 link set eth0.10 down
+ip -net ns1 link set eth0 down
+ip -net ns1 link delete eth0.10 type vlan
+
+# restore address in ns1 and nsr1
+ip -net ns1 link set eth0 up
+ip -net ns1 addr add 10.0.1.99/24 dev eth0
+ip -net ns1 route add default via 10.0.1.1
+ip -net ns1 addr add dead:1::99/64 dev eth0
+ip -net ns1 route add default via dead:1::1
+ip -net nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net nsr1 addr add dead:1::1/64 dev veth0
+ip -net nsr1 link set up dev veth0
+
KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
SPI1=$RANDOM
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
index 0d783e1065c8..442b666ccdb5 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
@@ -1,28 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
-KSELFTESTS_SKIP=4
-
. ./eeh-functions.sh
-if ! eeh_supported ; then
- echo "EEH not supported on this system, skipping"
- exit $KSELFTESTS_SKIP;
-fi
-
-if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
- [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
- echo "debugfs EEH testing files are missing. Is debugfs mounted?"
- exit $KSELFTESTS_SKIP;
-fi
+eeh_test_prep # NB: may exit
pre_lspci=`mktemp`
lspci > $pre_lspci
-# Bump the max freeze count to something absurd so we don't
-# trip over it while breaking things.
-echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes
-
# record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recover process
# is finished.
@@ -30,34 +15,16 @@ devices=""
# Build up a list of candidate devices.
for dev in `ls -1 /sys/bus/pci/devices/ | grep '\.0$'` ; do
- # skip bridges since we can't recover them (yet...)
- if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
- echo "$dev, Skipped: bridge"
+ if ! eeh_can_break $dev ; then
continue;
fi
- # Skip VFs for now since we don't have a reliable way
- # to break them.
+ # Skip VFs for now since we don't have a reliable way to break them.
if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then
echo "$dev, Skipped: virtfn"
continue;
fi
- if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then
- echo "$dev, Skipped: ahci doesn't support recovery"
- continue
- fi
-
- # Don't inject errosr into an already-frozen PE. This happens with
- # PEs that contain multiple PCI devices (e.g. multi-function cards)
- # and injecting new errors during the recovery process will probably
- # result in the recovery failing and the device being marked as
- # failed.
- if ! pe_ok $dev ; then
- echo "$dev, Skipped: Bad initial PE state"
- continue;
- fi
-
echo "$dev, Added"
# Add to this list of device to check
@@ -86,5 +53,5 @@ echo "$failed devices failed to recover ($dev_count tested)"
lspci | diff -u $pre_lspci -
rm -f $pre_lspci
-test "$failed" == 0
+test "$failed" -eq 0
exit $?
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
index 00dc32c0ed75..70daa3925dcb 100755..100644
--- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
@@ -1,6 +1,12 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
+export KSELFTESTS_SKIP=4
+
+log() {
+ echo >/dev/stderr $*
+}
+
pe_ok() {
local dev="$1"
local path="/sys/bus/pci/devices/$dev/eeh_pe_state"
@@ -39,6 +45,52 @@ eeh_supported() {
grep -q 'EEH Subsystem is enabled' /proc/powerpc/eeh
}
+eeh_test_prep() {
+ if ! eeh_supported ; then
+ echo "EEH not supported on this system, skipping"
+ exit $KSELFTESTS_SKIP;
+ fi
+
+ if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
+ [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
+ log "debugfs EEH testing files are missing. Is debugfs mounted?"
+ exit $KSELFTESTS_SKIP;
+ fi
+
+ # Bump the max freeze count to something absurd so we don't
+ # trip over it while breaking things.
+ echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes
+}
+
+eeh_can_break() {
+ # skip bridges since we can't recover them (yet...)
+ if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
+ log "$dev, Skipped: bridge"
+ return 1;
+ fi
+
+ # The ahci driver doesn't support error recovery. If the ahci device
+ # happens to be hosting the root filesystem, and then we go and break
+ # it the system will generally go down. We should probably fix that
+ # at some point
+ if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then
+ log "$dev, Skipped: ahci doesn't support recovery"
+ return 1;
+ fi
+
+ # Don't inject errosr into an already-frozen PE. This happens with
+ # PEs that contain multiple PCI devices (e.g. multi-function cards)
+ # and injecting new errors during the recovery process will probably
+ # result in the recovery failing and the device being marked as
+ # failed.
+ if ! pe_ok $dev ; then
+ log "$dev, Skipped: Bad initial PE state"
+ return 1;
+ fi
+
+ return 0
+}
+
eeh_one_dev() {
local dev="$1"
@@ -46,7 +98,7 @@ eeh_one_dev() {
# testing so check that the argument is a well-formed sysfs device
# name.
if ! test -e /sys/bus/pci/devices/$dev/ ; then
- echo "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)"
+ log "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)"
return 1;
fi
@@ -70,16 +122,124 @@ eeh_one_dev() {
if pe_ok $dev ; then
break;
fi
- echo "$dev, waited $i/${max_wait}"
+ log "$dev, waited $i/${max_wait}"
sleep 1
done
if ! pe_ok $dev ; then
- echo "$dev, Failed to recover!"
+ log "$dev, Failed to recover!"
return 1;
fi
- echo "$dev, Recovered after $i seconds"
+ log "$dev, Recovered after $i seconds"
return 0;
}
+eeh_has_driver() {
+ test -e /sys/bus/pci/devices/$1/driver;
+ return $?
+}
+
+eeh_can_recover() {
+ # we'll get an IO error if the device's current driver doesn't support
+ # error recovery
+ echo $1 > '/sys/kernel/debug/powerpc/eeh_dev_can_recover' 2>/dev/null
+
+ return $?
+}
+
+eeh_find_all_pfs() {
+ devices=""
+
+ # SR-IOV on pseries requires hypervisor support, so check for that
+ is_pseries=""
+ if grep -q pSeries /proc/cpuinfo ; then
+ if [ ! -f /proc/device-tree/rtas/ibm,open-sriov-allow-unfreeze ] ||
+ [ ! -f /proc/device-tree/rtas/ibm,open-sriov-map-pe-number ] ; then
+ return 1;
+ fi
+
+ is_pseries="true"
+ fi
+
+ for dev in `ls -1 /sys/bus/pci/devices/` ; do
+ sysfs="/sys/bus/pci/devices/$dev"
+ if [ ! -e "$sysfs/sriov_numvfs" ] ; then
+ continue
+ fi
+
+ # skip unsupported PFs on pseries
+ if [ -z "$is_pseries" ] &&
+ [ ! -f "$sysfs/of_node/ibm,is-open-sriov-pf" ] &&
+ [ ! -f "$sysfs/of_node/ibm,open-sriov-vf-bar-info" ] ; then
+ continue;
+ fi
+
+ # no driver, no vfs
+ if ! eeh_has_driver $dev ; then
+ continue
+ fi
+
+ devices="$devices $dev"
+ done
+
+ if [ -z "$devices" ] ; then
+ return 1;
+ fi
+
+ echo $devices
+ return 0;
+}
+
+# attempts to enable one VF on each PF so we can do VF specific tests.
+# stdout: list of enabled VFs, one per line
+# return code: 0 if vfs are found, 1 otherwise
+eeh_enable_vfs() {
+ pf_list="$(eeh_find_all_pfs)"
+
+ vfs=0
+ for dev in $pf_list ; do
+ pf_sysfs="/sys/bus/pci/devices/$dev"
+
+ # make sure we have a single VF
+ echo 0 > "$pf_sysfs/sriov_numvfs"
+ echo 1 > "$pf_sysfs/sriov_numvfs"
+ if [ "$?" != 0 ] ; then
+ log "Unable to enable VFs on $pf, skipping"
+ continue;
+ fi
+
+ vf="$(basename $(realpath "$pf_sysfs/virtfn0"))"
+ if [ $? != 0 ] ; then
+ log "unable to find enabled vf on $pf"
+ echo 0 > "$pf_sysfs/sriov_numvfs"
+ continue;
+ fi
+
+ if ! eeh_can_break $vf ; then
+ log "skipping "
+
+ echo 0 > "$pf_sysfs/sriov_numvfs"
+ continue;
+ fi
+
+ vfs="$((vfs + 1))"
+ echo $vf
+ done
+
+ test "$vfs" != 0
+ return $?
+}
+
+eeh_disable_vfs() {
+ pf_list="$(eeh_find_all_pfs)"
+ if [ -z "$pf_list" ] ; then
+ return 1;
+ fi
+
+ for dev in $pf_list ; do
+ echo 0 > "/sys/bus/pci/devices/$dev/sriov_numvfs"
+ done
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh b/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh
new file mode 100755
index 000000000000..874c11953bb6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+. ./eeh-functions.sh
+
+eeh_test_prep # NB: may exit
+
+vf_list="$(eeh_enable_vfs)";
+if $? != 0 ; then
+ log "No usable VFs found. Skipping EEH unaware VF test"
+ exit $KSELFTESTS_SKIP;
+fi
+
+log "Enabled VFs: $vf_list"
+
+tested=0
+passed=0
+for vf in $vf_list ; do
+ log "Testing $vf"
+
+ if ! eeh_can_recover $vf ; then
+ log "Driver for $vf doesn't support error recovery, skipping"
+ continue;
+ fi
+
+ tested="$((tested + 1))"
+
+ log "Breaking $vf..."
+ if ! eeh_one_dev $vf ; then
+ log "$vf failed to recover"
+ continue;
+ fi
+
+ passed="$((passed + 1))"
+done
+
+eeh_disable_vfs
+
+if [ "$tested" == 0 ] ; then
+ echo "No VFs with EEH aware drivers found, skipping"
+ exit $KSELFTESTS_SKIP
+fi
+
+test "$failed" != 0
+exit $?;
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh b/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh
new file mode 100755
index 000000000000..8a4c147b9d43
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+. ./eeh-functions.sh
+
+eeh_test_prep # NB: may exit
+
+vf_list="$(eeh_enable_vfs)";
+if $? != 0 ; then
+ log "No usable VFs found. Skipping EEH unaware VF test"
+ exit $KSELFTESTS_SKIP;
+fi
+
+log "Enabled VFs: $vf_list"
+
+failed=0
+for vf in $vf_list ; do
+ log "Testing $vf"
+
+ if eeh_can_recover $vf ; then
+ log "Driver for $vf supports error recovery. Unbinding..."
+ echo "$vf" > /sys/bus/pci/devices/$vf/driver/unbind
+ fi
+
+ log "Breaking $vf..."
+ if ! eeh_one_dev $vf ; then
+ log "$vf failed to recover"
+ failed="$((failed + 1))"
+ fi
+done
+
+eeh_disable_vfs
+
+test "$failed" != 0
+exit $?;
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 26c72f2b61b1..98c3b647f54d 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -315,7 +315,7 @@ TEST(kcmp)
ret = __filecmp(getpid(), getpid(), 1, 1);
EXPECT_EQ(ret, 0);
if (ret != 0 && errno == ENOSYS)
- SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)");
+ SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
}
TEST(mode_strict_support)
@@ -4019,18 +4019,14 @@ TEST(user_notification_addfd)
/* Verify we can set an arbitrary remote fd */
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
- /*
- * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd),
- * 4(listener), so the newly allocated fd should be 5.
- */
- EXPECT_EQ(fd, 5);
+ EXPECT_GE(fd, 0);
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/* Verify we can set an arbitrary remote fd with large size */
memset(&big, 0x0, sizeof(big));
big.addfd = addfd;
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
- EXPECT_EQ(fd, 6);
+ EXPECT_GE(fd, 0);
/* Verify we can set a specific remote fd */
addfd.newfd = 42;
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index b8268da5adaa..8e45792703ed 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -764,5 +764,53 @@
"teardown": [
"$TC actions flush action police"
]
+ },
+ {
+ "id": "cdd7",
+ "name": "Add valid police action with packets per second rate limit",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police pkts_rate 1000 pkts_burst 200 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 0bit burst 0b mtu 4096Mb pkts_rate 1000 pkts_burst 200",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "f5bc",
+ "name": "Add invalid police action with both bps and pps",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k pkts_rate 1000 pkts_burst 200 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 ",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
index 8e8c1ae12260..e0c5f060ccb9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
@@ -24,6 +24,30 @@
]
},
{
+ "id": "4297",
+ "name": "Add simple action with change command",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions change action simple sdata \"Not changed\" index 60",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <Not changed>.*index 60 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
"id": "6d4c",
"name": "Add simple action with duplicate index",
"category": [
@@ -151,5 +175,64 @@
"teardown": [
"$TC actions flush action simple"
]
+ },
+ {
+ "id": "8d07",
+ "name": "Verify cleanup of failed actions batch add",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"2\" index 2",
+ [
+ "$TC actions add action simple sdata \"1\" index 1 action simple sdata \"2\" index 2",
+ 255
+ ],
+ "$TC actions flush action simple"
+ ],
+ "cmdUnderTest": "$TC actions add action simple sdata \"2\" index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <2>.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "a68a",
+ "name": "Verify cleanup of failed actions batch change",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC actions change action simple sdata \"1\" index 1 action simple sdata \"2\" goto chain 42 index 2",
+ 255
+ ],
+ "$TC actions flush action simple"
+ ],
+ "cmdUnderTest": "$TC actions add action simple sdata \"1\" index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <1>.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
}
]
diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore
index 2e43851b47c1..fe1eb8271b35 100644
--- a/tools/testing/selftests/timens/.gitignore
+++ b/tools/testing/selftests/timens/.gitignore
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
clock_nanosleep
exec
+futex
gettime_perf
gettime_perf_cold
procfs
diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
index 6a6fe8d4ff55..6188b16827d1 100644
--- a/tools/testing/selftests/vDSO/vdso_config.h
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -47,10 +47,12 @@
#elif defined(__x86_64__)
#define VDSO_VERSION 0
#define VDSO_NAMES 1
-#elif defined(__riscv__)
+#elif defined(__riscv__) || defined(__riscv)
#define VDSO_VERSION 5
#define VDSO_NAMES 1
+#if __riscv_xlen == 32
#define VDSO_32BIT 1
+#endif
#else /* nds32 */
#define VDSO_VERSION 4
#define VDSO_NAMES 1
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index d42115e4284d..8b0cd421ebd3 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -101,7 +101,7 @@ endef
ifeq ($(CAN_BUILD_I386),1)
$(BINARIES_32): CFLAGS += -m32
$(BINARIES_32): LDLIBS += -lrt -ldl -lm
-$(BINARIES_32): %_32: %.c
+$(BINARIES_32): $(OUTPUT)/%_32: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
endif
@@ -109,7 +109,7 @@ endif
ifeq ($(CAN_BUILD_X86_64),1)
$(BINARIES_64): CFLAGS += -m64
$(BINARIES_64): LDLIBS += -lrt -ldl
-$(BINARIES_64): %_64: %.c
+$(BINARIES_64): $(OUTPUT)/%_64: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
endif
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 74c69b75f6f5..7ed7cd95e58f 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -39,7 +39,7 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
sleep() { read -t "$1" -N 1 || true; }
-waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
+waitiperf() { pretty "${1//*-}" "wait for iperf:${3:-5201} pid $2"; while [[ $(ss -N "$1" -tlpH "sport = ${3:-5201}") != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; }
waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; }
@@ -141,6 +141,19 @@ tests() {
n2 iperf3 -s -1 -B fd00::2 &
waitiperf $netns2 $!
n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
+
+ # TCP over IPv4, in parallel
+ for max in 4 5 50; do
+ local pids=( )
+ for ((i=0; i < max; ++i)) do
+ n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
+ pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
+ done
+ for ((i=0; i < max; ++i)) do
+ n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
+ done
+ wait "${pids[@]}"
+ done
}
[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 1aef72df20a1..3a29346e1452 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -607,7 +607,7 @@ static void do_multicpu_tests(void)
failures++;
asm volatile ("mov %0, %%ss" : : "rm" (orig_ss));
- };
+ }
ftx = 100; /* Kill the thread. */
syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
diff --git a/tools/tracing/Makefile b/tools/tracing/Makefile
new file mode 100644
index 000000000000..87e0ec48e2e7
--- /dev/null
+++ b/tools/tracing/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../scripts/Makefile.include
+
+all: latency
+
+clean: latency_clean
+
+install: latency_install
+
+latency:
+ $(call descend,latency)
+
+latency_install:
+ $(call descend,latency,install)
+
+latency_clean:
+ $(call descend,latency,clean)
+
+.PHONY: all install clean latency latency_install latency_clean
diff --git a/tools/tracing/latency/.gitignore b/tools/tracing/latency/.gitignore
new file mode 100644
index 000000000000..0863960761e7
--- /dev/null
+++ b/tools/tracing/latency/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+latency-collector
diff --git a/tools/tracing/latency/Makefile b/tools/tracing/latency/Makefile
new file mode 100644
index 000000000000..40c4ddaf8be1
--- /dev/null
+++ b/tools/tracing/latency/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for vm tools
+#
+VAR_CFLAGS := $(shell pkg-config --cflags libtracefs 2>/dev/null)
+VAR_LDLIBS := $(shell pkg-config --libs libtracefs 2>/dev/null)
+
+TARGETS = latency-collector
+CFLAGS = -Wall -Wextra -g -O2 $(VAR_CFLAGS)
+LDFLAGS = -lpthread $(VAR_LDLIBS)
+
+all: $(TARGETS)
+
+%: %.c
+ $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+clean:
+ $(RM) latency-collector
+
+prefix ?= /usr/local
+sbindir ?= ${prefix}/sbin
+
+install: all
+ install -d $(DESTDIR)$(sbindir)
+ install -m 755 -p $(TARGETS) $(DESTDIR)$(sbindir)
diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c
new file mode 100644
index 000000000000..b69de9263ee6
--- /dev/null
+++ b/tools/tracing/latency/latency-collector.c
@@ -0,0 +1,2108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017, 2018, 2019, 2021 BMW Car IT GmbH
+ * Author: Viktor Rosendahl (viktor.rosendahl@bmw.de)
+ */
+
+#define _GNU_SOURCE
+#define _POSIX_C_SOURCE 200809L
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <sched.h>
+#include <linux/unistd.h>
+#include <signal.h>
+#include <sys/inotify.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <tracefs.h>
+
+static const char *prg_name;
+static const char *prg_unknown = "unknown program name";
+
+static int fd_stdout;
+
+static int sched_policy;
+static bool sched_policy_set;
+
+static int sched_pri;
+static bool sched_pri_set;
+
+static bool trace_enable = true;
+static bool setup_ftrace = true;
+static bool use_random_sleep;
+
+#define TRACE_OPTS \
+ C(FUNC_TR, "function-trace"), \
+ C(DISP_GR, "display-graph"), \
+ C(NR, NULL)
+
+#undef C
+#define C(a, b) OPTIDX_##a
+
+enum traceopt {
+ TRACE_OPTS
+};
+
+#undef C
+#define C(a, b) b
+
+static const char *const optstr[] = {
+ TRACE_OPTS
+};
+
+enum errhandling {
+ ERR_EXIT = 0,
+ ERR_WARN,
+ ERR_CLEANUP,
+};
+
+static bool use_options[OPTIDX_NR];
+
+static char inotify_buffer[655360];
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#define bool2str(x) (x ? "true":"false")
+
+#define DEFAULT_NR_PRINTER_THREADS (3)
+static unsigned int nr_threads = DEFAULT_NR_PRINTER_THREADS;
+
+#define DEFAULT_TABLE_SIZE (2)
+static unsigned int table_startsize = DEFAULT_TABLE_SIZE;
+
+static int verbosity;
+
+#define verbose_sizechange() (verbosity >= 1)
+#define verbose_lostevent() (verbosity >= 2)
+#define verbose_ftrace() (verbosity >= 1)
+
+#define was_changed(ORIG, CUR) (strcmp(ORIG, CUR) != 0)
+#define needs_change(CUR, WANTED) (strcmp(CUR, WANTED) != 0)
+
+static const char *debug_tracefile;
+static const char *debug_tracefile_dflt;
+static const char *debug_maxlat;
+static const char *debug_maxlat_dflt;
+static const char * const DEBUG_NOFILE = "[file not found]";
+
+static const char * const TR_MAXLAT = "tracing_max_latency";
+static const char * const TR_THRESH = "tracing_thresh";
+static const char * const TR_CURRENT = "current_tracer";
+static const char * const TR_OPTIONS = "trace_options";
+
+static const char * const NOP_TRACER = "nop";
+
+static const char * const OPT_NO_PREFIX = "no";
+
+#define DFLT_THRESHOLD_US "0"
+static const char *threshold = DFLT_THRESHOLD_US;
+
+#define DEV_URANDOM "/dev/urandom"
+#define RT_DEFAULT_PRI (99)
+#define DEFAULT_PRI (0)
+
+#define USEC_PER_MSEC (1000L)
+#define NSEC_PER_USEC (1000L)
+#define NSEC_PER_MSEC (USEC_PER_MSEC * NSEC_PER_USEC)
+
+#define MSEC_PER_SEC (1000L)
+#define USEC_PER_SEC (USEC_PER_MSEC * MSEC_PER_SEC)
+#define NSEC_PER_SEC (NSEC_PER_MSEC * MSEC_PER_SEC)
+
+#define SLEEP_TIME_MS_DEFAULT (1000L)
+#define TRY_PRINTMUTEX_MS (1000)
+
+static long sleep_time = (USEC_PER_MSEC * SLEEP_TIME_MS_DEFAULT);
+
+static const char * const queue_full_warning =
+"Could not queue trace for printing. It is likely that events happen faster\n"
+"than what they can be printed. Probably partly because of random sleeping\n";
+
+static const char * const no_tracer_msg =
+"Could not find any tracers! Running this program as root may help!\n";
+
+static const char * const no_latency_tr_msg =
+"No latency tracers are supported by your kernel!\n";
+
+struct policy {
+ const char *name;
+ int policy;
+ int default_pri;
+};
+
+static const struct policy policies[] = {
+ { "other", SCHED_OTHER, DEFAULT_PRI },
+ { "batch", SCHED_BATCH, DEFAULT_PRI },
+ { "idle", SCHED_IDLE, DEFAULT_PRI },
+ { "rr", SCHED_RR, RT_DEFAULT_PRI },
+ { "fifo", SCHED_FIFO, RT_DEFAULT_PRI },
+ { NULL, 0, DEFAULT_PRI }
+};
+
+/*
+ * The default tracer will be the first on this list that is supported by the
+ * currently running Linux kernel.
+ */
+static const char * const relevant_tracers[] = {
+ "preemptirqsoff",
+ "preemptoff",
+ "irqsoff",
+ "wakeup",
+ "wakeup_rt",
+ "wakeup_dl",
+ NULL
+};
+
+/* This is the list of tracers for which random sleep makes sense */
+static const char * const random_tracers[] = {
+ "preemptirqsoff",
+ "preemptoff",
+ "irqsoff",
+ NULL
+};
+
+static const char *current_tracer;
+static bool force_tracer;
+
+struct ftrace_state {
+ char *tracer;
+ char *thresh;
+ bool opt[OPTIDX_NR];
+ bool opt_valid[OPTIDX_NR];
+ pthread_mutex_t mutex;
+};
+
+struct entry {
+ int ticket;
+ int ticket_completed_ref;
+};
+
+struct print_state {
+ int ticket_counter;
+ int ticket_completed;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ int cnt;
+ pthread_mutex_t cnt_mutex;
+};
+
+struct short_msg {
+ char buf[160];
+ int len;
+};
+
+static struct print_state printstate;
+static struct ftrace_state save_state;
+volatile sig_atomic_t signal_flag;
+
+#define PROB_TABLE_MAX_SIZE (1000)
+
+int probabilities[PROB_TABLE_MAX_SIZE];
+
+struct sleep_table {
+ int *table;
+ int size;
+ pthread_mutex_t mutex;
+};
+
+static struct sleep_table sleeptable;
+
+#define QUEUE_SIZE (10)
+
+struct queue {
+ struct entry entries[QUEUE_SIZE];
+ int next_prod_idx;
+ int next_cons_idx;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+};
+
+#define MAX_THREADS (40)
+
+struct queue printqueue;
+pthread_t printthread[MAX_THREADS];
+pthread_mutex_t print_mtx;
+#define PRINT_BUFFER_SIZE (16 * 1024 * 1024)
+
+static void cleanup_exit(int status);
+static int set_trace_opt(const char *opt, bool value);
+
+static __always_inline void *malloc_or_die(size_t size)
+{
+ void *ptr = malloc(size);
+
+ if (unlikely(ptr == NULL)) {
+ warn("malloc() failed");
+ cleanup_exit(EXIT_FAILURE);
+ }
+ return ptr;
+}
+
+static __always_inline void *malloc_or_die_nocleanup(size_t size)
+{
+ void *ptr = malloc(size);
+
+ if (unlikely(ptr == NULL))
+ err(0, "malloc() failed");
+ return ptr;
+}
+
+static __always_inline void write_or_die(int fd, const char *buf, size_t count)
+{
+ ssize_t r;
+
+ do {
+ r = write(fd, buf, count);
+ if (unlikely(r < 0)) {
+ if (errno == EINTR)
+ continue;
+ warn("write() failed");
+ cleanup_exit(EXIT_FAILURE);
+ }
+ count -= r;
+ buf += r;
+ } while (count > 0);
+}
+
+static __always_inline void clock_gettime_or_die(clockid_t clk_id,
+ struct timespec *tp)
+{
+ int r = clock_gettime(clk_id, tp);
+
+ if (unlikely(r != 0))
+ err(EXIT_FAILURE, "clock_gettime() failed");
+}
+
+static __always_inline void sigemptyset_or_die(sigset_t *s)
+{
+ if (unlikely(sigemptyset(s) != 0)) {
+ warn("sigemptyset() failed");
+ cleanup_exit(EXIT_FAILURE);
+ }
+}
+
+static __always_inline void sigaddset_or_die(sigset_t *s, int signum)
+{
+ if (unlikely(sigaddset(s, signum) != 0)) {
+ warn("sigemptyset() failed");
+ cleanup_exit(EXIT_FAILURE);
+ }
+}
+
+static __always_inline void sigaction_or_die(int signum,
+ const struct sigaction *act,
+ struct sigaction *oldact)
+{
+ if (unlikely(sigaction(signum, act, oldact) != 0)) {
+ warn("sigaction() failed");
+ cleanup_exit(EXIT_FAILURE);
+ }
+}
+
+static void open_stdout(void)
+{
+ if (setvbuf(stdout, NULL, _IONBF, 0) != 0)
+ err(EXIT_FAILURE, "setvbuf() failed");
+ fd_stdout = fileno(stdout);
+ if (fd_stdout < 0)
+ err(EXIT_FAILURE, "fileno() failed");
+}
+
+/*
+ * It's not worth it to call cleanup_exit() from mutex functions because
+ * cleanup_exit() uses mutexes.
+ */
+static __always_inline void mutex_lock(pthread_mutex_t *mtx)
+{
+ errno = pthread_mutex_lock(mtx);
+ if (unlikely(errno))
+ err(EXIT_FAILURE, "pthread_mutex_lock() failed");
+}
+
+
+static __always_inline void mutex_unlock(pthread_mutex_t *mtx)
+{
+ errno = pthread_mutex_unlock(mtx);
+ if (unlikely(errno))
+ err(EXIT_FAILURE, "pthread_mutex_unlock() failed");
+}
+
+static __always_inline void cond_signal(pthread_cond_t *cond)
+{
+ errno = pthread_cond_signal(cond);
+ if (unlikely(errno))
+ err(EXIT_FAILURE, "pthread_cond_signal() failed");
+}
+
+static __always_inline void cond_wait(pthread_cond_t *restrict cond,
+ pthread_mutex_t *restrict mutex)
+{
+ errno = pthread_cond_wait(cond, mutex);
+ if (unlikely(errno))
+ err(EXIT_FAILURE, "pthread_cond_wait() failed");
+}
+
+static __always_inline void cond_broadcast(pthread_cond_t *cond)
+{
+ errno = pthread_cond_broadcast(cond);
+ if (unlikely(errno))
+ err(EXIT_FAILURE, "pthread_cond_broadcast() failed");
+}
+
+static __always_inline void
+mutex_init(pthread_mutex_t *mutex,
+ const pthread_mutexattr_t *attr)
+{
+ errno = pthread_mutex_init(mutex, attr);
+ if (errno)
+ err(EXIT_FAILURE, "pthread_mutex_init() failed");
+}
+
+static __always_inline void mutexattr_init(pthread_mutexattr_t *attr)
+{
+ errno = pthread_mutexattr_init(attr);
+ if (errno)
+ err(EXIT_FAILURE, "pthread_mutexattr_init() failed");
+}
+
+static __always_inline void mutexattr_destroy(pthread_mutexattr_t *attr)
+{
+ errno = pthread_mutexattr_destroy(attr);
+ if (errno)
+ err(EXIT_FAILURE, "pthread_mutexattr_destroy() failed");
+}
+
+static __always_inline void mutexattr_settype(pthread_mutexattr_t *attr,
+ int type)
+{
+ errno = pthread_mutexattr_settype(attr, type);
+ if (errno)
+ err(EXIT_FAILURE, "pthread_mutexattr_settype() failed");
+}
+
+static __always_inline void condattr_init(pthread_condattr_t *attr)
+{
+ errno = pthread_condattr_init(attr);
+ if (errno)
+ err(EXIT_FAILURE, "pthread_condattr_init() failed");
+}
+
+static __always_inline void condattr_destroy(pthread_condattr_t *attr)
+{
+ errno = pthread_condattr_destroy(attr);
+ if (errno)
+ err(EXIT_FAILURE, "pthread_condattr_destroy() failed");
+}
+
+static __always_inline void condattr_setclock(pthread_condattr_t *attr,
+ clockid_t clock_id)
+{
+ errno = pthread_condattr_setclock(attr, clock_id);
+ if (unlikely(errno))
+ err(EXIT_FAILURE, "pthread_condattr_setclock() failed");
+}
+
+static __always_inline void cond_init(pthread_cond_t *cond,
+ const pthread_condattr_t *attr)
+{
+ errno = pthread_cond_init(cond, attr);
+ if (errno)
+ err(EXIT_FAILURE, "pthread_cond_init() failed");
+}
+
+static __always_inline int
+cond_timedwait(pthread_cond_t *restrict cond,
+ pthread_mutex_t *restrict mutex,
+ const struct timespec *restrict abstime)
+{
+ errno = pthread_cond_timedwait(cond, mutex, abstime);
+ if (errno && errno != ETIMEDOUT)
+ err(EXIT_FAILURE, "pthread_cond_timedwait() failed");
+ return errno;
+}
+
+static void init_printstate(void)
+{
+ pthread_condattr_t cattr;
+
+ printstate.ticket_counter = 0;
+ printstate.ticket_completed = 0;
+ printstate.cnt = 0;
+
+ mutex_init(&printstate.mutex, NULL);
+
+ condattr_init(&cattr);
+ condattr_setclock(&cattr, CLOCK_MONOTONIC);
+ cond_init(&printstate.cond, &cattr);
+ condattr_destroy(&cattr);
+}
+
+static void init_print_mtx(void)
+{
+ pthread_mutexattr_t mattr;
+
+ mutexattr_init(&mattr);
+ mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
+ mutex_init(&print_mtx, &mattr);
+ mutexattr_destroy(&mattr);
+
+}
+
+static void signal_blocking(int how)
+{
+ sigset_t s;
+
+ sigemptyset_or_die(&s);
+ sigaddset_or_die(&s, SIGHUP);
+ sigaddset_or_die(&s, SIGTERM);
+ sigaddset_or_die(&s, SIGINT);
+
+ errno = pthread_sigmask(how, &s, NULL);
+ if (unlikely(errno)) {
+ warn("pthread_sigmask() failed");
+ cleanup_exit(EXIT_FAILURE);
+ }
+}
+
+static void signal_handler(int num)
+{
+ signal_flag = num;
+}
+
+static void setup_sig_handler(void)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = signal_handler;
+
+ sigaction_or_die(SIGHUP, &sa, NULL);
+ sigaction_or_die(SIGTERM, &sa, NULL);
+ sigaction_or_die(SIGINT, &sa, NULL);
+}
+
+static void process_signal(int signal)
+{
+ char *name;
+
+ name = strsignal(signal);
+ if (name == NULL)
+ printf("Received signal %d\n", signal);
+ else
+ printf("Received signal %d (%s)\n", signal, name);
+ cleanup_exit(EXIT_SUCCESS);
+}
+
+static __always_inline void check_signals(void)
+{
+ int signal = signal_flag;
+
+ if (unlikely(signal))
+ process_signal(signal);
+}
+
+static __always_inline void get_time_in_future(struct timespec *future,
+ long time_us)
+{
+ long nsec;
+
+ clock_gettime_or_die(CLOCK_MONOTONIC, future);
+ future->tv_sec += time_us / USEC_PER_SEC;
+ nsec = future->tv_nsec + (time_us * NSEC_PER_USEC) % NSEC_PER_SEC;
+ if (nsec >= NSEC_PER_SEC) {
+ future->tv_nsec = nsec % NSEC_PER_SEC;
+ future->tv_sec += 1;
+ }
+}
+
+static __always_inline bool time_has_passed(const struct timespec *time)
+{
+ struct timespec now;
+
+ clock_gettime_or_die(CLOCK_MONOTONIC, &now);
+ if (now.tv_sec > time->tv_sec)
+ return true;
+ if (now.tv_sec < time->tv_sec)
+ return false;
+ return (now.tv_nsec >= time->tv_nsec);
+}
+
+static bool mutex_trylock_limit(pthread_mutex_t *mutex, int time_ms)
+{
+ long time_us = time_ms * USEC_PER_MSEC;
+ struct timespec limit;
+
+ get_time_in_future(&limit, time_us);
+ do {
+ errno = pthread_mutex_trylock(mutex);
+ if (errno && errno != EBUSY)
+ err(EXIT_FAILURE, "pthread_mutex_trylock() failed");
+ } while (errno && !time_has_passed(&limit));
+ return errno == 0;
+}
+
+static void restore_trace_opts(const struct ftrace_state *state,
+ const bool *cur)
+{
+ int i;
+ int r;
+
+ for (i = 0; i < OPTIDX_NR; i++)
+ if (state->opt_valid[i] && state->opt[i] != cur[i]) {
+ r = set_trace_opt(optstr[i], state->opt[i]);
+ if (r < 0)
+ warnx("Failed to restore the %s option to %s",
+ optstr[i], bool2str(state->opt[i]));
+ else if (verbose_ftrace())
+ printf("Restored the %s option in %s to %s\n",
+ optstr[i], TR_OPTIONS,
+ bool2str(state->opt[i]));
+ }
+}
+
+static char *read_file(const char *file, enum errhandling h)
+{
+ int psize;
+ char *r;
+ static const char *emsg = "Failed to read the %s file";
+
+ r = tracefs_instance_file_read(NULL, file, &psize);
+ if (!r) {
+ if (h) {
+ warn(emsg, file);
+ if (h == ERR_CLEANUP)
+ cleanup_exit(EXIT_FAILURE);
+ } else
+ errx(EXIT_FAILURE, emsg, file);
+ }
+
+ if (r && r[psize - 1] == '\n')
+ r[psize - 1] = '\0';
+ return r;
+}
+
+static void restore_file(const char *file, char **saved, const char *cur)
+{
+ if (*saved && was_changed(*saved, cur)) {
+ if (tracefs_instance_file_write(NULL, file, *saved) < 0)
+ warnx("Failed to restore %s to %s!", file, *saved);
+ else if (verbose_ftrace())
+ printf("Restored %s to %s\n", file, *saved);
+ free(*saved);
+ *saved = NULL;
+ }
+}
+
+static void restore_ftrace(void)
+{
+ mutex_lock(&save_state.mutex);
+
+ restore_file(TR_CURRENT, &save_state.tracer, current_tracer);
+ restore_file(TR_THRESH, &save_state.thresh, threshold);
+ restore_trace_opts(&save_state, use_options);
+
+ mutex_unlock(&save_state.mutex);
+}
+
+static void cleanup_exit(int status)
+{
+ char *maxlat;
+
+ if (!setup_ftrace)
+ exit(status);
+
+ /*
+ * We try the print_mtx for 1 sec in order to avoid garbled
+ * output if possible, but if it cannot be obtained we proceed anyway.
+ */
+ mutex_trylock_limit(&print_mtx, TRY_PRINTMUTEX_MS);
+
+ maxlat = read_file(TR_MAXLAT, ERR_WARN);
+ if (maxlat) {
+ printf("The maximum detected latency was: %sus\n", maxlat);
+ free(maxlat);
+ }
+
+ restore_ftrace();
+ /*
+ * We do not need to unlock the print_mtx here because we will exit at
+ * the end of this function. Unlocking print_mtx causes problems if a
+ * print thread happens to be waiting for the mutex because we have
+ * just changed the ftrace settings to the original and thus the
+ * print thread would output incorrect data from ftrace.
+ */
+ exit(status);
+}
+
+static void init_save_state(void)
+{
+ pthread_mutexattr_t mattr;
+
+ mutexattr_init(&mattr);
+ mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
+ mutex_init(&save_state.mutex, &mattr);
+ mutexattr_destroy(&mattr);
+
+ save_state.tracer = NULL;
+ save_state.thresh = NULL;
+ save_state.opt_valid[OPTIDX_FUNC_TR] = false;
+ save_state.opt_valid[OPTIDX_DISP_GR] = false;
+}
+
+static int printstate_next_ticket(struct entry *req)
+{
+ int r;
+
+ r = ++(printstate.ticket_counter);
+ req->ticket = r;
+ req->ticket_completed_ref = printstate.ticket_completed;
+ cond_broadcast(&printstate.cond);
+ return r;
+}
+
+static __always_inline
+void printstate_mark_req_completed(const struct entry *req)
+{
+ if (req->ticket > printstate.ticket_completed)
+ printstate.ticket_completed = req->ticket;
+}
+
+static __always_inline
+bool printstate_has_new_req_arrived(const struct entry *req)
+{
+ return (printstate.ticket_counter != req->ticket);
+}
+
+static __always_inline int printstate_cnt_inc(void)
+{
+ int value;
+
+ mutex_lock(&printstate.cnt_mutex);
+ value = ++printstate.cnt;
+ mutex_unlock(&printstate.cnt_mutex);
+ return value;
+}
+
+static __always_inline int printstate_cnt_dec(void)
+{
+ int value;
+
+ mutex_lock(&printstate.cnt_mutex);
+ value = --printstate.cnt;
+ mutex_unlock(&printstate.cnt_mutex);
+ return value;
+}
+
+static __always_inline int printstate_cnt_read(void)
+{
+ int value;
+
+ mutex_lock(&printstate.cnt_mutex);
+ value = printstate.cnt;
+ mutex_unlock(&printstate.cnt_mutex);
+ return value;
+}
+
+static __always_inline
+bool prev_req_won_race(const struct entry *req)
+{
+ return (printstate.ticket_completed != req->ticket_completed_ref);
+}
+
+static void sleeptable_resize(int size, bool printout, struct short_msg *msg)
+{
+ int bytes;
+
+ if (printout) {
+ msg->len = 0;
+ if (unlikely(size > PROB_TABLE_MAX_SIZE))
+ bytes = snprintf(msg->buf, sizeof(msg->buf),
+"Cannot increase probability table to %d (maximum size reached)\n", size);
+ else
+ bytes = snprintf(msg->buf, sizeof(msg->buf),
+"Increasing probability table to %d\n", size);
+ if (bytes < 0)
+ warn("snprintf() failed");
+ else
+ msg->len = bytes;
+ }
+
+ if (unlikely(size < 0)) {
+ /* Should never happen */
+ warnx("Bad program state at %s:%d", __FILE__, __LINE__);
+ cleanup_exit(EXIT_FAILURE);
+ return;
+ }
+ sleeptable.size = size;
+ sleeptable.table = &probabilities[PROB_TABLE_MAX_SIZE - size];
+}
+
+static void init_probabilities(void)
+{
+ int i;
+ int j = 1000;
+
+ for (i = 0; i < PROB_TABLE_MAX_SIZE; i++) {
+ probabilities[i] = 1000 / j;
+ j--;
+ }
+ mutex_init(&sleeptable.mutex, NULL);
+}
+
+static int table_get_probability(const struct entry *req,
+ struct short_msg *msg)
+{
+ int diff = req->ticket - req->ticket_completed_ref;
+ int rval = 0;
+
+ msg->len = 0;
+ diff--;
+ /* Should never happen...*/
+ if (unlikely(diff < 0)) {
+ warnx("Programmer assumption error at %s:%d\n", __FILE__,
+ __LINE__);
+ cleanup_exit(EXIT_FAILURE);
+ }
+ mutex_lock(&sleeptable.mutex);
+ if (diff >= (sleeptable.size - 1)) {
+ rval = sleeptable.table[sleeptable.size - 1];
+ sleeptable_resize(sleeptable.size + 1, verbose_sizechange(),
+ msg);
+ } else {
+ rval = sleeptable.table[diff];
+ }
+ mutex_unlock(&sleeptable.mutex);
+ return rval;
+}
+
+static void init_queue(struct queue *q)
+{
+ q->next_prod_idx = 0;
+ q->next_cons_idx = 0;
+ mutex_init(&q->mutex, NULL);
+ errno = pthread_cond_init(&q->cond, NULL);
+ if (errno)
+ err(EXIT_FAILURE, "pthread_cond_init() failed");
+}
+
+static __always_inline int queue_len(const struct queue *q)
+{
+ if (q->next_prod_idx >= q->next_cons_idx)
+ return q->next_prod_idx - q->next_cons_idx;
+ else
+ return QUEUE_SIZE - q->next_cons_idx + q->next_prod_idx;
+}
+
+static __always_inline int queue_nr_free(const struct queue *q)
+{
+ int nr_free = QUEUE_SIZE - queue_len(q);
+
+ /*
+ * If there is only one slot left we will anyway lie and claim that the
+ * queue is full because adding an element will make it appear empty
+ */
+ if (nr_free == 1)
+ nr_free = 0;
+ return nr_free;
+}
+
+static __always_inline void queue_idx_inc(int *idx)
+{
+ *idx = (*idx + 1) % QUEUE_SIZE;
+}
+
+static __always_inline void queue_push_to_back(struct queue *q,
+ const struct entry *e)
+{
+ q->entries[q->next_prod_idx] = *e;
+ queue_idx_inc(&q->next_prod_idx);
+}
+
+static __always_inline struct entry queue_pop_from_front(struct queue *q)
+{
+ struct entry e = q->entries[q->next_cons_idx];
+
+ queue_idx_inc(&q->next_cons_idx);
+ return e;
+}
+
+static __always_inline void queue_cond_signal(struct queue *q)
+{
+ cond_signal(&q->cond);
+}
+
+static __always_inline void queue_cond_wait(struct queue *q)
+{
+ cond_wait(&q->cond, &q->mutex);
+}
+
+static __always_inline int queue_try_to_add_entry(struct queue *q,
+ const struct entry *e)
+{
+ int r = 0;
+
+ mutex_lock(&q->mutex);
+ if (queue_nr_free(q) > 0) {
+ queue_push_to_back(q, e);
+ cond_signal(&q->cond);
+ } else
+ r = -1;
+ mutex_unlock(&q->mutex);
+ return r;
+}
+
+static struct entry queue_wait_for_entry(struct queue *q)
+{
+ struct entry e;
+
+ mutex_lock(&q->mutex);
+ while (true) {
+ if (queue_len(&printqueue) > 0) {
+ e = queue_pop_from_front(q);
+ break;
+ }
+ queue_cond_wait(q);
+ }
+ mutex_unlock(&q->mutex);
+
+ return e;
+}
+
+static const struct policy *policy_from_name(const char *name)
+{
+ const struct policy *p = &policies[0];
+
+ while (p->name != NULL) {
+ if (!strcmp(name, p->name))
+ return p;
+ p++;
+ }
+ return NULL;
+}
+
+static const char *policy_name(int policy)
+{
+ const struct policy *p = &policies[0];
+ static const char *rval = "unknown";
+
+ while (p->name != NULL) {
+ if (p->policy == policy)
+ return p->name;
+ p++;
+ }
+ return rval;
+}
+
+static bool is_relevant_tracer(const char *name)
+{
+ unsigned int i;
+
+ for (i = 0; relevant_tracers[i]; i++)
+ if (!strcmp(name, relevant_tracers[i]))
+ return true;
+ return false;
+}
+
+static bool random_makes_sense(const char *name)
+{
+ unsigned int i;
+
+ for (i = 0; random_tracers[i]; i++)
+ if (!strcmp(name, random_tracers[i]))
+ return true;
+ return false;
+}
+
+static void show_available(void)
+{
+ char **tracers;
+ int found = 0;
+ int i;
+
+ tracers = tracefs_tracers(NULL);
+ for (i = 0; tracers && tracers[i]; i++) {
+ if (is_relevant_tracer(tracers[i]))
+ found++;
+ }
+
+ if (!tracers) {
+ warnx(no_tracer_msg);
+ return;
+ }
+
+ if (!found) {
+ warnx(no_latency_tr_msg);
+ tracefs_list_free(tracers);
+ return;
+ }
+
+ printf("The following latency tracers are available on your system:\n");
+ for (i = 0; tracers[i]; i++) {
+ if (is_relevant_tracer(tracers[i]))
+ printf("%s\n", tracers[i]);
+ }
+ tracefs_list_free(tracers);
+}
+
+static bool tracer_valid(const char *name, bool *notracer)
+{
+ char **tracers;
+ int i;
+ bool rval = false;
+
+ *notracer = false;
+ tracers = tracefs_tracers(NULL);
+ if (!tracers) {
+ *notracer = true;
+ return false;
+ }
+ for (i = 0; tracers[i]; i++)
+ if (!strcmp(tracers[i], name)) {
+ rval = true;
+ break;
+ }
+ tracefs_list_free(tracers);
+ return rval;
+}
+
+static const char *find_default_tracer(void)
+{
+ int i;
+ bool notracer;
+ bool valid;
+
+ for (i = 0; relevant_tracers[i]; i++) {
+ valid = tracer_valid(relevant_tracers[i], &notracer);
+ if (notracer)
+ errx(EXIT_FAILURE, no_tracer_msg);
+ if (valid)
+ return relevant_tracers[i];
+ }
+ return NULL;
+}
+
+static bool toss_coin(struct drand48_data *buffer, unsigned int prob)
+{
+ long r;
+
+ if (unlikely(lrand48_r(buffer, &r))) {
+ warnx("lrand48_r() failed");
+ cleanup_exit(EXIT_FAILURE);
+ }
+ r = r % 1000L;
+ if (r < prob)
+ return true;
+ else
+ return false;
+}
+
+
+static long go_to_sleep(const struct entry *req)
+{
+ struct timespec future;
+ long delay = sleep_time;
+
+ get_time_in_future(&future, delay);
+
+ mutex_lock(&printstate.mutex);
+ while (!printstate_has_new_req_arrived(req)) {
+ cond_timedwait(&printstate.cond, &printstate.mutex, &future);
+ if (time_has_passed(&future))
+ break;
+ };
+
+ if (printstate_has_new_req_arrived(req))
+ delay = -1;
+ mutex_unlock(&printstate.mutex);
+
+ return delay;
+}
+
+
+static void set_priority(void)
+{
+ int r;
+ pid_t pid;
+ struct sched_param param;
+
+ memset(&param, 0, sizeof(param));
+ param.sched_priority = sched_pri;
+
+ pid = getpid();
+ r = sched_setscheduler(pid, sched_policy, &param);
+
+ if (r != 0)
+ err(EXIT_FAILURE, "sched_setscheduler() failed");
+}
+
+pid_t latency_collector_gettid(void)
+{
+ return (pid_t) syscall(__NR_gettid);
+}
+
+static void print_priority(void)
+{
+ pid_t tid;
+ int policy;
+ int r;
+ struct sched_param param;
+
+ tid = latency_collector_gettid();
+ r = pthread_getschedparam(pthread_self(), &policy, &param);
+ if (r != 0) {
+ warn("pthread_getschedparam() failed");
+ cleanup_exit(EXIT_FAILURE);
+ }
+ mutex_lock(&print_mtx);
+ printf("Thread %d runs with scheduling policy %s and priority %d\n",
+ tid, policy_name(policy), param.sched_priority);
+ mutex_unlock(&print_mtx);
+}
+
+static __always_inline
+void __print_skipmessage(const struct short_msg *resize_msg,
+ const struct timespec *timestamp, char *buffer,
+ size_t bufspace, const struct entry *req, bool excuse,
+ const char *str)
+{
+ ssize_t bytes = 0;
+ char *p = &buffer[0];
+ long us, sec;
+ int r;
+
+ sec = timestamp->tv_sec;
+ us = timestamp->tv_nsec / 1000;
+
+ if (resize_msg != NULL && resize_msg->len > 0) {
+ strncpy(p, resize_msg->buf, resize_msg->len);
+ bytes += resize_msg->len;
+ p += resize_msg->len;
+ bufspace -= resize_msg->len;
+ }
+
+ if (excuse)
+ r = snprintf(p, bufspace,
+"%ld.%06ld Latency %d printout skipped due to %s\n",
+ sec, us, req->ticket, str);
+ else
+ r = snprintf(p, bufspace, "%ld.%06ld Latency %d detected\n",
+ sec, us, req->ticket);
+
+ if (r < 0)
+ warn("snprintf() failed");
+ else
+ bytes += r;
+
+ /* These prints could happen concurrently */
+ mutex_lock(&print_mtx);
+ write_or_die(fd_stdout, buffer, bytes);
+ mutex_unlock(&print_mtx);
+}
+
+static void print_skipmessage(const struct short_msg *resize_msg,
+ const struct timespec *timestamp, char *buffer,
+ size_t bufspace, const struct entry *req,
+ bool excuse)
+{
+ __print_skipmessage(resize_msg, timestamp, buffer, bufspace, req,
+ excuse, "random delay");
+}
+
+static void print_lostmessage(const struct timespec *timestamp, char *buffer,
+ size_t bufspace, const struct entry *req,
+ const char *reason)
+{
+ __print_skipmessage(NULL, timestamp, buffer, bufspace, req, true,
+ reason);
+}
+
+static void print_tracefile(const struct short_msg *resize_msg,
+ const struct timespec *timestamp, char *buffer,
+ size_t bufspace, long slept,
+ const struct entry *req)
+{
+ static const int reserve = 256;
+ char *p = &buffer[0];
+ ssize_t bytes = 0;
+ ssize_t bytes_tot = 0;
+ long us, sec;
+ long slept_ms;
+ int trace_fd;
+
+ /* Save some space for the final string and final null char */
+ bufspace = bufspace - reserve - 1;
+
+ if (resize_msg != NULL && resize_msg->len > 0) {
+ bytes = resize_msg->len;
+ strncpy(p, resize_msg->buf, bytes);
+ bytes_tot += bytes;
+ p += bytes;
+ bufspace -= bytes;
+ }
+
+ trace_fd = open(debug_tracefile, O_RDONLY);
+
+ if (trace_fd < 0) {
+ warn("open() failed on %s", debug_tracefile);
+ return;
+ }
+
+ sec = timestamp->tv_sec;
+ us = timestamp->tv_nsec / 1000;
+
+ if (slept != 0) {
+ slept_ms = slept / 1000;
+ bytes = snprintf(p, bufspace,
+"%ld.%06ld Latency %d randomly sleep for %ld ms before print\n",
+ sec, us, req->ticket, slept_ms);
+ } else {
+ bytes = snprintf(p, bufspace,
+ "%ld.%06ld Latency %d immediate print\n", sec,
+ us, req->ticket);
+ }
+
+ if (bytes < 0) {
+ warn("snprintf() failed");
+ return;
+ }
+ p += bytes;
+ bufspace -= bytes;
+ bytes_tot += bytes;
+
+ bytes = snprintf(p, bufspace,
+">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BEGIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"
+ );
+
+ if (bytes < 0) {
+ warn("snprintf() failed");
+ return;
+ }
+
+ p += bytes;
+ bufspace -= bytes;
+ bytes_tot += bytes;
+
+ do {
+ bytes = read(trace_fd, p, bufspace);
+ if (bytes < 0) {
+ if (errno == EINTR)
+ continue;
+ warn("read() failed on %s", debug_tracefile);
+ if (unlikely(close(trace_fd) != 0))
+ warn("close() failed on %s", debug_tracefile);
+ return;
+ }
+ if (bytes == 0)
+ break;
+ p += bytes;
+ bufspace -= bytes;
+ bytes_tot += bytes;
+ } while (true);
+
+ if (unlikely(close(trace_fd) != 0))
+ warn("close() failed on %s", debug_tracefile);
+
+ printstate_cnt_dec();
+ /* Add the reserve space back to the budget for the final string */
+ bufspace += reserve;
+
+ bytes = snprintf(p, bufspace,
+ ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n");
+
+ if (bytes < 0) {
+ warn("snprintf() failed");
+ return;
+ }
+
+ bytes_tot += bytes;
+
+ /* These prints could happen concurrently */
+ mutex_lock(&print_mtx);
+ write_or_die(fd_stdout, buffer, bytes_tot);
+ mutex_unlock(&print_mtx);
+}
+
+static char *get_no_opt(const char *opt)
+{
+ char *no_opt;
+ int s;
+
+ s = strlen(opt) + strlen(OPT_NO_PREFIX) + 1;
+ /* We may be called from cleanup_exit() via set_trace_opt() */
+ no_opt = malloc_or_die_nocleanup(s);
+ strcpy(no_opt, OPT_NO_PREFIX);
+ strcat(no_opt, opt);
+ return no_opt;
+}
+
+static char *find_next_optstr(const char *allopt, const char **next)
+{
+ const char *begin;
+ const char *end;
+ char *r;
+ int s = 0;
+
+ if (allopt == NULL)
+ return NULL;
+
+ for (begin = allopt; *begin != '\0'; begin++) {
+ if (isgraph(*begin))
+ break;
+ }
+
+ if (*begin == '\0')
+ return NULL;
+
+ for (end = begin; *end != '\0' && isgraph(*end); end++)
+ s++;
+
+ r = malloc_or_die_nocleanup(s + 1);
+ strncpy(r, begin, s);
+ r[s] = '\0';
+ *next = begin + s;
+ return r;
+}
+
+static bool get_trace_opt(const char *allopt, const char *opt, bool *found)
+{
+ *found = false;
+ char *no_opt;
+ char *str;
+ const char *next = allopt;
+ bool rval = false;
+
+ no_opt = get_no_opt(opt);
+
+ do {
+ str = find_next_optstr(next, &next);
+ if (str == NULL)
+ break;
+ if (!strcmp(str, opt)) {
+ *found = true;
+ rval = true;
+ free(str);
+ break;
+ }
+ if (!strcmp(str, no_opt)) {
+ *found = true;
+ rval = false;
+ free(str);
+ break;
+ }
+ free(str);
+ } while (true);
+ free(no_opt);
+
+ return rval;
+}
+
+static int set_trace_opt(const char *opt, bool value)
+{
+ char *str;
+ int r;
+
+ if (value)
+ str = strdup(opt);
+ else
+ str = get_no_opt(opt);
+
+ r = tracefs_instance_file_write(NULL, TR_OPTIONS, str);
+ free(str);
+ return r;
+}
+
+void save_trace_opts(struct ftrace_state *state)
+{
+ char *allopt;
+ int psize;
+ int i;
+
+ allopt = tracefs_instance_file_read(NULL, TR_OPTIONS, &psize);
+ if (!allopt)
+ errx(EXIT_FAILURE, "Failed to read the %s file\n", TR_OPTIONS);
+
+ for (i = 0; i < OPTIDX_NR; i++)
+ state->opt[i] = get_trace_opt(allopt, optstr[i],
+ &state->opt_valid[i]);
+
+ free(allopt);
+}
+
+static void write_file(const char *file, const char *cur, const char *new,
+ enum errhandling h)
+{
+ int r;
+ static const char *emsg = "Failed to write to the %s file!";
+
+ /* Do nothing if we now that the current and new value are equal */
+ if (cur && !needs_change(cur, new))
+ return;
+
+ r = tracefs_instance_file_write(NULL, file, new);
+ if (r < 0) {
+ if (h) {
+ warnx(emsg, file);
+ if (h == ERR_CLEANUP)
+ cleanup_exit(EXIT_FAILURE);
+ } else
+ errx(EXIT_FAILURE, emsg, file);
+ }
+ if (verbose_ftrace()) {
+ mutex_lock(&print_mtx);
+ printf("%s was set to %s\n", file, new);
+ mutex_unlock(&print_mtx);
+ }
+}
+
+static void reset_max_latency(void)
+{
+ write_file(TR_MAXLAT, NULL, "0", ERR_CLEANUP);
+}
+
+static void save_and_disable_tracer(void)
+{
+ char *orig_th;
+ char *tracer;
+ bool need_nop = false;
+
+ mutex_lock(&save_state.mutex);
+
+ save_trace_opts(&save_state);
+ tracer = read_file(TR_CURRENT, ERR_EXIT);
+ orig_th = read_file(TR_THRESH, ERR_EXIT);
+
+ if (needs_change(tracer, NOP_TRACER)) {
+ mutex_lock(&print_mtx);
+ if (force_tracer) {
+ printf(
+ "The %s tracer is already in use but proceeding anyway!\n",
+ tracer);
+ } else {
+ printf(
+ "The %s tracer is already in use, cowardly bailing out!\n"
+ "This could indicate that another program or instance is tracing.\n"
+ "Use the -F [--force] option to disregard the current tracer.\n", tracer);
+ exit(0);
+ }
+ mutex_unlock(&print_mtx);
+ need_nop = true;
+ }
+
+ save_state.tracer = tracer;
+ save_state.thresh = orig_th;
+
+ if (need_nop)
+ write_file(TR_CURRENT, NULL, NOP_TRACER, ERR_EXIT);
+
+ mutex_unlock(&save_state.mutex);
+}
+
+void set_trace_opts(struct ftrace_state *state, bool *new)
+{
+ int i;
+ int r;
+
+ /*
+ * We only set options if we earlier detected that the option exists in
+ * the trace_options file and that the wanted setting is different from
+ * the one we saw in save_and_disable_tracer()
+ */
+ for (i = 0; i < OPTIDX_NR; i++)
+ if (state->opt_valid[i] &&
+ state->opt[i] != new[i]) {
+ r = set_trace_opt(optstr[i], new[i]);
+ if (r < 0) {
+ warnx("Failed to set the %s option to %s",
+ optstr[i], bool2str(new[i]));
+ cleanup_exit(EXIT_FAILURE);
+ }
+ if (verbose_ftrace()) {
+ mutex_lock(&print_mtx);
+ printf("%s in %s was set to %s\n", optstr[i],
+ TR_OPTIONS, bool2str(new[i]));
+ mutex_unlock(&print_mtx);
+ }
+ }
+}
+
+static void enable_tracer(void)
+{
+ mutex_lock(&save_state.mutex);
+ set_trace_opts(&save_state, use_options);
+
+ write_file(TR_THRESH, save_state.thresh, threshold, ERR_CLEANUP);
+ write_file(TR_CURRENT, NOP_TRACER, current_tracer, ERR_CLEANUP);
+
+ mutex_unlock(&save_state.mutex);
+}
+
+static void tracing_loop(void)
+{
+ int ifd = inotify_init();
+ int wd;
+ const ssize_t bufsize = sizeof(inotify_buffer);
+ const ssize_t istructsize = sizeof(struct inotify_event);
+ char *buf = &inotify_buffer[0];
+ ssize_t nr_read;
+ char *p;
+ int modified;
+ struct inotify_event *event;
+ struct entry req;
+ char *buffer;
+ const size_t bufspace = PRINT_BUFFER_SIZE;
+ struct timespec timestamp;
+
+ print_priority();
+
+ buffer = malloc_or_die(bufspace);
+
+ if (ifd < 0)
+ err(EXIT_FAILURE, "inotify_init() failed!");
+
+
+ if (setup_ftrace) {
+ /*
+ * We must disable the tracer before resetting the max_latency
+ */
+ save_and_disable_tracer();
+ /*
+ * We must reset the max_latency before the inotify_add_watch()
+ * call.
+ */
+ reset_max_latency();
+ }
+
+ wd = inotify_add_watch(ifd, debug_maxlat, IN_MODIFY);
+ if (wd < 0)
+ err(EXIT_FAILURE, "inotify_add_watch() failed!");
+
+ if (setup_ftrace)
+ enable_tracer();
+
+ signal_blocking(SIG_UNBLOCK);
+
+ while (true) {
+ modified = 0;
+ check_signals();
+ nr_read = read(ifd, buf, bufsize);
+ check_signals();
+ if (nr_read < 0) {
+ if (errno == EINTR)
+ continue;
+ warn("read() failed on inotify fd!");
+ cleanup_exit(EXIT_FAILURE);
+ }
+ if (nr_read == bufsize)
+ warnx("inotify() buffer filled, skipping events");
+ if (nr_read < istructsize) {
+ warnx("read() returned too few bytes on inotify fd");
+ cleanup_exit(EXIT_FAILURE);
+ }
+
+ for (p = buf; p < buf + nr_read;) {
+ event = (struct inotify_event *) p;
+ if ((event->mask & IN_MODIFY) != 0)
+ modified++;
+ p += istructsize + event->len;
+ }
+ while (modified > 0) {
+ check_signals();
+ mutex_lock(&printstate.mutex);
+ check_signals();
+ printstate_next_ticket(&req);
+ if (printstate_cnt_read() > 0) {
+ printstate_mark_req_completed(&req);
+ mutex_unlock(&printstate.mutex);
+ if (verbose_lostevent()) {
+ clock_gettime_or_die(CLOCK_MONOTONIC,
+ &timestamp);
+ print_lostmessage(&timestamp, buffer,
+ bufspace, &req,
+ "inotify loop");
+ }
+ break;
+ }
+ mutex_unlock(&printstate.mutex);
+ if (queue_try_to_add_entry(&printqueue, &req) != 0) {
+ /* These prints could happen concurrently */
+ check_signals();
+ mutex_lock(&print_mtx);
+ check_signals();
+ write_or_die(fd_stdout, queue_full_warning,
+ sizeof(queue_full_warning));
+ mutex_unlock(&print_mtx);
+ }
+ modified--;
+ }
+ }
+}
+
+static void *do_printloop(void *arg)
+{
+ const size_t bufspace = PRINT_BUFFER_SIZE;
+ char *buffer;
+ long *rseed = (long *) arg;
+ struct drand48_data drandbuf;
+ long slept = 0;
+ struct entry req;
+ int prob = 0;
+ struct timespec timestamp;
+ struct short_msg resize_msg;
+
+ print_priority();
+
+ if (srand48_r(*rseed, &drandbuf) != 0) {
+ warn("srand48_r() failed!\n");
+ cleanup_exit(EXIT_FAILURE);
+ }
+
+ buffer = malloc_or_die(bufspace);
+
+ while (true) {
+ req = queue_wait_for_entry(&printqueue);
+ clock_gettime_or_die(CLOCK_MONOTONIC, &timestamp);
+ mutex_lock(&printstate.mutex);
+ if (prev_req_won_race(&req)) {
+ printstate_mark_req_completed(&req);
+ mutex_unlock(&printstate.mutex);
+ if (verbose_lostevent())
+ print_lostmessage(&timestamp, buffer, bufspace,
+ &req, "print loop");
+ continue;
+ }
+ mutex_unlock(&printstate.mutex);
+
+ /*
+ * Toss a coin to decide if we want to sleep before printing
+ * out the backtrace. The reason for this is that opening
+ * /sys/kernel/debug/tracing/trace will cause a blackout of
+ * hundreds of ms, where no latencies will be noted by the
+ * latency tracer. Thus by randomly sleeping we try to avoid
+ * missing traces systematically due to this. With this option
+ * we will sometimes get the first latency, some other times
+ * some of the later ones, in case of closely spaced traces.
+ */
+ if (trace_enable && use_random_sleep) {
+ slept = 0;
+ prob = table_get_probability(&req, &resize_msg);
+ if (!toss_coin(&drandbuf, prob))
+ slept = go_to_sleep(&req);
+ if (slept >= 0) {
+ /* A print is ongoing */
+ printstate_cnt_inc();
+ /*
+ * We will do the printout below so we have to
+ * mark it as completed while we still have the
+ * mutex.
+ */
+ mutex_lock(&printstate.mutex);
+ printstate_mark_req_completed(&req);
+ mutex_unlock(&printstate.mutex);
+ }
+ }
+ if (trace_enable) {
+ /*
+ * slept < 0 means that we detected another
+ * notification in go_to_sleep() above
+ */
+ if (slept >= 0)
+ /*
+ * N.B. printstate_cnt_dec(); will be called
+ * inside print_tracefile()
+ */
+ print_tracefile(&resize_msg, &timestamp, buffer,
+ bufspace, slept, &req);
+ else
+ print_skipmessage(&resize_msg, &timestamp,
+ buffer, bufspace, &req, true);
+ } else {
+ print_skipmessage(&resize_msg, &timestamp, buffer,
+ bufspace, &req, false);
+ }
+ }
+ return NULL;
+}
+
+static void start_printthread(void)
+{
+ unsigned int i;
+ long *seed;
+ int ufd;
+
+ ufd = open(DEV_URANDOM, O_RDONLY);
+ if (nr_threads > MAX_THREADS) {
+ warnx(
+"Number of requested print threads was %d, max number is %d\n",
+ nr_threads, MAX_THREADS);
+ nr_threads = MAX_THREADS;
+ }
+ for (i = 0; i < nr_threads; i++) {
+ seed = malloc_or_die(sizeof(*seed));
+ if (ufd < 0 ||
+ read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) {
+ printf(
+"Warning! Using trivial random number seed, since %s not available\n",
+ DEV_URANDOM);
+ fflush(stdout);
+ *seed = i;
+ }
+ errno = pthread_create(&printthread[i], NULL, do_printloop,
+ seed);
+ if (errno)
+ err(EXIT_FAILURE, "pthread_create()");
+ }
+ if (ufd > 0 && close(ufd) != 0)
+ warn("close() failed");
+}
+
+static void show_usage(void)
+{
+ printf(
+"Usage: %s [OPTION]...\n\n"
+"Collect closely occurring latencies from %s\n"
+"with any of the following tracers: preemptirqsoff, preemptoff, irqsoff, "
+"wakeup,\nwakeup_dl, or wakeup_rt.\n\n"
+
+"The occurrence of a latency is detected by monitoring the file\n"
+"%s with inotify.\n\n"
+
+"The following options are supported:\n\n"
+
+"-l, --list\t\tList the latency tracers that are supported by the\n"
+"\t\t\tcurrently running Linux kernel. If you don't see the\n"
+"\t\t\ttracer that you want, you will probably need to\n"
+"\t\t\tchange your kernel config and build a new kernel.\n\n"
+
+"-t, --tracer TR\t\tUse the tracer TR. The default is to use the first\n"
+"\t\t\ttracer that is supported by the kernel in the following\n"
+"\t\t\torder of precedence:\n\n"
+"\t\t\tpreemptirqsoff\n"
+"\t\t\tpreemptoff\n"
+"\t\t\tirqsoff\n"
+"\t\t\twakeup\n"
+"\t\t\twakeup_rt\n"
+"\t\t\twakeup_dl\n"
+"\n"
+"\t\t\tIf TR is not on the list above, then a warning will be\n"
+"\t\t\tprinted.\n\n"
+
+"-F, --force\t\tProceed even if another ftrace tracer is active. Without\n"
+"\t\t\tthis option, the program will refuse to start tracing if\n"
+"\t\t\tany other tracer than the nop tracer is active.\n\n"
+
+"-s, --threshold TH\tConfigure ftrace to use a threshold of TH microseconds\n"
+"\t\t\tfor the tracer. The default is 0, which means that\n"
+"\t\t\ttracing_max_latency will be used. tracing_max_latency is\n"
+"\t\t\tset to 0 when the program is started and contains the\n"
+"\t\t\tmaximum of the latencies that have been encountered.\n\n"
+
+"-f, --function\t\tEnable the function-trace option in trace_options. With\n"
+"\t\t\tthis option, ftrace will trace the functions that are\n"
+"\t\t\texecuted during a latency, without it we only get the\n"
+"\t\t\tbeginning, end, and backtrace.\n\n"
+
+"-g, --graph\t\tEnable the display-graph option in trace_option. This\n"
+"\t\t\toption causes ftrace to show the graph of how functions\n"
+"\t\t\tare calling other functions.\n\n"
+
+"-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n"
+"\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n"
+"\t\t\tusing rr or fifo, remember that these policies may cause\n"
+"\t\t\tother tasks to experience latencies.\n\n"
+
+"-p, --priority PRI\tRun the program with priority PRI. The acceptable range\n"
+"\t\t\tof PRI depends on the scheduling policy.\n\n"
+
+"-n, --notrace\t\tIf latency is detected, do not print out the content of\n"
+"\t\t\tthe trace file to standard output\n\n"
+
+"-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n"
+
+"-r, --random\t\tArbitrarily sleep a certain amount of time, default\n"
+"\t\t\t%ld ms, before reading the trace file. The\n"
+"\t\t\tprobabilities for sleep are chosen so that the\n"
+"\t\t\tprobability of obtaining any of a cluster of closely\n"
+"\t\t\toccurring latencies are equal, i.e. we will randomly\n"
+"\t\t\tchoose which one we collect from the trace file.\n\n"
+"\t\t\tThis option is probably only useful with the irqsoff,\n"
+"\t\t\tpreemptoff, and preemptirqsoff tracers.\n\n"
+
+"-a, --nrlat NRLAT\tFor the purpose of arbitrary delay, assume that there\n"
+"\t\t\tare no more than NRLAT clustered latencies. If NRLAT\n"
+"\t\t\tlatencies are detected during a run, this value will\n"
+"\t\t\tautomatically be increased to NRLAT + 1 and then to\n"
+"\t\t\tNRLAT + 2 and so on. The default is %d. This option\n"
+"\t\t\timplies -r. We need to know this number in order to\n"
+"\t\t\tbe able to calculate the probabilities of sleeping.\n"
+"\t\t\tSpecifically, the probabilities of not sleeping, i.e. to\n"
+"\t\t\tdo an immediate printout will be:\n\n"
+"\t\t\t1/NRLAT 1/(NRLAT - 1) ... 1/3 1/2 1\n\n"
+"\t\t\tThe probability of sleeping will be:\n\n"
+"\t\t\t1 - P, where P is from the series above\n\n"
+"\t\t\tThis descending probability will cause us to choose\n"
+"\t\t\tan occurrence at random. Observe that the final\n"
+"\t\t\tprobability is 0, it is when we reach this probability\n"
+"\t\t\tthat we increase NRLAT automatically. As an example,\n"
+"\t\t\twith the default value of 2, the probabilities will be:\n\n"
+"\t\t\t1/2 0\n\n"
+"\t\t\tThis means, when a latency is detected we will sleep\n"
+"\t\t\twith 50%% probability. If we ever detect another latency\n"
+"\t\t\tduring the sleep period, then the probability of sleep\n"
+"\t\t\twill be 0%% and the table will be expanded to:\n\n"
+"\t\t\t1/3 1/2 0\n\n"
+
+"-v, --verbose\t\tIncrease the verbosity. If this option is given once,\n"
+"\t\t\tthen print a message every time that the NRLAT value\n"
+"\t\t\tis automatically increased. It also causes a message to\n"
+"\t\t\tbe printed when the ftrace settings are changed. If this\n"
+"\t\t\toption is given at least twice, then also print a\n"
+"\t\t\twarning for lost events.\n\n"
+
+"-u, --time TIME\t\tArbitrarily sleep for a specified time TIME ms before\n"
+"\t\t\tprinting out the trace from the trace file. The default\n"
+"\t\t\tis %ld ms. This option implies -r.\n\n"
+
+"-x, --no-ftrace\t\tDo not configure ftrace. This assume that the user\n"
+"\t\t\tconfigures the ftrace files in sysfs such as\n"
+"\t\t\t/sys/kernel/tracing/current_tracer or equivalent.\n\n"
+
+"-i, --tracefile FILE\tUse FILE as trace file. The default is\n"
+"\t\t\t%s.\n"
+"\t\t\tThis options implies -x\n\n"
+
+"-m, --max-lat FILE\tUse FILE as tracing_max_latency file. The default is\n"
+"\t\t\t%s.\n"
+"\t\t\tThis options implies -x\n\n"
+,
+prg_name, debug_tracefile_dflt, debug_maxlat_dflt, DEFAULT_NR_PRINTER_THREADS,
+SLEEP_TIME_MS_DEFAULT, DEFAULT_TABLE_SIZE, SLEEP_TIME_MS_DEFAULT,
+debug_tracefile_dflt, debug_maxlat_dflt);
+}
+
+static void find_tracefiles(void)
+{
+ debug_tracefile_dflt = tracefs_get_tracing_file("trace");
+ if (debug_tracefile_dflt == NULL) {
+ /* This is needed in show_usage() */
+ debug_tracefile_dflt = DEBUG_NOFILE;
+ }
+
+ debug_maxlat_dflt = tracefs_get_tracing_file("tracing_max_latency");
+ if (debug_maxlat_dflt == NULL) {
+ /* This is needed in show_usage() */
+ debug_maxlat_dflt = DEBUG_NOFILE;
+ }
+
+ debug_tracefile = debug_tracefile_dflt;
+ debug_maxlat = debug_maxlat_dflt;
+}
+
+bool alldigits(const char *s)
+{
+ for (; *s != '\0'; s++)
+ if (!isdigit(*s))
+ return false;
+ return true;
+}
+
+void check_alldigits(const char *optarg, const char *argname)
+{
+ if (!alldigits(optarg))
+ errx(EXIT_FAILURE,
+ "The %s parameter expects a decimal argument\n", argname);
+}
+
+static void scan_arguments(int argc, char *argv[])
+{
+ int c;
+ int i;
+ int option_idx = 0;
+
+ static struct option long_options[] = {
+ { "list", no_argument, 0, 'l' },
+ { "tracer", required_argument, 0, 't' },
+ { "force", no_argument, 0, 'F' },
+ { "threshold", required_argument, 0, 's' },
+ { "function", no_argument, 0, 'f' },
+ { "graph", no_argument, 0, 'g' },
+ { "policy", required_argument, 0, 'c' },
+ { "priority", required_argument, 0, 'p' },
+ { "help", no_argument, 0, 'h' },
+ { "notrace", no_argument, 0, 'n' },
+ { "random", no_argument, 0, 'r' },
+ { "nrlat", required_argument, 0, 'a' },
+ { "threads", required_argument, 0, 'e' },
+ { "time", required_argument, 0, 'u' },
+ { "verbose", no_argument, 0, 'v' },
+ { "no-ftrace", no_argument, 0, 'x' },
+ { "tracefile", required_argument, 0, 'i' },
+ { "max-lat", required_argument, 0, 'm' },
+ { 0, 0, 0, 0 }
+ };
+ const struct policy *p;
+ int max, min;
+ int value;
+ bool notracer, valid;
+
+ /*
+ * We must do this before parsing the arguments because show_usage()
+ * needs to display these.
+ */
+ find_tracefiles();
+
+ while (true) {
+ c = getopt_long(argc, argv, "lt:Fs:fgc:p:hnra:e:u:vxi:m:",
+ long_options, &option_idx);
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'l':
+ show_available();
+ exit(0);
+ break;
+ case 't':
+ current_tracer = strdup(optarg);
+ if (!is_relevant_tracer(current_tracer)) {
+ warnx("%s is not a known latency tracer!\n",
+ current_tracer);
+ }
+ valid = tracer_valid(current_tracer, &notracer);
+ if (notracer)
+ errx(EXIT_FAILURE, no_tracer_msg);
+ if (!valid)
+ errx(EXIT_FAILURE,
+"The tracer %s is not supported by your kernel!\n", current_tracer);
+ break;
+ case 'F':
+ force_tracer = true;
+ break;
+ case 's':
+ check_alldigits(optarg, "-s [--threshold]");
+ threshold = strdup(optarg);
+ break;
+ case 'f':
+ use_options[OPTIDX_FUNC_TR] = true;
+ break;
+ case 'g':
+ use_options[OPTIDX_DISP_GR] = true;
+ break;
+ case 'c':
+ p = policy_from_name(optarg);
+ if (p != NULL) {
+ sched_policy = p->policy;
+ sched_policy_set = true;
+ if (!sched_pri_set) {
+ sched_pri = p->default_pri;
+ sched_pri_set = true;
+ }
+ } else {
+ warnx("Unknown scheduling %s\n", optarg);
+ show_usage();
+ exit(0);
+ }
+ break;
+ case 'p':
+ check_alldigits(optarg, "-p [--priority]");
+ sched_pri = atoi(optarg);
+ sched_pri_set = true;
+ break;
+ case 'h':
+ show_usage();
+ exit(0);
+ break;
+ case 'n':
+ trace_enable = false;
+ use_random_sleep = false;
+ break;
+ case 'e':
+ check_alldigits(optarg, "-e [--threads]");
+ value = atoi(optarg);
+ if (value > 0)
+ nr_threads = value;
+ else {
+ warnx("NRTHR must be > 0\n");
+ show_usage();
+ exit(0);
+ }
+ break;
+ case 'u':
+ check_alldigits(optarg, "-u [--time]");
+ value = atoi(optarg);
+ if (value < 0) {
+ warnx("TIME must be >= 0\n");
+ show_usage();
+ ;
+ }
+ trace_enable = true;
+ use_random_sleep = true;
+ sleep_time = value * USEC_PER_MSEC;
+ break;
+ case 'v':
+ verbosity++;
+ break;
+ case 'r':
+ trace_enable = true;
+ use_random_sleep = true;
+ break;
+ case 'a':
+ check_alldigits(optarg, "-a [--nrlat]");
+ value = atoi(optarg);
+ if (value <= 0) {
+ warnx("NRLAT must be > 0\n");
+ show_usage();
+ exit(0);
+ }
+ trace_enable = true;
+ use_random_sleep = true;
+ table_startsize = value;
+ break;
+ case 'x':
+ setup_ftrace = false;
+ break;
+ case 'i':
+ setup_ftrace = false;
+ debug_tracefile = strdup(optarg);
+ break;
+ case 'm':
+ setup_ftrace = false;
+ debug_maxlat = strdup(optarg);
+ break;
+ default:
+ show_usage();
+ exit(0);
+ break;
+ }
+ }
+
+ if (setup_ftrace) {
+ if (!current_tracer) {
+ current_tracer = find_default_tracer();
+ if (!current_tracer)
+ errx(EXIT_FAILURE,
+"No default tracer found and tracer not specified\n");
+ }
+
+ if (use_random_sleep && !random_makes_sense(current_tracer)) {
+ warnx("WARNING: The tracer is %s and random sleep has",
+ current_tracer);
+ fprintf(stderr,
+"been enabled. Random sleep is intended for the following tracers:\n");
+ for (i = 0; random_tracers[i]; i++)
+ fprintf(stderr, "%s\n", random_tracers[i]);
+ fprintf(stderr, "\n");
+ }
+ }
+
+ if (debug_tracefile == DEBUG_NOFILE ||
+ debug_maxlat == DEBUG_NOFILE)
+ errx(EXIT_FAILURE,
+"Could not find tracing directory e.g. /sys/kernel/tracing\n");
+
+ if (!sched_policy_set) {
+ sched_policy = SCHED_RR;
+ sched_policy_set = true;
+ if (!sched_pri_set) {
+ sched_pri = RT_DEFAULT_PRI;
+ sched_pri_set = true;
+ }
+ }
+
+ max = sched_get_priority_max(sched_policy);
+ min = sched_get_priority_min(sched_policy);
+
+ if (sched_pri < min) {
+ printf(
+"ATTENTION: Increasing priority to minimum, which is %d\n", min);
+ sched_pri = min;
+ }
+ if (sched_pri > max) {
+ printf(
+"ATTENTION: Reducing priority to maximum, which is %d\n", max);
+ sched_pri = max;
+ }
+}
+
+static void show_params(void)
+{
+ printf(
+"\n"
+"Running with scheduling policy %s and priority %d. Using %d print threads.\n",
+ policy_name(sched_policy), sched_pri, nr_threads);
+ if (trace_enable) {
+ if (use_random_sleep) {
+ printf(
+"%s will be printed with random delay\n"
+"Start size of the probability table:\t\t\t%d\n"
+"Print a message when the prob. table changes size:\t%s\n"
+"Print a warning when an event has been lost:\t\t%s\n"
+"Sleep time is:\t\t\t\t\t\t%ld ms\n",
+debug_tracefile,
+table_startsize,
+bool2str(verbose_sizechange()),
+bool2str(verbose_lostevent()),
+sleep_time / USEC_PER_MSEC);
+ } else {
+ printf("%s will be printed immediately\n",
+ debug_tracefile);
+ }
+ } else {
+ printf("%s will not be printed\n",
+ debug_tracefile);
+ }
+ if (setup_ftrace) {
+ printf("Tracer:\t\t\t\t\t\t\t%s\n"
+ "%s option:\t\t\t\t\t%s\n"
+ "%s option:\t\t\t\t\t%s\n",
+ current_tracer,
+ optstr[OPTIDX_FUNC_TR],
+ bool2str(use_options[OPTIDX_FUNC_TR]),
+ optstr[OPTIDX_DISP_GR],
+ bool2str(use_options[OPTIDX_DISP_GR]));
+ if (!strcmp(threshold, "0"))
+ printf("Threshold:\t\t\t\t\t\ttracing_max_latency\n");
+ else
+ printf("Threshold:\t\t\t\t\t\t%s\n", threshold);
+ }
+ printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+ init_save_state();
+ signal_blocking(SIG_BLOCK);
+ setup_sig_handler();
+ open_stdout();
+
+ if (argc >= 1)
+ prg_name = argv[0];
+ else
+ prg_name = prg_unknown;
+
+ scan_arguments(argc, argv);
+ show_params();
+
+ init_printstate();
+ init_print_mtx();
+ if (use_random_sleep) {
+ init_probabilities();
+ if (verbose_sizechange())
+ printf("Initializing probability table to %d\n",
+ table_startsize);
+ sleeptable_resize(table_startsize, false, NULL);
+ }
+ set_priority();
+ init_queue(&printqueue);
+ start_printthread();
+ tracing_loop();
+ return 0;
+}