about | summary | refs | log | tree | commit | diff | stats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h11
-rw-r--r--tools/arch/mips/include/uapi/asm/perf_regs.h40
-rw-r--r--tools/arch/x86/include/asm/asm.h193
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h3
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h7
-rw-r--r--tools/arch/x86/include/asm/irq_vectors.h7
-rw-r--r--tools/arch/x86/include/asm/msr-index.h4
-rw-r--r--tools/arch/x86/include/asm/nops.h24
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h15
-rw-r--r--tools/arch/x86/include/uapi/asm/svm.h3
-rw-r--r--tools/bootconfig/include/linux/bootconfig.h4
-rw-r--r--tools/bootconfig/main.c1
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst2
-rw-r--r--tools/bpf/bpftool/Makefile5
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool6
-rw-r--r--tools/bpf/bpftool/cgroup.c3
-rw-r--r--tools/bpf/bpftool/gen.c421
-rw-r--r--tools/bpf/bpftool/main.c11
-rw-r--r--tools/bpf/bpftool/main.h1
-rw-r--r--tools/bpf/bpftool/prog.c109
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c3
-rw-r--r--tools/bpf/resolve_btfids/main.c3
-rw-r--r--tools/build/Makefile.build22
-rwxr-xr-xtools/debugging/kernel-chktaint2
-rw-r--r--tools/include/linux/bitmap.h11
-rw-r--r--tools/include/linux/bits.h2
-rw-r--r--tools/include/linux/const.h8
-rw-r--r--tools/include/uapi/asm-generic/unistd.h3
-rw-r--r--tools/include/uapi/linux/bpf.h82
-rw-r--r--tools/include/uapi/linux/fs.h2
-rw-r--r--tools/include/uapi/linux/in.h3
-rw-r--r--tools/include/uapi/linux/kvm.h110
-rw-r--r--tools/include/uapi/linux/mount.h1
-rw-r--r--tools/include/uapi/linux/perf_event.h2
-rw-r--r--tools/include/uapi/linux/prctl.h8
-rw-r--r--tools/lib/bitmap.c14
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile18
-rw-r--r--tools/lib/bpf/README.rst168
-rw-r--r--tools/lib/bpf/bpf.c179
-rw-r--r--tools/lib/bpf/bpf.h2
-rw-r--r--tools/lib/bpf/bpf_gen_internal.h41
-rw-r--r--tools/lib/bpf/bpf_helpers.h66
-rw-r--r--tools/lib/bpf/bpf_prog_linfo.c18
-rw-r--r--tools/lib/bpf/bpf_tracing.h108
-rw-r--r--tools/lib/bpf/btf.c302
-rw-r--r--tools/lib/bpf/btf_dump.c14
-rw-r--r--tools/lib/bpf/gen_loader.c729
-rw-r--r--tools/lib/bpf/libbpf.c965
-rw-r--r--tools/lib/bpf/libbpf.h68
-rw-r--r--tools/lib/bpf/libbpf.map13
-rw-r--r--tools/lib/bpf/libbpf_errno.c7
-rw-r--r--tools/lib/bpf/libbpf_internal.h66
-rw-r--r--tools/lib/bpf/libbpf_legacy.h59
-rw-r--r--tools/lib/bpf/linker.c41
-rw-r--r--tools/lib/bpf/netlink.c572
-rw-r--r--tools/lib/bpf/nlattr.c2
-rw-r--r--tools/lib/bpf/nlattr.h60
-rw-r--r--tools/lib/bpf/ringbuf.c26
-rw-r--r--tools/lib/bpf/skel_internal.h123
-rw-r--r--tools/lib/bpf/xsk.c2
-rw-r--r--tools/lib/traceevent/plugins/plugin_kvm.c4
-rw-r--r--tools/objtool/arch/x86/decode.c6
-rw-r--r--tools/objtool/arch/x86/include/arch/special.h1
-rw-r--r--tools/objtool/check.c38
-rw-r--r--tools/objtool/elf.c135
-rw-r--r--tools/objtool/include/objtool/elf.h18
-rw-r--r--tools/objtool/include/objtool/objtool.h3
-rw-r--r--tools/objtool/include/objtool/special.h1
-rw-r--r--tools/objtool/special.c14
-rw-r--r--tools/perf/Documentation/itrace.txt1
-rw-r--r--tools/perf/Documentation/perf-annotate.txt7
-rw-r--r--tools/perf/Documentation/perf-dlfilter.txt251
-rw-r--r--tools/perf/Documentation/perf-inject.txt10
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt125
-rw-r--r--tools/perf/Documentation/perf-probe.txt19
-rw-r--r--tools/perf/Documentation/perf-script-python.txt46
-rw-r--r--tools/perf/Documentation/perf-script.txt22
-rw-r--r--tools/perf/Documentation/perf-top.txt12
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt33
-rw-r--r--tools/perf/Makefile.config15
-rw-r--r--tools/perf/Makefile.perf4
-rw-r--r--tools/perf/arch/arm/include/arch-tests.h5
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c133
-rw-r--r--tools/perf/arch/arm64/include/arch-tests.h5
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c45
-rw-r--r--tools/perf/arch/arm64/util/mem-events.c2
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl2
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/powerpc/include/arch-tests.h7
-rw-r--r--tools/perf/arch/powerpc/tests/dwarf-unwind.c1
-rw-r--r--tools/perf/arch/powerpc/util/mem-events.c2
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl2
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h12
-rw-r--r--tools/perf/arch/x86/tests/dwarf-unwind.c1
-rw-r--r--tools/perf/arch/x86/util/kvm-stat.c46
-rw-r--r--tools/perf/arch/x86/util/mem-events.c54
-rw-r--r--tools/perf/builtin-annotate.c11
-rw-r--r--tools/perf/builtin-buildid-list.c3
-rw-r--r--tools/perf/builtin-c2c.c40
-rw-r--r--tools/perf/builtin-inject.c98
-rw-r--r--tools/perf/builtin-mem.c51
-rw-r--r--tools/perf/builtin-probe.c12
-rw-r--r--tools/perf/builtin-record.c33
-rw-r--r--tools/perf/builtin-report.c2
-rw-r--r--tools/perf/builtin-script.c235
-rw-r--r--tools/perf/builtin-stat.c13
-rw-r--r--tools/perf/builtin-top.c8
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/perf.c4
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/cache.json30
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/floating_point.json2
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/frontend.json124
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/locks.json4
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/marked.json61
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/memory.json79
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/others.json133
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/pipeline.json135
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/pmc.json8
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/translation.json22
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/cache.json724
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/floating-point.json101
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/frontend.json610
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json273
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/memory.json654
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/other.json1089
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/pipeline.json1169
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/virtual-memory.json251
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/cache.json706
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/floating-point.json95
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/frontend.json469
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/memory.json291
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/other.json181
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/pipeline.json972
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-memory.json333
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-other.json2476
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-power.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/virtual-memory.json245
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv2
-rw-r--r--tools/perf/pmu-events/jevents.c2
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/Context.c168
-rw-r--r--tools/perf/scripts/python/bin/intel-pt-events-record4
-rw-r--r--tools/perf/scripts/python/bin/intel-pt-events-report4
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py101
-rw-r--r--tools/perf/scripts/python/intel-pt-events.py283
-rw-r--r--tools/perf/scripts/python/libxed.py107
-rw-r--r--tools/perf/tests/attr/base-record2
-rw-r--r--tools/perf/tests/builtin-test.c43
-rw-r--r--tools/perf/tests/dwarf-unwind.c4
-rw-r--r--tools/perf/tests/make7
-rw-r--r--tools/perf/tests/pfm.c14
-rwxr-xr-xtools/perf/tests/shell/stat_bpf_counters.sh20
-rw-r--r--tools/perf/tests/tests.h2
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h2
-rw-r--r--tools/perf/ui/browsers/annotate.c32
-rw-r--r--tools/perf/util/Build7
-rw-r--r--tools/perf/util/arm-spe.c73
-rw-r--r--tools/perf/util/auxtrace.c18
-rw-r--r--tools/perf/util/auxtrace.h53
-rw-r--r--tools/perf/util/bpf_counter.c62
-rw-r--r--tools/perf/util/bpf_counter.h52
-rw-r--r--tools/perf/util/cgroup.c44
-rw-r--r--tools/perf/util/cgroup.h12
-rw-r--r--tools/perf/util/cputopo.c80
-rw-r--r--tools/perf/util/cputopo.h13
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c61
-rw-r--r--tools/perf/util/cs-etm.c83
-rw-r--r--tools/perf/util/cs-etm.h4
-rw-r--r--tools/perf/util/data.c3
-rw-r--r--tools/perf/util/data.h1
-rw-r--r--tools/perf/util/db-export.c12
-rw-r--r--tools/perf/util/db-export.h2
-rw-r--r--tools/perf/util/dlfilter.c615
-rw-r--r--tools/perf/util/dlfilter.h97
-rw-r--r--tools/perf/util/dwarf-aux.c8
-rw-r--r--tools/perf/util/env.c13
-rw-r--r--tools/perf/util/env.h16
-rw-r--r--tools/perf/util/event.h2
-rw-r--r--tools/perf/util/evlist.c28
-rw-r--r--tools/perf/util/evlist.h1
-rw-r--r--tools/perf/util/evsel.c26
-rw-r--r--tools/perf/util/evsel.h4
-rw-r--r--tools/perf/util/header.c254
-rw-r--r--tools/perf/util/header.h2
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c729
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h20
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-log.h5
-rw-r--r--tools/perf/util/intel-pt.c230
-rw-r--r--tools/perf/util/llvm-utils.c2
-rw-r--r--tools/perf/util/machine.c3
-rw-r--r--tools/perf/util/mem-events.c103
-rw-r--r--tools/perf/util/mem-events.h4
-rw-r--r--tools/perf/util/metricgroup.c14
-rw-r--r--tools/perf/util/parse-events.c13
-rw-r--r--tools/perf/util/parse-events.l1
-rw-r--r--tools/perf/util/perf_api_probe.c10
-rw-r--r--tools/perf/util/perf_api_probe.h1
-rw-r--r--tools/perf/util/perf_dlfilter.h150
-rw-r--r--tools/perf/util/pfm.c11
-rw-r--r--tools/perf/util/pmu-hybrid.h11
-rw-r--r--tools/perf/util/probe-event.c203
-rw-r--r--tools/perf/util/probe-event.h2
-rw-r--r--tools/perf/util/probe-file.c95
-rw-r--r--tools/perf/util/probe-finder.c3
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c13
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c359
-rw-r--r--tools/perf/util/session.c12
-rw-r--r--tools/perf/util/srccode.c3
-rw-r--r--tools/perf/util/stat-display.c16
-rw-r--r--tools/perf/util/stat.c12
-rw-r--r--tools/perf/util/symbol-elf.c1
-rw-r--r--tools/perf/util/trace-event-scripting.c32
-rw-r--r--tools/perf/util/trace-event.h29
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c18
-rw-r--r--tools/power/x86/intel-speed-select/isst-core.c15
-rw-r--r--tools/power/x86/intel-speed-select/isst-display.c2
-rw-r--r--tools/power/x86/intel-speed-select/isst.h2
-rw-r--r--tools/scripts/Makefile.include30
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/arm64/fp/sve-probe-vls.c2
-rw-r--r--tools/testing/selftests/bpf/.gitignore4
-rw-r--r--tools/testing/selftests/bpf/Makefile19
-rw-r--r--tools/testing/selftests/bpf/Makefile.docs3
-rw-r--r--tools/testing/selftests/bpf/README.rst19
-rw-r--r--tools/testing/selftests/bpf/bench.c1
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_rename.c2
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c6
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c2
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/atomics.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c93
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_write.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c84
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_link.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_sleep.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/hashmap.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfree_skb.c19
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_btf.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_module.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/link_pinning.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c288
-rw-r--r--tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c559
-rw-r--r--tools/testing/selftests/bpf/prog_tests/obj_name.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_branches.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_buffer.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/probe_user.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rdonly_maps.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/resolve_btfids.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c57
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skeleton.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_fields.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/static_linked.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/syscall.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_bpf.c395
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c785
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_overhead.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_printk.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trampoline_count.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/udp_limit.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_link.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_netlink.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_file.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c1
-rw-r--r--tools/testing/selftests/bpf/progs/kfree_skb.c4
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps1.c2
-rw-r--r--tools/testing/selftests/bpf/progs/syscall.c121
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall3.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall4.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall5.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_check_mtu.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func_args.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_lookup_and_delete.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_migrate_reuseport.c135
-rw-r--r--tools/testing/selftests/bpf/progs/test_rdonly_maps.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_skeleton.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf_single.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked1.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked2.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs.c13
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_bpf.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_neigh.c33
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_peer.c56
-rw-r--r--tools/testing/selftests/bpf/progs/trace_printk.c6
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c94
-rwxr-xr-xtools/testing/selftests/bpf/test_doc_build.sh1
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c8
-rw-r--r--tools/testing/selftests/bpf/test_maps.c185
-rw-r--r--tools/testing/selftests/bpf/test_progs.c3
-rw-r--r--tools/testing/selftests/bpf/test_progs.h9
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_redirect.sh216
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c7
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect_multi.sh204
-rw-r--r--tools/testing/selftests/bpf/verifier/and.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds.c14
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/jmp32.c22
-rw-r--r--tools/testing/selftests/bpf/verifier/jset.c10
-rw-r--r--tools/testing/selftests/bpf/verifier/stack_ptr.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/unpriv.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/value_ptr_arith.c15
-rw-r--r--tools/testing/selftests/bpf/xdp_redirect_multi.c226
-rw-r--r--tools/testing/selftests/cgroup/.gitignore3
-rw-r--r--tools/testing/selftests/cgroup/Makefile2
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c51
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h2
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c57
-rw-r--r--tools/testing/selftests/cgroup/test_kill.c297
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh3
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh3
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/port_scale.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh69
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh14
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh24
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/router_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_sample.sh12
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh167
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh14
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/fib.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/nexthop.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/psample.sh4
-rw-r--r--tools/testing/selftests/exec/Makefile6
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore2
-rw-r--r--tools/testing/selftests/futex/functional/Makefile7
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue.c136
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait.c171
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c126
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh6
-rw-r--r--tools/testing/selftests/kvm/.gitignore8
-rw-r--r--tools/testing/selftests/kvm/Makefile16
-rw-r--r--tools/testing/selftests/kvm/aarch64/debug-exceptions.c250
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c3
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c439
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c174
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c5
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c34
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h83
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h58
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h12
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/apic.h91
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/evmcs.h (renamed from tools/testing/selftests/kvm/include/evmcs.h)2
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/hyperv.h185
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h66
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h11
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c237
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c4
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/handlers.S126
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c131
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/ucall.c2
-rw-r--r--tools/testing/selftests/kvm/lib/elf.c6
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c404
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h17
-rw-r--r--tools/testing/selftests/kvm/lib/perf_test_util.c8
-rw-r--r--tools/testing/selftests/kvm/lib/rbtree.c1
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c17
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c51
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/apic.c45
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c368
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/svm.c9
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c52
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c18
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c1037
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c6
-rw-r--r--tools/testing/selftests/kvm/steal_time.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/emulator_error_test.c219
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c79
-rw-r--r--tools/testing/selftests/kvm/x86_64/get_cpuid_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/get_msr_index_features.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_clock.c10
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_features.c649
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmu_role_test.c147
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c10
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c7
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c9
-rw-r--r--tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c242
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c65
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c2
-rw-r--r--tools/testing/selftests/lib/Makefile2
-rw-r--r--tools/testing/selftests/lib/config1
-rwxr-xr-xtools/testing/selftests/lib/scanf.sh4
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c88
-rw-r--r--tools/testing/selftests/nci/.gitignore1
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/devlink_port_split.py8
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh12
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh25
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh364
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh32
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh456
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh458
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_dsfield.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_l4port.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/skbedit_priority.sh2
-rwxr-xr-xtools/testing/selftests/net/icmp.sh74
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh8
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c125
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh83
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh180
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh4
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh3
-rw-r--r--tools/testing/selftests/net/so_netns_cookie.c61
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh573
-rw-r--r--tools/testing/selftests/net/tls.c87
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh2
-rwxr-xr-xtools/testing/selftests/net/unicast_extensions.sh17
-rwxr-xr-xtools/testing/selftests/net/veth.sh5
-rw-r--r--tools/testing/selftests/netfilter/Makefile2
-rwxr-xr-xtools/testing/selftests/netfilter/nft_fib.sh221
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c7
-rw-r--r--tools/testing/selftests/perf_events/sigtrap_threads.c14
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/null_syscall.c3
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c17
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.h2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c63
-rw-r--r--tools/testing/selftests/powerpc/security/Makefile2
-rwxr-xr-xtools/testing/selftests/powerpc/security/mitigation-patching.sh75
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c1
-rw-r--r--tools/testing/selftests/proc/.gitignore1
-rw-r--r--tools/testing/selftests/rlimits/.gitignore2
-rw-r--r--tools/testing/selftests/rlimits/Makefile6
-rw-r--r--tools/testing/selftests/rlimits/config1
-rw-r--r--tools/testing/selftests/rlimits/rlimits-per-userns.c161
-rw-r--r--tools/testing/selftests/sched/.gitignore1
-rw-r--r--tools/testing/selftests/sched/Makefile14
-rw-r--r--tools/testing/selftests/sched/config1
-rw-r--r--tools/testing/selftests/sched/cs_prctl_test.c338
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c10
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c78
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py42
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ct.json45
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json28
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json8
-rw-r--r--tools/testing/selftests/vm/.gitignore3
-rw-r--r--tools/testing/selftests/vm/Makefile5
-rw-r--r--tools/testing/selftests/vm/gup_test.c96
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c158
-rw-r--r--tools/testing/selftests/vm/khugepaged.c4
-rw-r--r--tools/testing/selftests/vm/madv_populate.c342
-rw-r--r--tools/testing/selftests/vm/pkey-x86.h1
-rw-r--r--tools/testing/selftests/vm/protection_keys.c85
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh16
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c1054
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh1
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config1
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c491
-rw-r--r--tools/testing/vsock/util.c32
-rw-r--r--tools/testing/vsock/util.h3
-rw-r--r--tools/testing/vsock/vsock_test.c116
-rw-r--r--tools/vm/page_owner_sort.c4
515 files changed, 33222 insertions, 6356 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 24223adae150..b3edde68bc3e 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -184,6 +184,17 @@ struct kvm_vcpu_events {
__u32 reserved[12];
};
+struct kvm_arm_copy_mte_tags { /* Argument block for an MTE tag copy; NOTE(review): presumably passed to a KVM ioctl - confirm against the KVM API documentation */
+ __u64 guest_ipa; /* presumably the guest IPA (intermediate physical address) of the range - confirm */
+ __u64 length; /* length of the range; units and alignment rules are not shown here */
+ void __user *addr; /* user-space pointer (per the __user annotation); presumably the tag buffer - confirm */
+ __u64 flags; /* NOTE(review): presumably one of the KVM_ARM_TAGS_* values below - confirm */
+ __u64 reserved[2]; /* reserved words; no use is visible in this hunk */
+};
+
+#define KVM_ARM_TAGS_TO_GUEST 0 /* NOTE(review): looks like a copy-direction selector (to the guest) - confirm */
+#define KVM_ARM_TAGS_FROM_GUEST 1 /* NOTE(review): looks like a copy-direction selector (from the guest) - confirm */
+
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
#define KVM_REG_ARM_COPROC_SHIFT 16
diff --git a/tools/arch/mips/include/uapi/asm/perf_regs.h b/tools/arch/mips/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000000..d0f4ecd616cf
--- /dev/null
+++ b/tools/arch/mips/include/uapi/asm/perf_regs.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_MIPS_PERF_REGS_H
+#define _ASM_MIPS_PERF_REGS_H
+
+enum perf_event_mips_regs { /* Index constants for MIPS registers: PC first, then R1..R31 */
+ PERF_REG_MIPS_PC, /* 0; there is no R0 entry, so each PERF_REG_MIPS_Rn has the value n */
+ PERF_REG_MIPS_R1,
+ PERF_REG_MIPS_R2,
+ PERF_REG_MIPS_R3,
+ PERF_REG_MIPS_R4,
+ PERF_REG_MIPS_R5,
+ PERF_REG_MIPS_R6,
+ PERF_REG_MIPS_R7,
+ PERF_REG_MIPS_R8,
+ PERF_REG_MIPS_R9,
+ PERF_REG_MIPS_R10,
+ PERF_REG_MIPS_R11,
+ PERF_REG_MIPS_R12,
+ PERF_REG_MIPS_R13,
+ PERF_REG_MIPS_R14,
+ PERF_REG_MIPS_R15,
+ PERF_REG_MIPS_R16,
+ PERF_REG_MIPS_R17,
+ PERF_REG_MIPS_R18,
+ PERF_REG_MIPS_R19,
+ PERF_REG_MIPS_R20,
+ PERF_REG_MIPS_R21,
+ PERF_REG_MIPS_R22,
+ PERF_REG_MIPS_R23,
+ PERF_REG_MIPS_R24,
+ PERF_REG_MIPS_R25,
+ PERF_REG_MIPS_R26,
+ PERF_REG_MIPS_R27,
+ PERF_REG_MIPS_R28,
+ PERF_REG_MIPS_R29,
+ PERF_REG_MIPS_R30,
+ PERF_REG_MIPS_R31, /* 31; last real entry */
+ PERF_REG_MIPS_MAX = PERF_REG_MIPS_R31 + 1, /* 32: count of the entries above, not a register itself */
+};
+#endif /* _ASM_MIPS_PERF_REGS_H */
diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h
new file mode 100644
index 000000000000..3ad3da9a7d97
--- /dev/null
+++ b/tools/arch/x86/include/asm/asm.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_ASM_H
+#define _ASM_X86_ASM_H
+
+#ifdef __ASSEMBLY__ /* assembler sources: the helpers expand to the raw tokens */
+# define __ASM_FORM(x, ...) x,## __VA_ARGS__
+# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__
+# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__,
+#else /* C sources: the helpers expand to string literals built with __stringify() */
+#include <linux/stringify.h>
+# define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " " /* padded with a space on each side */
+# define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__) /* no padding */
+# define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) "," /* leading space, trailing comma */
+#endif /* __ASSEMBLY__ */
+
+#define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;) /* a ".byte" directive listing the given values, terminated by ';' */
+
+#ifndef __x86_64__
+/* 32 bit */
+# define __ASM_SEL(a,b) __ASM_FORM(a) /* selects the first argument */
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
+#else
+/* 64 bit */
+# define __ASM_SEL(a,b) __ASM_FORM(b) /* selects the second argument */
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
+#endif
+
+#define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \
+ inst##q##__VA_ARGS__) /* mnemonic with an 'l' suffix on 32-bit, 'q' suffix on 64-bit */
+#define __ASM_REG(reg) __ASM_SEL_RAW(e##reg, r##reg) /* register name with an 'e' prefix on 32-bit, 'r' prefix on 64-bit */
+
+#define _ASM_PTR __ASM_SEL(.long, .quad) /* data directive chosen per build: .long on 32-bit, .quad on 64-bit */
+#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8) /* alignment directive: 4 bytes on 32-bit, 8 bytes on 64-bit */
+
+#define _ASM_MOV __ASM_SIZE(mov)
+#define _ASM_INC __ASM_SIZE(inc)
+#define _ASM_DEC __ASM_SIZE(dec)
+#define _ASM_ADD __ASM_SIZE(add)
+#define _ASM_SUB __ASM_SIZE(sub)
+#define _ASM_XADD __ASM_SIZE(xadd)
+#define _ASM_MUL __ASM_SIZE(mul)
+
+#define _ASM_AX __ASM_REG(ax)
+#define _ASM_BX __ASM_REG(bx)
+#define _ASM_CX __ASM_REG(cx)
+#define _ASM_DX __ASM_REG(dx)
+#define _ASM_SP __ASM_REG(sp)
+#define _ASM_BP __ASM_REG(bp)
+#define _ASM_SI __ASM_REG(si)
+#define _ASM_DI __ASM_REG(di)
+
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1 _ASM_AX
+#define _ASM_ARG2 _ASM_DX
+#define _ASM_ARG3 _ASM_CX
+
+#define _ASM_ARG1L eax
+#define _ASM_ARG2L edx
+#define _ASM_ARG3L ecx
+
+#define _ASM_ARG1W ax
+#define _ASM_ARG2W dx
+#define _ASM_ARG3W cx
+
+#define _ASM_ARG1B al
+#define _ASM_ARG2B dl
+#define _ASM_ARG3B cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1 _ASM_DI
+#define _ASM_ARG2 _ASM_SI
+#define _ASM_ARG3 _ASM_DX
+#define _ASM_ARG4 _ASM_CX
+#define _ASM_ARG5 r8
+#define _ASM_ARG6 r9
+
+#define _ASM_ARG1Q rdi
+#define _ASM_ARG2Q rsi
+#define _ASM_ARG3Q rdx
+#define _ASM_ARG4Q rcx
+#define _ASM_ARG5Q r8
+#define _ASM_ARG6Q r9
+
+#define _ASM_ARG1L edi
+#define _ASM_ARG2L esi
+#define _ASM_ARG3L edx
+#define _ASM_ARG4L ecx
+#define _ASM_ARG5L r8d
+#define _ASM_ARG6L r9d
+
+#define _ASM_ARG1W di
+#define _ASM_ARG2W si
+#define _ASM_ARG3W dx
+#define _ASM_ARG4W cx
+#define _ASM_ARG5W r8w
+#define _ASM_ARG6W r9w
+
+#define _ASM_ARG1B dil
+#define _ASM_ARG2B sil
+#define _ASM_ARG3B dl
+#define _ASM_ARG4B cl
+#define _ASM_ARG5B r8b
+#define _ASM_ARG6B r9b
+
+#endif
+
+/*
+ * Macros to generate condition code outputs from inline assembly,
+ * The output operand must be type "bool".
+ */
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
+# define CC_OUT(c) "=@cc" #c
+#else
+# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
+# define CC_OUT(c) [_cc_ ## c] "=qm"
+#endif
+
+#ifdef __KERNEL__
+
+/* Exception table entry */
+#ifdef __ASSEMBLY__
+# define _ASM_EXTABLE_HANDLE(from, to, handler) \
+ .pushsection "__ex_table","a" ; \
+ .balign 4 ; \
+ .long (from) - . ; \
+ .long (to) - . ; \
+ .long (handler) - . ; \
+ .popsection
+
+# define _ASM_EXTABLE(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_UA(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
+# define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
+# define _ASM_EXTABLE_FAULT(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# ifdef CONFIG_KPROBES
+# define _ASM_NOKPROBE(entry) \
+ .pushsection "_kprobe_blacklist","aw" ; \
+ _ASM_ALIGN ; \
+ _ASM_PTR (entry); \
+ .popsection
+# else
+# define _ASM_NOKPROBE(entry)
+# endif
+
+#else /* ! __ASSEMBLY__ */
+# define _EXPAND_EXTABLE_HANDLE(x) #x
+# define _ASM_EXTABLE_HANDLE(from, to, handler) \
+ " .pushsection \"__ex_table\",\"a\"\n" \
+ " .balign 4\n" \
+ " .long (" #from ") - .\n" \
+ " .long (" #to ") - .\n" \
+ " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
+ " .popsection\n"
+
+# define _ASM_EXTABLE(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_UA(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
+# define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
+# define _ASM_EXTABLE_FAULT(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+/* For C file, we already have NOKPROBE_SYMBOL macro */
+
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction. Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function. If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned long current_stack_pointer asm(_ASM_SP);
+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_X86_ASM_H */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index ac37830ae941..d0ce5cfd3ac1 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -108,7 +108,7 @@
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
-/* free ( 3*32+29) */
+#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
@@ -378,6 +378,7 @@
#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
+#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index b7dd944dc867..8f28fafa98b3 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -56,11 +56,8 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
-#ifdef CONFIG_IOMMU_SUPPORT
-# define DISABLE_ENQCMD 0
-#else
-# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
-#endif
+/* Force disable because it's broken beyond repair */
+#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
#ifdef CONFIG_X86_SGX
# define DISABLE_SGX 0
diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/arch/x86/include/asm/irq_vectors.h
index 889f8b1b5b7f..43dcb9284208 100644
--- a/tools/arch/x86/include/asm/irq_vectors.h
+++ b/tools/arch/x86/include/asm/irq_vectors.h
@@ -26,8 +26,8 @@
* This file enumerates the exact layout of them:
*/
+/* This is used as an interrupt vector when programming the APIC. */
#define NMI_VECTOR 0x02
-#define MCE_VECTOR 0x12
/*
* IDT vectors usable for external interrupt sources start at 0x20.
@@ -84,7 +84,7 @@
*/
#define IRQ_WORK_VECTOR 0xf6
-#define UV_BAU_MESSAGE 0xf5
+/* 0xf5 - unused, was UV_BAU_MESSAGE */
#define DEFERRED_ERROR_VECTOR 0xf4
/* Vector on which hypervisor callbacks will be delivered */
@@ -114,6 +114,9 @@
#define FIRST_SYSTEM_VECTOR NR_VECTORS
#endif
+#define NR_EXTERNAL_VECTORS (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+#define NR_SYSTEM_VECTORS (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+
/*
* Size the maximum number of interrupts.
*
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 211ba3375ee9..a7c413432b33 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -772,6 +772,10 @@
#define MSR_TFA_RTM_FORCE_ABORT_BIT 0
#define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT)
+#define MSR_TFA_TSX_CPUID_CLEAR_BIT 1
+#define MSR_TFA_TSX_CPUID_CLEAR BIT_ULL(MSR_TFA_TSX_CPUID_CLEAR_BIT)
+#define MSR_TFA_SDV_ENABLE_RTM_BIT 2
+#define MSR_TFA_SDV_ENABLE_RTM BIT_ULL(MSR_TFA_SDV_ENABLE_RTM_BIT)
/* P4/Xeon+ specific */
#define MSR_IA32_MCG_EAX 0x00000180
diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h
index c1e5e818ba16..c5573eaa5bb9 100644
--- a/tools/arch/x86/include/asm/nops.h
+++ b/tools/arch/x86/include/asm/nops.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_NOPS_H
#define _ASM_X86_NOPS_H
+#include <asm/asm.h>
+
/*
* Define nops for use with alternative() and for tracing.
*/
@@ -57,20 +59,14 @@
#endif /* CONFIG_64BIT */
-#ifdef __ASSEMBLY__
-#define _ASM_MK_NOP(x) .byte x
-#else
-#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
-#endif
-
-#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8)
+#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1)
+#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2)
+#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3)
+#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4)
+#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5)
+#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6)
+#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
+#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
#define ASM_NOP_MAX 8
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 5a3022c8af82..a6c327f8ad9e 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -159,6 +159,19 @@ struct kvm_sregs {
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};
+struct kvm_sregs2 { /* special-register block exchanged via the ioctls named in the comment below */
+ /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
+ struct kvm_segment cs, ds, es, fs, gs, ss; /* segment registers */
+ struct kvm_segment tr, ldt;
+ struct kvm_dtable gdt, idt; /* descriptor tables */
+ __u64 cr0, cr2, cr3, cr4, cr8; /* control registers */
+ __u64 efer;
+ __u64 apic_base;
+ __u64 flags; /* NOTE(review): KVM_SREGS2_FLAGS_PDPTRS_VALID is defined right after this struct; presumably it is a bit of this field -- confirm */
+ __u64 pdptrs[4]; /* presumably only meaningful when the PDPTRS_VALID flag is set -- confirm against the KVM API docs */
+};
+#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1
+
/* for KVM_GET_FPU and KVM_SET_FPU */
struct kvm_fpu {
__u8 fpr[8][16];
@@ -437,6 +450,8 @@ struct kvm_vmx_nested_state_hdr {
__u16 flags;
} smm;
+ __u16 pad;
+
__u32 flags;
__u64 preemption_timer_deadline;
};
diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
index 554f75fe013c..efa969325ede 100644
--- a/tools/arch/x86/include/uapi/asm/svm.h
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -110,6 +110,9 @@
#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1
#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
+/* Exit code reserved for hypervisor/software use */
+#define SVM_EXIT_SW 0xf0000000
+
#define SVM_EXIT_ERR -1
#define SVM_EXIT_REASONS \
diff --git a/tools/bootconfig/include/linux/bootconfig.h b/tools/bootconfig/include/linux/bootconfig.h
index 078cbd2ba651..de7f30f99af3 100644
--- a/tools/bootconfig/include/linux/bootconfig.h
+++ b/tools/bootconfig/include/linux/bootconfig.h
@@ -4,4 +4,8 @@
#include "../../../../include/linux/bootconfig.h"
+#ifndef fallthrough
+# define fallthrough
+#endif
+
#endif
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 7362bef1a368..6cd6080cac04 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -399,6 +399,7 @@ static int apply_xbc(const char *path, const char *xbc_path)
}
/* TODO: Ensure the @path is initramfs/initrd image */
if (fstat(fd, &stat) < 0) {
+ ret = -errno;
pr_err("Failed to get the size of %s\n", path);
goto out;
}
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 790944c35602..baee8591ac76 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -30,7 +30,8 @@ CGROUP COMMANDS
| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
-| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** }
+| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** |
+| **sock_release** }
| *ATTACH_FLAGS* := { **multi** | **override** }
DESCRIPTION
@@ -106,6 +107,7 @@ DESCRIPTION
**getpeername6** call to getpeername(2) for an inet6 socket (since 5.8);
**getsockname4** call to getsockname(2) for an inet4 socket (since 5.8);
**getsockname6** call to getsockname(2) for an inet6 socket (since 5.8).
+ **sock_release** closing a userspace inet socket (since 5.9).
**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
Detach *PROG* from the cgroup *CGROUP* and attach type
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 358c7309d419..fe1b38e7e887 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -44,7 +44,7 @@ PROG COMMANDS
| **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** |
| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
-| **cgroup/getsockopt** | **cgroup/setsockopt** |
+| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** |
| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
| *ATTACH_TYPE* := {
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index b3073ae84018..d73232be1e99 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -136,7 +136,7 @@ endif
BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o)
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
@@ -180,6 +180,9 @@ endif
CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS)
+$(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
+ $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $<
+
$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index d67518bcbd44..cc33c5824a2f 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -478,7 +478,7 @@ _bpftool()
cgroup/recvmsg4 cgroup/recvmsg6 \
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
- cgroup/setsockopt struct_ops \
+ cgroup/setsockopt cgroup/sock_release struct_ops \
fentry fexit freplace sk_lookup" -- \
"$cur" ) )
return 0
@@ -1021,7 +1021,7 @@ _bpftool()
device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
getpeername4 getpeername6 getsockname4 getsockname6 \
sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
- setsockopt'
+ setsockopt sock_release'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag name'
case $prev in
@@ -1032,7 +1032,7 @@ _bpftool()
ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
post_bind4|post_bind6|connect4|connect6|getpeername4|\
getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
- recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt)
+ recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release)
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
"$cur" ) )
return 0
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index d901cc1b904a..6e53b1d393f4 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -28,7 +28,8 @@
" connect6 | getpeername4 | getpeername6 |\n" \
" getsockname4 | getsockname6 | sendmsg4 |\n" \
" sendmsg6 | recvmsg4 | recvmsg6 |\n" \
- " sysctl | getsockopt | setsockopt }"
+ " sysctl | getsockopt | setsockopt |\n" \
+ " sock_release }"
static unsigned int query_flags;
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 31ade77f5ef8..1d71ff8c52fa 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -18,6 +18,7 @@
#include <sys/stat.h>
#include <sys/mman.h>
#include <bpf/btf.h>
+#include <bpf/bpf_gen_internal.h>
#include "json_writer.h"
#include "main.h"
@@ -106,8 +107,10 @@ static int codegen_datasec_def(struct bpf_object *obj,
if (strcmp(sec_name, ".data") == 0) {
sec_ident = "data";
+ strip_mods = true;
} else if (strcmp(sec_name, ".bss") == 0) {
sec_ident = "bss";
+ strip_mods = true;
} else if (strcmp(sec_name, ".rodata") == 0) {
sec_ident = "rodata";
strip_mods = true;
@@ -129,6 +132,10 @@ static int codegen_datasec_def(struct bpf_object *obj,
int need_off = sec_var->offset, align_off, align;
__u32 var_type_id = var->type;
+ /* static variables are not exposed through BPF skeleton */
+ if (btf_var(var)->linkage == BTF_VAR_STATIC)
+ continue;
+
if (off > need_off) {
p_err("Something is wrong for %s's variable #%d: need offset %d, already at %d.\n",
sec_name, i, need_off, off);
@@ -268,6 +275,327 @@ static void codegen(const char *template, ...)
free(s);
}
+/*
+ * Write @data_sz bytes of @data to stdout as the body of a C string literal.
+ *
+ * A zero byte is printed as "\0" (2 columns), any other byte as "\xNN"
+ * (4 columns).  Whenever the running column count would exceed 78, a
+ * backslash-newline continuation is printed first and the count restarts
+ * with the width of the escape about to be written.
+ */
+static void print_hex(const char *data, int data_sz)
+{
+	int col = 0;
+	int pos;
+
+	for (pos = 0; pos < data_sz; pos++) {
+		unsigned char byte = (unsigned char)data[pos];
+		int width = byte == 0 ? 2 : 4;
+
+		col += width;
+		if (col > 78) {
+			printf("\\\n");
+			col = width;
+		}
+
+		if (byte != 0)
+			printf("\\x%02x", byte);
+		else
+			printf("\\0");
+	}
+}
+
+/*
+ * Size of the memory mapping for @map: the value size rounded up to a
+ * multiple of 8, multiplied by the maximum number of entries, and the
+ * product rounded up to a multiple of the system page size.
+ */
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+{
+	const long page_sz = sysconf(_SC_PAGE_SIZE);
+	size_t value_sz = (size_t)roundup(bpf_map__value_size(map), 8);
+	size_t total = value_sz * bpf_map__max_entries(map);
+
+	return roundup(total, page_sz);
+}
+
+static void codegen_attach_detach(struct bpf_object *obj, const char *obj_name)
+{ /*
+ * Print, for every program in @obj, a <obj_name>__<prog>__attach()
+ * function, followed by the aggregate <obj_name>__attach() and
+ * <obj_name>__detach() functions.  @obj_name is the prefix used for
+ * every generated identifier.
+ */
+ struct bpf_program *prog;
+
+ bpf_object__for_each_program(prog, obj) {
+ const char *tp_name; /* text following the first '/' of the program's section name */
+
+ codegen("\
+ \n\
+ \n\
+ static inline int \n\
+ %1$s__%2$s__attach(struct %1$s *skel) \n\
+ { \n\
+ int prog_fd = skel->progs.%2$s.prog_fd; \n\
+ ", obj_name, bpf_program__name(prog));
+
+ switch (bpf_program__get_type(prog)) {
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ tp_name = strchr(bpf_program__section_name(prog), '/') + 1; /* NOTE(review): strchr() result is used unchecked; this assumes the section name always contains a '/' -- confirm */
+ printf("\tint fd = bpf_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name);
+ break;
+ case BPF_PROG_TYPE_TRACING:
+ printf("\tint fd = bpf_raw_tracepoint_open(NULL, prog_fd);\n");
+ break;
+ default: /* any other program type: the generated function sets fd to 0 and returns it */
+ printf("\tint fd = ((void)prog_fd, 0); /* auto-attach not supported */\n");
+ break;
+ }
+ codegen("\
+ \n\
+ \n\
+ if (fd > 0) \n\
+ skel->links.%1$s_fd = fd; \n\
+ return fd; \n\
+ } \n\
+ ", bpf_program__name(prog));
+ }
+
+ codegen("\
+ \n\
+ \n\
+ static inline int \n\
+ %1$s__attach(struct %1$s *skel) \n\
+ { \n\
+ int ret = 0; \n\
+ \n\
+ ", obj_name);
+
+ bpf_object__for_each_program(prog, obj) { /* one chained call per program; the generated code stops attaching once ret is negative */
+ codegen("\
+ \n\
+ ret = ret < 0 ? ret : %1$s__%2$s__attach(skel); \n\
+ ", obj_name, bpf_program__name(prog));
+ }
+
+ codegen("\
+ \n\
+ return ret < 0 ? ret : 0; \n\
+ } \n\
+ \n\
+ static inline void \n\
+ %1$s__detach(struct %1$s *skel) \n\
+ { \n\
+ ", obj_name);
+
+ bpf_object__for_each_program(prog, obj) { /* one skel_closenz() call per program's link fd */
+ codegen("\
+ \n\
+ skel_closenz(skel->links.%1$s_fd); \n\
+ ", bpf_program__name(prog));
+ }
+
+ codegen("\
+ \n\
+ } \n\
+ ");
+}
+
+static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
+{ /*
+ * Print the <obj_name>__destroy() function.  The generated code
+ * returns early on a NULL skel, calls <obj_name>__detach(), passes
+ * every program fd and map fd to skel_closenz(), munmap()s the
+ * internal maps created with BPF_F_MMAPABLE, and finally free()s skel.
+ */
+ struct bpf_program *prog;
+ struct bpf_map *map;
+
+ codegen("\
+ \n\
+ static void \n\
+ %1$s__destroy(struct %1$s *skel) \n\
+ { \n\
+ if (!skel) \n\
+ return; \n\
+ %1$s__detach(skel); \n\
+ ",
+ obj_name);
+
+ bpf_object__for_each_program(prog, obj) {
+ codegen("\
+ \n\
+ skel_closenz(skel->progs.%1$s.prog_fd); \n\
+ ", bpf_program__name(prog));
+ }
+
+ bpf_object__for_each_map(map, obj) {
+ const char * ident;
+
+ ident = get_map_ident(map);
+ if (!ident) /* maps for which get_map_ident() returns NULL are skipped */
+ continue;
+ if (bpf_map__is_internal(map) &&
+ (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ printf("\tmunmap(skel->%1$s, %2$zd);\n",
+ ident, bpf_map_mmap_sz(map)); /* NOTE(review): bpf_map_mmap_sz() returns size_t but the conversion is the signed %zd; %zu would match */
+ codegen("\
+ \n\
+ skel_closenz(skel->maps.%1$s.map_fd); \n\
+ ", ident);
+ }
+ codegen("\
+ \n\
+ free(skel); \n\
+ } \n\
+ ",
+ obj_name); /* NOTE(review): this template has no conversion, so obj_name is unused here */
+}
+
+static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard)
+{ /*
+ * Print the rest of a loader-based skeleton header: close the struct
+ * opened by the caller, then emit the attach/detach and destroy
+ * functions followed by <obj_name>__open(), <obj_name>__load(),
+ * <obj_name>__open_and_load() and the closing "#endif" that names
+ * @header_guard.
+ *
+ * Returns 0 on success, otherwise the error returned by
+ * bpf_object__gen_loader() or bpf_object__load_xattr().
+ */
+ struct bpf_object_load_attr load_attr = {};
+ DECLARE_LIBBPF_OPTS(gen_loader_opts, opts);
+ struct bpf_map *map;
+ int err = 0;
+
+ err = bpf_object__gen_loader(obj, &opts); /* hands opts to libbpf before the load call below */
+ if (err)
+ return err;
+
+ load_attr.obj = obj;
+ if (verifier_logs)
+ /* log_level1 + log_level2 + stats, but not stable UAPI */
+ load_attr.log_level = 1 + 2 + 4;
+
+ err = bpf_object__load_xattr(&load_attr);
+ if (err) {
+ p_err("failed to load object file");
+ goto out;
+ }
+ /* If there was no error during load then gen_loader_opts
+ * are populated with the loader program.
+ */
+
+ /* finish generating 'struct skel' */
+ codegen("\
+ \n\
+ }; \n\
+ ", obj_name); /* NOTE(review): this template has no conversion, so obj_name is unused here */
+
+
+ codegen_attach_detach(obj, obj_name);
+
+ codegen_destroy(obj, obj_name);
+
+ codegen("\
+ \n\
+ static inline struct %1$s * \n\
+ %1$s__open(void) \n\
+ { \n\
+ struct %1$s *skel; \n\
+ \n\
+ skel = calloc(sizeof(*skel), 1); \n\
+ if (!skel) \n\
+ goto cleanup; \n\
+ skel->ctx.sz = (void *)&skel->links - (void *)skel; \n\
+ ",
+ obj_name, opts.data_sz); /* NOTE(review): only %1$s appears in this template; opts.data_sz has no matching conversion */
+ bpf_object__for_each_map(map, obj) { /* generated __open(): an anonymous mapping pre-filled with the initial value, for each internal mmapable map */
+ const char *ident;
+ const void *mmap_data = NULL;
+ size_t mmap_size = 0;
+
+ ident = get_map_ident(map);
+ if (!ident)
+ continue;
+
+ if (!bpf_map__is_internal(map) ||
+ !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ continue;
+
+ codegen("\
+ \n\
+ skel->%1$s = \n\
+ mmap(NULL, %2$zd, PROT_READ | PROT_WRITE,\n\
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0); \n\
+ if (skel->%1$s == (void *) -1) \n\
+ goto cleanup; \n\
+ memcpy(skel->%1$s, (void *)\"\\ \n\
+ ", ident, bpf_map_mmap_sz(map));
+ mmap_data = bpf_map__initial_value(map, &mmap_size);
+ print_hex(mmap_data, mmap_size);
+ printf("\", %2$zd);\n"
+ "\tskel->maps.%1$s.initial_value = (__u64)(long)skel->%1$s;\n",
+ ident, mmap_size); /* NOTE(review): mmap_size is size_t but is printed with the signed %zd; %zu would match */
+ }
+ codegen("\
+ \n\
+ return skel; \n\
+ cleanup: \n\
+ %1$s__destroy(skel); \n\
+ return NULL; \n\
+ } \n\
+ \n\
+ static inline int \n\
+ %1$s__load(struct %1$s *skel) \n\
+ { \n\
+ struct bpf_load_and_run_opts opts = {}; \n\
+ int err; \n\
+ \n\
+ opts.ctx = (struct bpf_loader_ctx *)skel; \n\
+ opts.data_sz = %2$d; \n\
+ opts.data = (void *)\"\\ \n\
+ ",
+ obj_name, opts.data_sz);
+ print_hex(opts.data, opts.data_sz); /* embeds the loader data blob as a string literal */
+ codegen("\
+ \n\
+ \"; \n\
+ ");
+
+ codegen("\
+ \n\
+ opts.insns_sz = %d; \n\
+ opts.insns = (void *)\"\\ \n\
+ ",
+ opts.insns_sz);
+ print_hex(opts.insns, opts.insns_sz); /* embeds the loader program instructions as a string literal */
+ codegen("\
+ \n\
+ \"; \n\
+ err = bpf_load_and_run(&opts); \n\
+ if (err < 0) \n\
+ return err; \n\
+ ", obj_name); /* NOTE(review): this template has no conversion, so obj_name is unused here */
+ bpf_object__for_each_map(map, obj) { /* generated __load(): re-maps each internal mmapable map over its map fd with MAP_FIXED */
+ const char *ident, *mmap_flags;
+
+ ident = get_map_ident(map);
+ if (!ident)
+ continue;
+
+ if (!bpf_map__is_internal(map) ||
+ !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ continue;
+ if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG)
+ mmap_flags = "PROT_READ";
+ else
+ mmap_flags = "PROT_READ | PROT_WRITE";
+
+ printf("\tskel->%1$s =\n"
+ "\t\tmmap(skel->%1$s, %2$zd, %3$s, MAP_SHARED | MAP_FIXED,\n"
+ "\t\t\tskel->maps.%1$s.map_fd, 0);\n",
+ ident, bpf_map_mmap_sz(map), mmap_flags);
+ }
+ codegen("\
+ \n\
+ return 0; \n\
+ } \n\
+ \n\
+ static inline struct %1$s * \n\
+ %1$s__open_and_load(void) \n\
+ { \n\
+ struct %1$s *skel; \n\
+ \n\
+ skel = %1$s__open(); \n\
+ if (!skel) \n\
+ return NULL; \n\
+ if (%1$s__load(skel)) { \n\
+ %1$s__destroy(skel); \n\
+ return NULL; \n\
+ } \n\
+ return skel; \n\
+ } \n\
+ ", obj_name);
+
+ codegen("\
+ \n\
+ \n\
+ #endif /* %s */ \n\
+ ",
+ header_guard);
+ err = 0; /* NOTE(review): err is already 0 on this path (a non-zero load result jumps to out) */
+out:
+ return err;
+}
+
static int do_skeleton(int argc, char **argv)
{
char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")];
@@ -277,7 +605,7 @@ static int do_skeleton(int argc, char **argv)
struct bpf_object *obj = NULL;
const char *file, *ident;
struct bpf_program *prog;
- int fd, len, err = -1;
+ int fd, err = -1;
struct bpf_map *map;
struct btf *btf;
struct stat st;
@@ -359,7 +687,25 @@ static int do_skeleton(int argc, char **argv)
}
get_header_guard(header_guard, obj_name);
- codegen("\
+ if (use_loader) {
+ codegen("\
+ \n\
+ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\
+ /* THIS FILE IS AUTOGENERATED! */ \n\
+ #ifndef %2$s \n\
+ #define %2$s \n\
+ \n\
+ #include <stdlib.h> \n\
+ #include <bpf/bpf.h> \n\
+ #include <bpf/skel_internal.h> \n\
+ \n\
+ struct %1$s { \n\
+ struct bpf_loader_ctx ctx; \n\
+ ",
+ obj_name, header_guard
+ );
+ } else {
+ codegen("\
\n\
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\
\n\
@@ -367,6 +713,7 @@ static int do_skeleton(int argc, char **argv)
#ifndef %2$s \n\
#define %2$s \n\
\n\
+ #include <errno.h> \n\
#include <stdlib.h> \n\
#include <bpf/libbpf.h> \n\
\n\
@@ -375,7 +722,8 @@ static int do_skeleton(int argc, char **argv)
struct bpf_object *obj; \n\
",
obj_name, header_guard
- );
+ );
+ }
if (map_cnt) {
printf("\tstruct {\n");
@@ -383,7 +731,10 @@ static int do_skeleton(int argc, char **argv)
ident = get_map_ident(map);
if (!ident)
continue;
- printf("\t\tstruct bpf_map *%s;\n", ident);
+ if (use_loader)
+ printf("\t\tstruct bpf_map_desc %s;\n", ident);
+ else
+ printf("\t\tstruct bpf_map *%s;\n", ident);
}
printf("\t} maps;\n");
}
@@ -391,14 +742,22 @@ static int do_skeleton(int argc, char **argv)
if (prog_cnt) {
printf("\tstruct {\n");
bpf_object__for_each_program(prog, obj) {
- printf("\t\tstruct bpf_program *%s;\n",
- bpf_program__name(prog));
+ if (use_loader)
+ printf("\t\tstruct bpf_prog_desc %s;\n",
+ bpf_program__name(prog));
+ else
+ printf("\t\tstruct bpf_program *%s;\n",
+ bpf_program__name(prog));
}
printf("\t} progs;\n");
printf("\tstruct {\n");
bpf_object__for_each_program(prog, obj) {
- printf("\t\tstruct bpf_link *%s;\n",
- bpf_program__name(prog));
+ if (use_loader)
+ printf("\t\tint %s_fd;\n",
+ bpf_program__name(prog));
+ else
+ printf("\t\tstruct bpf_link *%s;\n",
+ bpf_program__name(prog));
}
printf("\t} links;\n");
}
@@ -409,6 +768,10 @@ static int do_skeleton(int argc, char **argv)
if (err)
goto out;
}
+ if (use_loader) {
+ err = gen_trace(obj, obj_name, header_guard);
+ goto out;
+ }
codegen("\
\n\
@@ -431,18 +794,23 @@ static int do_skeleton(int argc, char **argv)
%1$s__open_opts(const struct bpf_object_open_opts *opts) \n\
{ \n\
struct %1$s *obj; \n\
+ int err; \n\
\n\
obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\
- if (!obj) \n\
+ if (!obj) { \n\
+ errno = ENOMEM; \n\
return NULL; \n\
- if (%1$s__create_skeleton(obj)) \n\
- goto err; \n\
- if (bpf_object__open_skeleton(obj->skeleton, opts)) \n\
- goto err; \n\
+ } \n\
+ \n\
+ err = %1$s__create_skeleton(obj); \n\
+ err = err ?: bpf_object__open_skeleton(obj->skeleton, opts);\n\
+ if (err) \n\
+ goto err_out; \n\
\n\
return obj; \n\
- err: \n\
+ err_out: \n\
%1$s__destroy(obj); \n\
+ errno = -err; \n\
return NULL; \n\
} \n\
\n\
@@ -462,12 +830,15 @@ static int do_skeleton(int argc, char **argv)
%1$s__open_and_load(void) \n\
{ \n\
struct %1$s *obj; \n\
+ int err; \n\
\n\
obj = %1$s__open(); \n\
if (!obj) \n\
return NULL; \n\
- if (%1$s__load(obj)) { \n\
+ err = %1$s__load(obj); \n\
+ if (err) { \n\
%1$s__destroy(obj); \n\
+ errno = -err; \n\
return NULL; \n\
} \n\
return obj; \n\
@@ -498,7 +869,7 @@ static int do_skeleton(int argc, char **argv)
\n\
s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
if (!s) \n\
- return -1; \n\
+ goto err; \n\
obj->skeleton = s; \n\
\n\
s->sz = sizeof(*s); \n\
@@ -578,19 +949,7 @@ static int do_skeleton(int argc, char **argv)
file_sz);
/* embed contents of BPF object file */
- for (i = 0, len = 0; i < file_sz; i++) {
- int w = obj_data[i] ? 4 : 2;
-
- len += w;
- if (len > 78) {
- printf("\\\n");
- len = w;
- }
- if (!obj_data[i])
- printf("\\0");
- else
- printf("\\x%02x", (unsigned char)obj_data[i]);
- }
+ print_hex(obj_data, file_sz);
codegen("\
\n\
@@ -599,7 +958,7 @@ static int do_skeleton(int argc, char **argv)
return 0; \n\
err: \n\
bpf_object__destroy_skeleton(s); \n\
- return -1; \n\
+ return -ENOMEM; \n\
} \n\
\n\
#endif /* %s */ \n\
@@ -636,7 +995,7 @@ static int do_object(int argc, char **argv)
while (argc) {
file = GET_ARG();
- err = bpf_linker__add_file(linker, file);
+ err = bpf_linker__add_file(linker, file, NULL);
if (err) {
p_err("failed to link '%s': %s (%d)", file, strerror(err), err);
goto out;
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index d9afb730136a..3ddfd4843738 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -29,6 +29,7 @@ bool show_pinned;
bool block_mount;
bool verifier_logs;
bool relaxed_maps;
+bool use_loader;
struct btf *base_btf;
struct pinned_obj_table prog_table;
struct pinned_obj_table map_table;
@@ -340,8 +341,10 @@ static int do_batch(int argc, char **argv)
n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines);
if (!n_argc)
continue;
- if (n_argc < 0)
+ if (n_argc < 0) {
+ err = n_argc;
goto err_close;
+ }
if (json_output) {
jsonw_start_object(json_wtr);
@@ -392,6 +395,7 @@ int main(int argc, char **argv)
{ "mapcompat", no_argument, NULL, 'm' },
{ "nomount", no_argument, NULL, 'n' },
{ "debug", no_argument, NULL, 'd' },
+ { "use-loader", no_argument, NULL, 'L' },
{ "base-btf", required_argument, NULL, 'B' },
{ 0 }
};
@@ -409,7 +413,7 @@ int main(int argc, char **argv)
hash_init(link_table.table);
opterr = 0;
- while ((opt = getopt_long(argc, argv, "VhpjfmndB:",
+ while ((opt = getopt_long(argc, argv, "VhpjfLmndB:",
options, NULL)) >= 0) {
switch (opt) {
case 'V':
@@ -452,6 +456,9 @@ int main(int argc, char **argv)
return -1;
}
break;
+ case 'L':
+ use_loader = true;
+ break;
default:
p_err("unrecognized option '%s'", argv[optind - 1]);
if (json_output)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 76e91641262b..c1cf29798b99 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -90,6 +90,7 @@ extern bool show_pids;
extern bool block_mount;
extern bool verifier_logs;
extern bool relaxed_maps;
+extern bool use_loader;
extern struct btf *base_btf;
extern struct pinned_obj_table prog_table;
extern struct pinned_obj_table map_table;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 3f067d2d7584..cc48726740ad 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -16,6 +16,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/syscall.h>
+#include <dirent.h>
#include <linux/err.h>
#include <linux/perf_event.h>
@@ -24,6 +25,8 @@
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>
+#include <bpf/bpf_gen_internal.h>
+#include <bpf/skel_internal.h>
#include "cfg.h"
#include "main.h"
@@ -1499,7 +1502,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
set_max_rlimit();
obj = bpf_object__open_file(file, &open_opts);
- if (IS_ERR_OR_NULL(obj)) {
+ if (libbpf_get_error(obj)) {
p_err("failed to open object file");
goto err_free_reuse_maps;
}
@@ -1645,8 +1648,110 @@ err_free_reuse_maps:
return -1;
}
+/*
+ * Count the directory entries of /proc/self/fd, biased by -3.
+ *
+ * Returns the biased entry count, or -1 if the directory cannot be opened.
+ */
+static int count_open_fds(void)
+{
+ DIR *dp = opendir("/proc/self/fd");
+ struct dirent *de;
+ int cnt = -3; /* presumably discounts ".", ".." and dp's own fd -- TODO confirm */
+
+ if (!dp)
+ return -1;
+
+ while ((de = readdir(dp)))
+ cnt++;
+
+ closedir(dp);
+ return cnt;
+}
+
+/*
+ * Execute the generated loader program described by @gen.
+ *
+ * A zeroed loader context is set up on the stack with room for 64 map or
+ * program descriptors (whichever is larger) and a heap-allocated log buffer.
+ * The loader's data and instructions are handed to bpf_load_and_run().
+ *
+ * On failure the error code, opts.errstr and the log buffer are printed to
+ * stderr, together with the change in the number of open FDs across the call
+ * if it is non-zero.
+ *
+ * Returns the result of bpf_load_and_run(), or -ENOMEM if the log buffer
+ * cannot be allocated.
+ */
+static int try_loader(struct gen_loader_opts *gen)
+{
+	struct bpf_load_and_run_opts opts = {};
+	struct bpf_loader_ctx *ctx;
+	int ctx_sz = sizeof(*ctx) + 64 * max(sizeof(struct bpf_map_desc),
+					     sizeof(struct bpf_prog_desc));
+	int log_buf_sz = (1u << 24) - 1;
+	int err, fds_before, fd_delta;
+	char *log_buf;
+
+	ctx = alloca(ctx_sz);
+	memset(ctx, 0, ctx_sz);
+	ctx->sz = ctx_sz;
+	ctx->log_level = 1;
+	ctx->log_size = log_buf_sz;
+	log_buf = malloc(log_buf_sz);
+	if (!log_buf)
+		return -ENOMEM;
+	/*
+	 * malloc() does not initialize the buffer. Make it an empty string so
+	 * the "%s" below is well defined even if the load fails before
+	 * anything was written to the log.
+	 */
+	log_buf[0] = '\0';
+	ctx->log_buf = (long) log_buf;
+	opts.ctx = ctx;
+	opts.data = gen->data;
+	opts.data_sz = gen->data_sz;
+	opts.insns = gen->insns;
+	opts.insns_sz = gen->insns_sz;
+	/* sample the FD count around the run to detect FDs left open by it */
+	fds_before = count_open_fds();
+	err = bpf_load_and_run(&opts);
+	fd_delta = count_open_fds() - fds_before;
+	if (err < 0) {
+		fprintf(stderr, "err %d\n%s\n%s", err, opts.errstr, log_buf);
+		if (fd_delta)
+			fprintf(stderr, "loader prog leaked %d FDs\n",
+				fd_delta);
+	}
+	free(log_buf);
+	return err;
+}
+
+/*
+ * Handle "prog load" when the loader-program path was requested (use_loader).
+ *
+ * Takes one argument, the object file. The object is opened, switched to
+ * loader generation with bpf_object__gen_loader() and "loaded" with
+ * bpf_object__load_xattr(); the result in @gen is then run by try_loader().
+ * With verifier_logs set, the generated loader instructions are dumped first.
+ *
+ * Returns 0 on success, -1 if the file argument is missing, otherwise the
+ * error reported by the failing step.
+ */
+static int do_loader(int argc, char **argv)
+{
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
+	DECLARE_LIBBPF_OPTS(gen_loader_opts, gen);
+	struct bpf_object_load_attr load_attr = {};
+	struct bpf_object *obj;
+	const char *file;
+	int err = 0;
+
+	if (!REQ_ARGS(1))
+		return -1;
+	file = GET_ARG();
+
+	obj = bpf_object__open_file(file, &open_opts);
+	/*
+	 * Record the open error in err: leaving it at 0 here would report
+	 * success to the caller even though nothing was loaded.
+	 */
+	err = libbpf_get_error(obj);
+	if (err) {
+		p_err("failed to open object file");
+		goto err_close_obj;
+	}
+
+	err = bpf_object__gen_loader(obj, &gen);
+	if (err)
+		goto err_close_obj;
+
+	load_attr.obj = obj;
+	if (verifier_logs)
+		/* log_level1 + log_level2 + stats, but not stable UAPI */
+		load_attr.log_level = 1 + 2 + 4;
+
+	err = bpf_object__load_xattr(&load_attr);
+	if (err) {
+		p_err("failed to load object file");
+		goto err_close_obj;
+	}
+
+	if (verifier_logs) {
+		struct dump_data dd = {};
+
+		/* show the generated loader program before running it */
+		kernel_syms_load(&dd);
+		dump_xlated_plain(&dd, (void *)gen.insns, gen.insns_sz, false, false);
+		kernel_syms_destroy(&dd);
+	}
+	err = try_loader(&gen);
+err_close_obj:
+	bpf_object__close(obj);
+	return err;
+}
+
static int do_load(int argc, char **argv)
{
+ if (use_loader)
+ return do_loader(argc, argv);
return load_with_options(argc, argv, true);
}
@@ -2138,7 +2243,7 @@ static int do_help(int argc, char **argv)
" cgroup/getpeername4 | cgroup/getpeername6 |\n"
" cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
" cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
- " cgroup/getsockopt | cgroup/setsockopt |\n"
+ " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
" struct_ops | fentry | fexit | freplace | sk_lookup }\n"
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
" flow_dissector }\n"
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 6fc3e6f7f40c..f1f32e21d5cd 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -196,6 +196,9 @@ static const char *print_imm(void *private_data,
else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
+ else if (insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "map[idx:%u]+%u", insn->imm, (insn + 1)->imm);
else if (insn->src_reg == BPF_PSEUDO_FUNC)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"subprog[%+d]", insn->imm);
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 7550fd9c3188..3ad9301b0f00 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -655,6 +655,9 @@ static int symbols_patch(struct object *obj)
if (sets_patch(obj))
return -1;
+ /* Set type to ensure endian translation occurs. */
+ obj->efile.idlist->d_type = ELF_T_WORD;
+
elf_flagdata(obj->efile.idlist, ELF_C_SET, ELF_F_DIRTY);
err = elf_update(obj->efile.elf, ELF_C_WRITE);
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index cd72016c3cfa..715092fc6a23 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -51,39 +51,39 @@ subdir-obj-y :=
build-file := $(dir)/Build
-include $(build-file)
-quiet_cmd_flex = FLEX $@
-quiet_cmd_bison = BISON $@
+quiet_cmd_flex = FLEX $@
+quiet_cmd_bison = BISON $@
# Create directory unless it exists
-quiet_cmd_mkdir = MKDIR $(dir $@)
+quiet_cmd_mkdir = MKDIR $(dir $@)
cmd_mkdir = mkdir -p $(dir $@)
rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir))
# Compile command
-quiet_cmd_cc_o_c = CC $@
+quiet_cmd_cc_o_c = CC $@
cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
-quiet_cmd_host_cc_o_c = HOSTCC $@
+quiet_cmd_host_cc_o_c = HOSTCC $@
cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<
-quiet_cmd_cxx_o_c = CXX $@
+quiet_cmd_cxx_o_c = CXX $@
cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $<
-quiet_cmd_cpp_i_c = CPP $@
+quiet_cmd_cpp_i_c = CPP $@
cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $<
-quiet_cmd_cc_s_c = AS $@
+quiet_cmd_cc_s_c = AS $@
cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $<
-quiet_cmd_gen = GEN $@
+quiet_cmd_gen = GEN $@
# Link agregate command
# If there's nothing to link, create empty $@ object.
-quiet_cmd_ld_multi = LD $@
+quiet_cmd_ld_multi = LD $@
cmd_ld_multi = $(if $(strip $(obj-y)),\
$(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@)
-quiet_cmd_host_ld_multi = HOSTLD $@
+quiet_cmd_host_ld_multi = HOSTLD $@
cmd_host_ld_multi = $(if $(strip $(obj-y)),\
$(HOSTLD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@)
diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint
index 719f18b1edf0..f1af27ce9f20 100755
--- a/tools/debugging/kernel-chktaint
+++ b/tools/debugging/kernel-chktaint
@@ -196,7 +196,7 @@ else
fi
echo "For a more detailed explanation of the various taint flags see"
-echo " Documentation/admin-guide/tainted-kernels.rst in the the Linux kernel sources"
+echo " Documentation/admin-guide/tainted-kernels.rst in the Linux kernel sources"
echo " or https://kernel.org/doc/html/latest/admin-guide/tainted-kernels.html"
echo "Raw taint value as int/string: $taint/'$out'"
#EOF#
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 330dbf7509cc..9d959bc24859 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -18,6 +18,8 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
int __bitmap_equal(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits);
void bitmap_clear(unsigned long *map, unsigned int start, int len);
+int __bitmap_intersects(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int bits);
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
@@ -170,4 +172,13 @@ static inline int bitmap_equal(const unsigned long *src1,
return __bitmap_equal(src1, src2, nbits);
}
+/*
+ * Return non-zero if @src1 and @src2 have a set bit in common within the
+ * first @nbits bits, 0 otherwise.
+ */
+static inline int bitmap_intersects(const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
+{
+ /* small constant sizes are tested inline on the first word only */
+ if (small_const_nbits(nbits))
+ return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
+ else
+ return __bitmap_intersects(src1, src2, nbits);
+}
+
#endif /* _PERF_BITOPS_H */
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 7f475d59a097..87d112650dfb 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -22,7 +22,7 @@
#include <linux/build_bug.h>
#define GENMASK_INPUT_CHECK(h, l) \
(BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
- __builtin_constant_p((l) > (h)), (l) > (h), 0)))
+ __is_constexpr((l) > (h)), (l) > (h), 0)))
#else
/*
* BUILD_BUG_ON_ZERO is not available in h files included from asm files,
diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h
index 81b8aae5a855..435ddd72d2c4 100644
--- a/tools/include/linux/const.h
+++ b/tools/include/linux/const.h
@@ -3,4 +3,12 @@
#include <vdso/const.h>
+/*
+ * This returns a constant expression while determining if an argument is
+ * a constant expression, most importantly without evaluating the argument.
+ * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
+ */
+#define __is_constexpr(x) \
+ (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
+
#endif /* _LINUX_CONST_H */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 6de5a7fc066b..d2a942086fcb 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -863,8 +863,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
#define __NR_mount_setattr 442
__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
#define __NR_landlock_create_ruleset 444
__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ec6d85a81744..bf9252c7381e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
* Look up an element with the given *key* in the map referred to
* by the file descriptor *fd*, and if found, delete the element.
*
+ * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
+ * types, the *flags* argument needs to be set to 0, but for other
+ * map types, it may be specified as:
+ *
+ * **BPF_F_LOCK**
+ * Look up and delete the value of a spin-locked map
+ * without returning the lock. This must be specified if
+ * the elements contain a spinlock.
+ *
* The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
* implement this command as a "pop" operation, deleting the top
* element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
* This command is only valid for the following map types:
* * **BPF_MAP_TYPE_QUEUE**
* * **BPF_MAP_TYPE_STACK**
+ * * **BPF_MAP_TYPE_HASH**
+ * * **BPF_MAP_TYPE_PERCPU_HASH**
+ * * **BPF_MAP_TYPE_LRU_HASH**
+ * * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
@@ -837,6 +850,7 @@ enum bpf_cmd {
BPF_PROG_ATTACH,
BPF_PROG_DETACH,
BPF_PROG_TEST_RUN,
+ BPF_PROG_RUN = BPF_PROG_TEST_RUN,
BPF_PROG_GET_NEXT_ID,
BPF_MAP_GET_NEXT_ID,
BPF_PROG_GET_FD_BY_ID,
@@ -937,6 +951,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
+ BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
};
enum bpf_attach_type {
@@ -979,6 +994,8 @@ enum bpf_attach_type {
BPF_SK_LOOKUP,
BPF_XDP,
BPF_SK_SKB_VERDICT,
+ BPF_SK_REUSEPORT_SELECT,
+ BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
__MAX_BPF_ATTACH_TYPE
};
@@ -1097,8 +1114,8 @@ enum bpf_link_type {
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
- * insn[0].src_reg: BPF_PSEUDO_MAP_FD
- * insn[0].imm: map fd
+ * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX]
+ * insn[0].imm: map fd or fd_idx
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
@@ -1106,15 +1123,19 @@ enum bpf_link_type {
* verifier type: CONST_PTR_TO_MAP
*/
#define BPF_PSEUDO_MAP_FD 1
-/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE
- * insn[0].imm: map fd
+#define BPF_PSEUDO_MAP_IDX 5
+
+/* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE
+ * insn[0].imm: map fd or fd_idx
* insn[1].imm: offset into value
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of map[0]+offset
* verifier type: PTR_TO_MAP_VALUE
*/
-#define BPF_PSEUDO_MAP_VALUE 2
+#define BPF_PSEUDO_MAP_VALUE 2
+#define BPF_PSEUDO_MAP_IDX_VALUE 6
+
/* insn[0].src_reg: BPF_PSEUDO_BTF_ID
* insn[0].imm: kernel btd id of VAR
* insn[1].imm: 0
@@ -1314,6 +1335,8 @@ union bpf_attr {
/* or valid module BTF object fd or 0 to attach to vmlinux */
__u32 attach_btf_obj_fd;
};
+ __u32 :32; /* pad */
+ __aligned_u64 fd_array; /* array of FDs */
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -2534,8 +2557,12 @@ union bpf_attr {
* The lower two bits of *flags* are used as the return code if
* the map lookup fails. This is so that the return value can be
* one of the XDP program return codes up to **XDP_TX**, as chosen
- * by the caller. Any higher bits in the *flags* argument must be
- * unset.
+ * by the caller. The higher bits of *flags* can be set to
+ * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below.
+ *
+ * With BPF_F_BROADCAST the packet will be broadcasted to all the
+ * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress
+ * interface will be excluded when broadcasting.
*
* See also **bpf_redirect**\ (), which only supports redirecting
* to an ifindex, but doesn't require a map to do so.
@@ -4735,6 +4762,24 @@ union bpf_attr {
* be zero-terminated except when **str_size** is 0.
*
* Or **-EBUSY** if the per-CPU memory copy buffer is busy.
+ *
+ * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size)
+ * Description
+ * Execute bpf syscall with given arguments.
+ * Return
+ * A syscall result.
+ *
+ * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags)
+ * Description
+ * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs.
+ * Return
+ * Returns btf_id and btf_obj_fd in lower and upper 32 bits.
+ *
+ * long bpf_sys_close(u32 fd)
+ * Description
+ * Execute close syscall for given FD.
+ * Return
+ * A syscall result.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4903,6 +4948,9 @@ union bpf_attr {
FN(check_mtu), \
FN(for_each_map_elem), \
FN(snprintf), \
+ FN(sys_bpf), \
+ FN(btf_find_by_name_kind), \
+ FN(sys_close), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5080,6 +5128,12 @@ enum {
BPF_F_BPRM_SECUREEXEC = (1ULL << 0),
};
+/* Flags for bpf_redirect_map helper */
+enum {
+ BPF_F_BROADCAST = (1ULL << 3),
+ BPF_F_EXCLUDE_INGRESS = (1ULL << 4),
+};
+
#define __bpf_md_ptr(type, name) \
union { \
type name; \
@@ -5364,6 +5418,20 @@ struct sk_reuseport_md {
__u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
__u32 bind_inany; /* Is sock bound to an INANY address? */
__u32 hash; /* A hash of the packet 4 tuples */
+ /* When reuse->migrating_sk is NULL, it is selecting a sk for the
+ * new incoming connection request (e.g. selecting a listen sk for
+ * the received SYN in the TCP case). reuse->sk is one of the sk
+ * in the reuseport group. The bpf prog can use reuse->sk to learn
+ * the local listening ip/port without looking into the skb.
+ *
+ * When reuse->migrating_sk is not NULL, reuse->sk is closed and
+ * reuse->migrating_sk is the socket that needs to be migrated
+ * to another listening socket. migrating_sk could be a fullsock
+ * sk that is fully established or a reqsk that is in-the-middle
+ * of 3-way handshake.
+ */
+ __bpf_md_ptr(struct bpf_sock *, sk);
+ __bpf_md_ptr(struct bpf_sock *, migrating_sk);
};
#define BPF_TAG_SIZE 8
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index f44eb0a04afd..4c32e97dcdf0 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -185,7 +185,7 @@ struct fsxattr {
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
/*
- * A jump here: 130-131 are reserved for zoned block devices
+ * A jump here: 130-136 are reserved for zoned block devices
* (see uapi/linux/blkzoned.h)
*/
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 7d6687618d80..d1b327036ae4 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -289,6 +289,9 @@ struct sockaddr_in {
/* Address indicating an error return. */
#define INADDR_NONE ((unsigned long int) 0xffffffff)
+/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */
+#define INADDR_DUMMY ((unsigned long int) 0xc0000008)
+
/* Network number for local host loopback. */
#define IN_LOOPBACKNET 127
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 3fd9a7e9d90c..d9e4aabcb31a 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -8,6 +8,7 @@
* Note: you must update KVM_API_VERSION if you change this interface.
*/
+#include <linux/const.h>
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/ioctl.h>
@@ -279,6 +280,9 @@ struct kvm_xen_exit {
/* Encounter unexpected vm-exit reason */
#define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4
+/* Flags that describe what fields in emulation_failure hold valid data. */
+#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
+
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
/* in */
@@ -382,6 +386,25 @@ struct kvm_run {
__u32 ndata;
__u64 data[16];
} internal;
+ /*
+ * KVM_INTERNAL_ERROR_EMULATION
+ *
+ * "struct emulation_failure" is an overlay of "struct internal"
+ * that is used for the KVM_INTERNAL_ERROR_EMULATION sub-type of
+ * KVM_EXIT_INTERNAL_ERROR. Note, unlike other internal error
+ * sub-types, this struct is ABI! It also needs to be backwards
+ * compatible with "struct internal". Take special care that
+ * "ndata" is correct, that new fields are enumerated in "flags",
+ * and that each flag enumerates fields that are 64-bit aligned
+ * and sized (so that ndata+internal.data[] is valid/accurate).
+ */
+ struct {
+ __u32 suberror;
+ __u32 ndata;
+ __u64 flags;
+ __u8 insn_size;
+ __u8 insn_bytes[15];
+ } emulation_failure;
/* KVM_EXIT_OSI */
struct {
__u64 gprs[32];
@@ -1082,6 +1105,13 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_SGX_ATTRIBUTE 196
#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
#define KVM_CAP_PTP_KVM 198
+#define KVM_CAP_HYPERV_ENFORCE_CPUID 199
+#define KVM_CAP_SREGS2 200
+#define KVM_CAP_EXIT_HYPERCALL 201
+#define KVM_CAP_PPC_RPT_INVALIDATE 202
+#define KVM_CAP_BINARY_STATS_FD 203
+#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
+#define KVM_CAP_ARM_MTE 205
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1427,6 +1457,7 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_PMU_EVENT_FILTER */
#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
#define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3)
+#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
@@ -1620,6 +1651,9 @@ struct kvm_xen_hvm_attr {
#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr)
+#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2)
+#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2)
+
struct kvm_xen_vcpu_attr {
__u16 type;
__u16 pad[3];
@@ -1879,8 +1913,8 @@ struct kvm_hyperv_eventfd {
* conversion after harvesting an entry. Also, it must not skip any
* dirty bits, so that dirty bits are always harvested in sequence.
*/
-#define KVM_DIRTY_GFN_F_DIRTY BIT(0)
-#define KVM_DIRTY_GFN_F_RESET BIT(1)
+#define KVM_DIRTY_GFN_F_DIRTY _BITUL(0)
+#define KVM_DIRTY_GFN_F_RESET _BITUL(1)
#define KVM_DIRTY_GFN_F_MASK 0x3
/*
@@ -1898,4 +1932,76 @@ struct kvm_dirty_gfn {
#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0)
#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1)
+/**
+ * struct kvm_stats_header - Header of per vm/vcpu binary statistics data.
+ * @flags: Some extra information for header, always 0 for now.
+ * @name_size: The size in bytes of the memory which contains statistics
+ * name string including trailing '\0'. The memory is allocated
+ * at the end of statistics descriptor.
+ * @num_desc: The number of statistics the vm or vcpu has.
+ * @id_offset: The offset of the vm/vcpu stats' id string in the file pointed
+ * by vm/vcpu stats fd.
+ * @desc_offset: The offset of the vm/vcpu stats' descriptor block in the file
+ * pointed by vm/vcpu stats fd.
+ * @data_offset: The offset of the vm/vcpu stats' data block in the file
+ * pointed by vm/vcpu stats fd.
+ *
+ * This is the header userspace needs to read from stats fd before any other
+ * readings. It is used by userspace to discover all the information about the
+ * vm/vcpu's binary statistics.
+ * Userspace reads this header from the start of the vm/vcpu's stats fd.
+ */
+struct kvm_stats_header {
+ __u32 flags;
+ __u32 name_size;
+ __u32 num_desc;
+ __u32 id_offset;
+ __u32 desc_offset;
+ __u32 data_offset;
+};
+
+#define KVM_STATS_TYPE_SHIFT 0
+#define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK
+
+#define KVM_STATS_UNIT_SHIFT 4
+#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_NONE (0x0 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+
+#define KVM_STATS_BASE_SHIFT 8
+#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
+#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT)
+#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT)
+#define KVM_STATS_BASE_MAX KVM_STATS_BASE_POW2
+
+/**
+ * struct kvm_stats_desc - Descriptor of a KVM statistics.
+ * @flags: Annotations of the stats, like type, unit, etc.
+ * @exponent: Used together with @flags to determine the unit.
+ * @size: The number of data items for this stats.
+ * Every data item is of type __u64.
+ * @offset: The offset of the stats to the start of stat structure in
+ * structure kvm or kvm_vcpu.
+ * @unused: Unused field for future usage. Always 0 for now.
+ * @name: The name string for the stats. Its size is indicated by the
+ * &kvm_stats_header->name_size.
+ */
+struct kvm_stats_desc {
+ __u32 flags;
+ __s16 exponent;
+ __u16 size;
+ __u32 offset;
+ __u32 unused;
+ char name[];
+};
+
+#define KVM_GET_STATS_FD _IO(KVMIO, 0xce)
+
#endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index e6524ead2b7b..dd7a166fdf9c 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -120,6 +120,7 @@ enum fsconfig_command {
#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */
#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */
/*
* mount_setattr()
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index bf8143505c49..f92880a15645 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -464,7 +464,7 @@ struct perf_event_attr {
/*
* User provided data if sigtrap=1, passed back to user via
- * siginfo_t::si_perf, e.g. to permit user to identify the event.
+ * siginfo_t::si_perf_data, e.g. to permit user to identify the event.
*/
__u64 sig_data;
};
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 18a9f59dc067..967d9c55323d 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -259,4 +259,12 @@ struct prctl_mm_map {
#define PR_PAC_SET_ENABLED_KEYS 60
#define PR_PAC_GET_ENABLED_KEYS 61
+/* Request the scheduler to share a core */
+#define PR_SCHED_CORE 62
+# define PR_SCHED_CORE_GET 0
+# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
+# define PR_SCHED_CORE_MAX 4
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c
index f4e914712b6f..db466ef7be9d 100644
--- a/tools/lib/bitmap.c
+++ b/tools/lib/bitmap.c
@@ -86,3 +86,17 @@ int __bitmap_equal(const unsigned long *bitmap1,
return 1;
}
+
+/*
+ * Return 1 if @bitmap1 and @bitmap2 have a set bit in common within the
+ * first @bits bits, 0 otherwise.
+ */
+int __bitmap_intersects(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int bits)
+{
+ unsigned int k, lim = bits/BITS_PER_LONG;
+ /* compare the full words first */
+ for (k = 0; k < lim; ++k)
+ if (bitmap1[k] & bitmap2[k])
+ return 1;
+
+ /* then the partial last word, masked down to the bits that count */
+ if (bits % BITS_PER_LONG)
+ if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+ return 1;
+ return 0;
+}
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 9b057cc7650a..430f6874fa41 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o ringbuf.o strset.o linker.o
+ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index e43e1896cb4b..ec14aa725bb0 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -223,18 +223,14 @@ install_lib: all_cmd
$(call do_install_mkdir,$(libdir_SQ)); \
cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
+INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
+ bpf_helpers.h $(BPF_HELPER_DEFS) bpf_tracing.h \
+ bpf_endian.h bpf_core_read.h skel_internal.h
+
install_headers: $(BPF_HELPER_DEFS)
- $(call QUIET_INSTALL, headers) \
- $(call do_install,bpf.h,$(prefix)/include/bpf,644); \
- $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
- $(call do_install,btf.h,$(prefix)/include/bpf,644); \
- $(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
- $(call do_install,xsk.h,$(prefix)/include/bpf,644); \
- $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
- $(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
- $(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
- $(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
- $(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
+ $(call QUIET_INSTALL, headers) \
+ $(foreach hdr,$(INSTALL_HEADERS), \
+ $(call do_install,$(hdr),$(prefix)/include/bpf,644);)
install_pkgconfig: $(PC_FILE)
$(call QUIET_INSTALL, $(PC_FILE)) \
diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst
deleted file mode 100644
index 8928f7787f2d..000000000000
--- a/tools/lib/bpf/README.rst
+++ /dev/null
@@ -1,168 +0,0 @@
-.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-libbpf API naming convention
-============================
-
-libbpf API provides access to a few logically separated groups of
-functions and types. Every group has its own naming convention
-described here. It's recommended to follow these conventions whenever a
-new function or type is added to keep libbpf API clean and consistent.
-
-All types and functions provided by libbpf API should have one of the
-following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
-``perf_buffer_``.
-
-System call wrappers
---------------------
-
-System call wrappers are simple wrappers for commands supported by
-sys_bpf system call. These wrappers should go to ``bpf.h`` header file
-and map one-on-one to corresponding commands.
-
-For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM``
-command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc.
-
-Objects
--------
-
-Another class of types and functions provided by libbpf API is "objects"
-and functions to work with them. Objects are high-level abstractions
-such as BPF program or BPF map. They're represented by corresponding
-structures such as ``struct bpf_object``, ``struct bpf_program``,
-``struct bpf_map``, etc.
-
-Structures are forward declared and access to their fields should be
-provided via corresponding getters and setters rather than directly.
-
-These objects are associated with corresponding parts of ELF object that
-contains compiled BPF programs.
-
-For example ``struct bpf_object`` represents ELF object itself created
-from an ELF file or from a buffer, ``struct bpf_program`` represents a
-program in ELF object and ``struct bpf_map`` is a map.
-
-Functions that work with an object have names built from object name,
-double underscore and part that describes function purpose.
-
-For example ``bpf_object__open`` consists of the name of corresponding
-object, ``bpf_object``, double underscore and ``open`` that defines the
-purpose of the function to open ELF file and create ``bpf_object`` from
-it.
-
-Another example: ``bpf_program__load`` is named for corresponding
-object, ``bpf_program``, that is separated from other part of the name
-by double underscore.
-
-All objects and corresponding functions other than BTF related should go
-to ``libbpf.h``. BTF types and functions should go to ``btf.h``.
-
-Auxiliary functions
--------------------
-
-Auxiliary functions and types that don't fit well in any of categories
-described above should have ``libbpf_`` prefix, e.g.
-``libbpf_get_error`` or ``libbpf_prog_type_by_name``.
-
-AF_XDP functions
--------------------
-
-AF_XDP functions should have an ``xsk_`` prefix, e.g.
-``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists
-of both low-level ring access functions and high-level configuration
-functions. These can be mixed and matched. Note that these functions
-are not reentrant for performance reasons.
-
-Please take a look at Documentation/networking/af_xdp.rst in the Linux
-kernel source tree on how to use XDP sockets and for some common
-mistakes in case you do not get any traffic up to user space.
-
-libbpf ABI
-==========
-
-libbpf can be both linked statically or used as DSO. To avoid possible
-conflicts with other libraries an application is linked with, all
-non-static libbpf symbols should have one of the prefixes mentioned in
-API documentation above. See API naming convention to choose the right
-name for a new symbol.
-
-Symbol visibility
------------------
-
-libbpf follow the model when all global symbols have visibility "hidden"
-by default and to make a symbol visible it has to be explicitly
-attributed with ``LIBBPF_API`` macro. For example:
-
-.. code-block:: c
-
- LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
-
-This prevents from accidentally exporting a symbol, that is not supposed
-to be a part of ABI what, in turn, improves both libbpf developer- and
-user-experiences.
-
-ABI versionning
----------------
-
-To make future ABI extensions possible libbpf ABI is versioned.
-Versioning is implemented by ``libbpf.map`` version script that is
-passed to linker.
-
-Version name is ``LIBBPF_`` prefix + three-component numeric version,
-starting from ``0.0.1``.
-
-Every time ABI is being changed, e.g. because a new symbol is added or
-semantic of existing symbol is changed, ABI version should be bumped.
-This bump in ABI version is at most once per kernel development cycle.
-
-For example, if current state of ``libbpf.map`` is:
-
-.. code-block::
- LIBBPF_0.0.1 {
- global:
- bpf_func_a;
- bpf_func_b;
- local:
- \*;
- };
-
-, and a new symbol ``bpf_func_c`` is being introduced, then
-``libbpf.map`` should be changed like this:
-
-.. code-block::
- LIBBPF_0.0.1 {
- global:
- bpf_func_a;
- bpf_func_b;
- local:
- \*;
- };
- LIBBPF_0.0.2 {
- global:
- bpf_func_c;
- } LIBBPF_0.0.1;
-
-, where new version ``LIBBPF_0.0.2`` depends on the previous
-``LIBBPF_0.0.1``.
-
-Format of version script and ways to handle ABI changes, including
-incompatible ones, described in details in [1].
-
-Stand-alone build
-=================
-
-Under https://github.com/libbpf/libbpf there is a (semi-)automated
-mirror of the mainline's version of libbpf for a stand-alone build.
-
-However, all changes to libbpf's code base must be upstreamed through
-the mainline kernel tree.
-
-License
-=======
-
-libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause.
-
-Links
-=====
-
-[1] https://www.akkadia.org/drepper/dsohowto.pdf
- (Chapter 3. Maintaining APIs and ABIs).
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index bba48ff4c5c0..86dcac44f32f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -80,6 +80,7 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
{
union bpf_attr attr;
+ int fd;
memset(&attr, '\0', sizeof(attr));
@@ -102,7 +103,8 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
else
attr.inner_map_fd = create_attr->inner_map_fd;
- return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
@@ -160,6 +162,7 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
__u32 map_flags, int node)
{
union bpf_attr attr;
+ int fd;
memset(&attr, '\0', sizeof(attr));
@@ -178,7 +181,8 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
attr.numa_node = node;
}
- return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
@@ -222,10 +226,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
int fd;
if (!load_attr->log_buf != !load_attr->log_buf_sz)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.prog_type = load_attr->prog_type;
@@ -281,8 +285,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
load_attr->func_info_cnt,
load_attr->func_info_rec_size,
attr.func_info_rec_size);
- if (!finfo)
+ if (!finfo) {
+ errno = E2BIG;
goto done;
+ }
attr.func_info = ptr_to_u64(finfo);
attr.func_info_rec_size = load_attr->func_info_rec_size;
@@ -293,8 +299,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
load_attr->line_info_cnt,
load_attr->line_info_rec_size,
attr.line_info_rec_size);
- if (!linfo)
+ if (!linfo) {
+ errno = E2BIG;
goto done;
+ }
attr.line_info = ptr_to_u64(linfo);
attr.line_info_rec_size = load_attr->line_info_rec_size;
@@ -318,9 +326,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
fd = sys_bpf_prog_load(&attr, sizeof(attr));
done:
+ /* free() doesn't affect errno, so we don't need to restore it */
free(finfo);
free(linfo);
- return fd;
+ return libbpf_err_errno(fd);
}
int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
@@ -329,7 +338,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
struct bpf_prog_load_params p = {};
if (!load_attr || !log_buf != !log_buf_sz)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
p.prog_type = load_attr->prog_type;
p.expected_attach_type = load_attr->expected_attach_type;
@@ -391,6 +400,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
int log_level)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.prog_type = type;
@@ -404,13 +414,15 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
attr.kern_version = kern_version;
attr.prog_flags = prog_flags;
- return sys_bpf_prog_load(&attr, sizeof(attr));
+ fd = sys_bpf_prog_load(&attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_map_update_elem(int fd, const void *key, const void *value,
__u64 flags)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
@@ -418,24 +430,28 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
attr.value = ptr_to_u64(value);
attr.flags = flags;
- return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_map_lookup_elem(int fd, const void *key, void *value)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
- return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
@@ -443,17 +459,33 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
attr.value = ptr_to_u64(value);
attr.flags = flags;
- return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
{
union bpf_attr attr;
+ int ret;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+
+ ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
+}
+
+int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
+{
+ union bpf_attr attr;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
+ attr.flags = flags;
- return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+ return libbpf_err_errno(sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)));
}
@@ -461,34 +493,40 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
int bpf_map_delete_elem(int fd, const void *key)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
- return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_map_get_next_key(int fd, const void *key, void *next_key)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.next_key = ptr_to_u64(next_key);
- return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_map_freeze(int fd)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
- return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
@@ -500,7 +538,7 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
int ret;
if (!OPTS_VALID(opts, bpf_map_batch_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.batch.map_fd = fd;
@@ -515,7 +553,7 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
ret = sys_bpf(cmd, &attr, sizeof(attr));
*count = attr.batch.count;
- return ret;
+ return libbpf_err_errno(ret);
}
int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
@@ -552,22 +590,26 @@ int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
int bpf_obj_pin(int fd, const char *pathname)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.pathname = ptr_to_u64((void *)pathname);
attr.bpf_fd = fd;
- return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_obj_get(const char *pathname)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.pathname = ptr_to_u64((void *)pathname);
- return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
@@ -585,9 +627,10 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
const struct bpf_prog_attach_opts *opts)
{
union bpf_attr attr;
+ int ret;
if (!OPTS_VALID(opts, bpf_prog_attach_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.target_fd = target_fd;
@@ -596,30 +639,35 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
attr.attach_flags = OPTS_GET(opts, flags, 0);
attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
- return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.target_fd = target_fd;
attr.attach_type = type;
- return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd;
attr.attach_type = type;
- return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_link_create(int prog_fd, int target_fd,
@@ -628,15 +676,16 @@ int bpf_link_create(int prog_fd, int target_fd,
{
__u32 target_btf_id, iter_info_len;
union bpf_attr attr;
+ int fd;
if (!OPTS_VALID(opts, bpf_link_create_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
iter_info_len = OPTS_GET(opts, iter_info_len, 0);
target_btf_id = OPTS_GET(opts, target_btf_id, 0);
if (iter_info_len && target_btf_id)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.link_create.prog_fd = prog_fd;
@@ -652,26 +701,30 @@ int bpf_link_create(int prog_fd, int target_fd,
attr.link_create.target_btf_id = target_btf_id;
}
- return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_link_detach(int link_fd)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.link_detach.link_fd = link_fd;
- return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts)
{
union bpf_attr attr;
+ int ret;
if (!OPTS_VALID(opts, bpf_link_update_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.link_update.link_fd = link_fd;
@@ -679,17 +732,20 @@ int bpf_link_update(int link_fd, int new_prog_fd,
attr.link_update.flags = OPTS_GET(opts, flags, 0);
attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
- return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_iter_create(int link_fd)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.iter_create.link_fd = link_fd;
- return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
@@ -706,10 +762,12 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
attr.query.prog_ids = ptr_to_u64(prog_ids);
ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
+
if (attach_flags)
*attach_flags = attr.query.attach_flags;
*prog_cnt = attr.query.prog_cnt;
- return ret;
+
+ return libbpf_err_errno(ret);
}
int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
@@ -727,13 +785,15 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
attr.test.repeat = repeat;
ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
if (size_out)
*size_out = attr.test.data_size_out;
if (retval)
*retval = attr.test.retval;
if (duration)
*duration = attr.test.duration;
- return ret;
+
+ return libbpf_err_errno(ret);
}
int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
@@ -742,7 +802,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
int ret;
if (!test_attr->data_out && test_attr->data_size_out > 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.test.prog_fd = test_attr->prog_fd;
@@ -757,11 +817,13 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
attr.test.repeat = test_attr->repeat;
ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
test_attr->data_size_out = attr.test.data_size_out;
test_attr->ctx_size_out = attr.test.ctx_size_out;
test_attr->retval = attr.test.retval;
test_attr->duration = attr.test.duration;
- return ret;
+
+ return libbpf_err_errno(ret);
}
int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
@@ -770,7 +832,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
int ret;
if (!OPTS_VALID(opts, bpf_test_run_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.test.prog_fd = prog_fd;
@@ -788,11 +850,13 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
OPTS_SET(opts, data_size_out, attr.test.data_size_out);
OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
OPTS_SET(opts, duration, attr.test.duration);
OPTS_SET(opts, retval, attr.test.retval);
- return ret;
+
+ return libbpf_err_errno(ret);
}
static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
@@ -807,7 +871,7 @@ static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
if (!err)
*next_id = attr.next_id;
- return err;
+ return libbpf_err_errno(err);
}
int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
@@ -833,41 +897,49 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
int bpf_prog_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.prog_id = id;
- return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_map_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.map_id = id;
- return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_btf_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.btf_id = id;
- return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_link_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.link_id = id;
- return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
@@ -881,21 +953,24 @@ int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
attr.info.info = ptr_to_u64(info);
err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+
if (!err)
*info_len = attr.info.info_len;
- return err;
+ return libbpf_err_errno(err);
}
int bpf_raw_tracepoint_open(const char *name, int prog_fd)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.raw_tracepoint.name = ptr_to_u64(name);
attr.raw_tracepoint.prog_fd = prog_fd;
- return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
@@ -915,12 +990,13 @@ retry:
}
fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
- if (fd == -1 && !do_log && log_buf && log_buf_size) {
+
+ if (fd < 0 && !do_log && log_buf && log_buf_size) {
do_log = true;
goto retry;
}
- return fd;
+ return libbpf_err_errno(fd);
}
int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
@@ -937,37 +1013,42 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
attr.task_fd_query.buf_len = *buf_len;
err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+
*buf_len = attr.task_fd_query.buf_len;
*prog_id = attr.task_fd_query.prog_id;
*fd_type = attr.task_fd_query.fd_type;
*probe_offset = attr.task_fd_query.probe_offset;
*probe_addr = attr.task_fd_query.probe_addr;
- return err;
+ return libbpf_err_errno(err);
}
int bpf_enable_stats(enum bpf_stats_type type)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.enable_stats.type = type;
- return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_prog_bind_map(int prog_fd, int map_fd,
const struct bpf_prog_bind_opts *opts)
{
union bpf_attr attr;
+ int ret;
if (!OPTS_VALID(opts, bpf_prog_bind_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.prog_bind_map.prog_fd = prog_fd;
attr.prog_bind_map.map_fd = map_fd;
attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
- return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 875dde20d56e..4f758f8f50cd 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -124,6 +124,8 @@ LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value,
__u64 flags);
LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
void *value);
+LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key,
+ void *value, __u64 flags);
LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
LIBBPF_API int bpf_map_freeze(int fd);
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
new file mode 100644
index 000000000000..615400391e57
--- /dev/null
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021 Facebook */
+#ifndef __BPF_GEN_INTERNAL_H
+#define __BPF_GEN_INTERNAL_H
+
+struct ksym_relo_desc {
+ const char *name;
+ int kind;
+ int insn_idx;
+};
+
+struct bpf_gen {
+ struct gen_loader_opts *opts;
+ void *data_start;
+ void *data_cur;
+ void *insn_start;
+ void *insn_cur;
+ ssize_t cleanup_label;
+ __u32 nr_progs;
+ __u32 nr_maps;
+ int log_level;
+ int error;
+ struct ksym_relo_desc *relos;
+ int relo_cnt;
+ char attach_target[128];
+ int attach_kind;
+};
+
+void bpf_gen__init(struct bpf_gen *gen, int log_level);
+int bpf_gen__finish(struct bpf_gen *gen);
+void bpf_gen__free(struct bpf_gen *gen);
+void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size);
+void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_attr *map_attr, int map_idx);
+struct bpf_prog_load_params;
+void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx);
+void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size);
+void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);
+void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);
+void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx);
+
+#endif
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 9720dc0b4605..b9987c3efa3c 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -158,4 +158,70 @@ enum libbpf_tristate {
#define __kconfig __attribute__((section(".kconfig")))
#define __ksym __attribute__((section(".ksyms")))
+#ifndef ___bpf_concat
+#define ___bpf_concat(a, b) a ## b
+#endif
+#ifndef ___bpf_apply
+#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#endif
+#ifndef ___bpf_nth
+#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#endif
+#ifndef ___bpf_narg
+#define ___bpf_narg(...) \
+ ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#endif
+
+#define ___bpf_fill0(arr, p, x) do {} while (0)
+#define ___bpf_fill1(arr, p, x) arr[p] = x
+#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
+#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
+#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
+#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
+#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
+#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
+#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
+#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
+#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
+#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
+#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
+#define ___bpf_fill(arr, args...) \
+ ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
+
+/*
+ * BPF_SEQ_PRINTF wraps bpf_seq_printf(): the to-be-printed values are
+ * packed into an unsigned long long array that is passed with its size.
+ */
+#define BPF_SEQ_PRINTF(seq, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
+ ___param, sizeof(___param)); \
+})
+
+/*
+ * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
+ * an array of u64.
+ */
+#define BPF_SNPRINTF(out, out_size, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_snprintf(out, out_size, ___fmt, \
+ ___param, sizeof(___param)); \
+})
+
#endif
diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c
index 3ed1a27b5f7c..5c503096ef43 100644
--- a/tools/lib/bpf/bpf_prog_linfo.c
+++ b/tools/lib/bpf/bpf_prog_linfo.c
@@ -106,7 +106,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
nr_linfo = info->nr_line_info;
if (!nr_linfo)
- return NULL;
+ return errno = EINVAL, NULL;
/*
* The min size that bpf_prog_linfo has to access for
@@ -114,11 +114,11 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
*/
if (info->line_info_rec_size <
offsetof(struct bpf_line_info, file_name_off))
- return NULL;
+ return errno = EINVAL, NULL;
prog_linfo = calloc(1, sizeof(*prog_linfo));
if (!prog_linfo)
- return NULL;
+ return errno = ENOMEM, NULL;
/* Copy xlated line_info */
prog_linfo->nr_linfo = nr_linfo;
@@ -174,7 +174,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
err_free:
bpf_prog_linfo__free(prog_linfo);
- return NULL;
+ return errno = EINVAL, NULL;
}
const struct bpf_line_info *
@@ -186,11 +186,11 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
const __u64 *jited_linfo;
if (func_idx >= prog_linfo->nr_jited_func)
- return NULL;
+ return errno = ENOENT, NULL;
nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx];
if (nr_skip >= nr_linfo)
- return NULL;
+ return errno = ENOENT, NULL;
start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip;
jited_rec_size = prog_linfo->jited_rec_size;
@@ -198,7 +198,7 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
(start * jited_rec_size);
jited_linfo = raw_jited_linfo;
if (addr < *jited_linfo)
- return NULL;
+ return errno = ENOENT, NULL;
nr_linfo -= nr_skip;
rec_size = prog_linfo->rec_size;
@@ -225,13 +225,13 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
nr_linfo = prog_linfo->nr_linfo;
if (nr_skip >= nr_linfo)
- return NULL;
+ return errno = ENOENT, NULL;
rec_size = prog_linfo->rec_size;
raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size);
linfo = raw_linfo;
if (insn_off < linfo->insn_off)
- return NULL;
+ return errno = ENOENT, NULL;
nr_linfo -= nr_skip;
for (i = 0; i < nr_linfo; i++) {
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 8c954ebc0c7c..d6bfbe009296 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -25,26 +25,35 @@
#define bpf_target_sparc
#define bpf_target_defined
#else
- #undef bpf_target_defined
-#endif
/* Fall back to what the compiler says */
-#ifndef bpf_target_defined
#if defined(__x86_64__)
#define bpf_target_x86
+ #define bpf_target_defined
#elif defined(__s390__)
#define bpf_target_s390
+ #define bpf_target_defined
#elif defined(__arm__)
#define bpf_target_arm
+ #define bpf_target_defined
#elif defined(__aarch64__)
#define bpf_target_arm64
+ #define bpf_target_defined
#elif defined(__mips__)
#define bpf_target_mips
+ #define bpf_target_defined
#elif defined(__powerpc__)
#define bpf_target_powerpc
+ #define bpf_target_defined
#elif defined(__sparc__)
#define bpf_target_sparc
+ #define bpf_target_defined
+#endif /* no compiler target */
+
#endif
+
+#ifndef __BPF_TARGET_MISSING
+#define __BPF_TARGET_MISSING "GCC error \"Must specify a BPF target arch via __TARGET_ARCH_xxx\""
#endif
#if defined(bpf_target_x86)
@@ -287,7 +296,7 @@ struct pt_regs;
#elif defined(bpf_target_sparc)
#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); })
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
-#else
+#elif defined(bpf_target_defined)
#define BPF_KPROBE_READ_RET_IP(ip, ctx) \
({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \
@@ -295,13 +304,48 @@ struct pt_regs;
(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
#endif
+#if !defined(bpf_target_defined)
+
+#define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RET(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_FP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RC(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_SP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_IP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define PT_REGS_PARM1_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RET_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_FP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RC_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_SP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_IP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#endif /* !defined(bpf_target_defined) */
+
+#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
+#endif
+#ifndef ___bpf_apply
#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#endif
+#ifndef ___bpf_nth
#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#endif
+#ifndef ___bpf_narg
#define ___bpf_narg(...) \
___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-#define ___bpf_empty(...) \
- ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+#endif
#define ___bpf_ctx_cast0() ctx
#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
@@ -413,56 +457,4 @@ typeof(name(0)) name(struct pt_regs *ctx) \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
-#define ___bpf_fill0(arr, p, x) do {} while (0)
-#define ___bpf_fill1(arr, p, x) arr[p] = x
-#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
-#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
-#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
-#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
-#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
-#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
-#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
-#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
-#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
-#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
-#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
-#define ___bpf_fill(arr, args...) \
- ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
-
-/*
- * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
- * in a structure.
- */
-#define BPF_SEQ_PRINTF(seq, fmt, args...) \
-({ \
- static const char ___fmt[] = fmt; \
- unsigned long long ___param[___bpf_narg(args)]; \
- \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- ___bpf_fill(___param, args); \
- _Pragma("GCC diagnostic pop") \
- \
- bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
- ___param, sizeof(___param)); \
-})
-
-/*
- * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
- * an array of u64.
- */
-#define BPF_SNPRINTF(out, out_size, fmt, args...) \
-({ \
- static const char ___fmt[] = fmt; \
- unsigned long long ___param[___bpf_narg(args)]; \
- \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- ___bpf_fill(___param, args); \
- _Pragma("GCC diagnostic pop") \
- \
- bpf_snprintf(out, out_size, ___fmt, \
- ___param, sizeof(___param)); \
-})
-
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d57e13a13798..b46760b93bb4 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -443,7 +443,7 @@ struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
{
if (type_id >= btf->start_id + btf->nr_types)
- return NULL;
+ return errno = EINVAL, NULL;
return btf_type_by_id((struct btf *)btf, type_id);
}
@@ -510,7 +510,7 @@ size_t btf__pointer_size(const struct btf *btf)
int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
{
if (ptr_sz != 4 && ptr_sz != 8)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
btf->ptr_sz = ptr_sz;
return 0;
}
@@ -537,7 +537,7 @@ enum btf_endianness btf__endianness(const struct btf *btf)
int btf__set_endianness(struct btf *btf, enum btf_endianness endian)
{
if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN);
if (!btf->swapped_endian) {
@@ -568,8 +568,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
int i;
t = btf__type_by_id(btf, type_id);
- for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
- i++) {
+ for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) {
switch (btf_kind(t)) {
case BTF_KIND_INT:
case BTF_KIND_STRUCT:
@@ -592,12 +591,12 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
case BTF_KIND_ARRAY:
array = btf_array(t);
if (nelems && array->nelems > UINT32_MAX / nelems)
- return -E2BIG;
+ return libbpf_err(-E2BIG);
nelems *= array->nelems;
type_id = array->type;
break;
default:
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
t = btf__type_by_id(btf, type_id);
@@ -605,9 +604,9 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
done:
if (size < 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (nelems && size > UINT32_MAX / nelems)
- return -E2BIG;
+ return libbpf_err(-E2BIG);
return nelems * size;
}
@@ -640,7 +639,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
for (i = 0; i < vlen; i++, m++) {
align = btf__align_of(btf, m->type);
if (align <= 0)
- return align;
+ return libbpf_err(align);
max_align = max(max_align, align);
}
@@ -648,7 +647,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
}
default:
pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
- return 0;
+ return errno = EINVAL, 0;
}
}
@@ -667,7 +666,7 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
}
if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
return type_id;
}
@@ -687,7 +686,7 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
return i;
}
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
@@ -709,7 +708,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
return i;
}
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
static bool btf_is_modifiable(const struct btf *btf)
@@ -785,12 +784,12 @@ static struct btf *btf_new_empty(struct btf *base_btf)
struct btf *btf__new_empty(void)
{
- return btf_new_empty(NULL);
+ return libbpf_ptr(btf_new_empty(NULL));
}
struct btf *btf__new_empty_split(struct btf *base_btf)
{
- return btf_new_empty(base_btf);
+ return libbpf_ptr(btf_new_empty(base_btf));
}
static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
@@ -846,7 +845,7 @@ done:
struct btf *btf__new(const void *data, __u32 size)
{
- return btf_new(data, size, NULL);
+ return libbpf_ptr(btf_new(data, size, NULL));
}
static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
@@ -937,7 +936,8 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
goto done;
}
btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf);
- if (IS_ERR(btf))
+ err = libbpf_get_error(btf);
+ if (err)
goto done;
switch (gelf_getclass(elf)) {
@@ -953,9 +953,9 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
}
if (btf_ext && btf_ext_data) {
- *btf_ext = btf_ext__new(btf_ext_data->d_buf,
- btf_ext_data->d_size);
- if (IS_ERR(*btf_ext))
+ *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
+ err = libbpf_get_error(*btf_ext);
+ if (err)
goto done;
} else if (btf_ext) {
*btf_ext = NULL;
@@ -965,30 +965,24 @@ done:
elf_end(elf);
close(fd);
- if (err)
- return ERR_PTR(err);
- /*
- * btf is always parsed before btf_ext, so no need to clean up
- * btf_ext, if btf loading failed
- */
- if (IS_ERR(btf))
+ if (!err)
return btf;
- if (btf_ext && IS_ERR(*btf_ext)) {
- btf__free(btf);
- err = PTR_ERR(*btf_ext);
- return ERR_PTR(err);
- }
- return btf;
+
+ if (btf_ext)
+ btf_ext__free(*btf_ext);
+ btf__free(btf);
+
+ return ERR_PTR(err);
}
struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
{
- return btf_parse_elf(path, NULL, btf_ext);
+ return libbpf_ptr(btf_parse_elf(path, NULL, btf_ext));
}
struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf)
{
- return btf_parse_elf(path, base_btf, NULL);
+ return libbpf_ptr(btf_parse_elf(path, base_btf, NULL));
}
static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
@@ -1056,36 +1050,39 @@ err_out:
struct btf *btf__parse_raw(const char *path)
{
- return btf_parse_raw(path, NULL);
+ return libbpf_ptr(btf_parse_raw(path, NULL));
}
struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
{
- return btf_parse_raw(path, base_btf);
+ return libbpf_ptr(btf_parse_raw(path, base_btf));
}
static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
{
struct btf *btf;
+ int err;
if (btf_ext)
*btf_ext = NULL;
btf = btf_parse_raw(path, base_btf);
- if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
+ err = libbpf_get_error(btf);
+ if (!err)
return btf;
-
+ if (err != -EPROTO)
+ return ERR_PTR(err);
return btf_parse_elf(path, base_btf, btf_ext);
}
struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
{
- return btf_parse(path, NULL, btf_ext);
+ return libbpf_ptr(btf_parse(path, NULL, btf_ext));
}
struct btf *btf__parse_split(const char *path, struct btf *base_btf)
{
- return btf_parse(path, base_btf, NULL);
+ return libbpf_ptr(btf_parse(path, base_btf, NULL));
}
static int compare_vsi_off(const void *_a, const void *_b)
@@ -1178,7 +1175,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
}
}
- return err;
+ return libbpf_err(err);
}
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
@@ -1191,13 +1188,13 @@ int btf__load(struct btf *btf)
int err = 0;
if (btf->fd >= 0)
- return -EEXIST;
+ return libbpf_err(-EEXIST);
retry_load:
if (log_buf_size) {
log_buf = malloc(log_buf_size);
if (!log_buf)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
*log_buf = 0;
}
@@ -1229,7 +1226,7 @@ retry_load:
done:
free(log_buf);
- return err;
+ return libbpf_err(err);
}
int btf__fd(const struct btf *btf)
@@ -1305,7 +1302,7 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian);
if (!data)
- return NULL;
+ return errno = ENOMEM, NULL; /* errno carries the positive error code */
btf->raw_size = data_sz;
if (btf->swapped_endian)
@@ -1323,7 +1320,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
else if (offset - btf->start_str_off < btf->hdr->str_len)
return btf_strs_data(btf) + (offset - btf->start_str_off);
else
- return NULL;
+ return errno = EINVAL, NULL;
}
const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
@@ -1388,17 +1385,20 @@ exit_free:
int btf__get_from_id(__u32 id, struct btf **btf)
{
struct btf *res;
- int btf_fd;
+ int err, btf_fd;
*btf = NULL;
btf_fd = bpf_btf_get_fd_by_id(id);
if (btf_fd < 0)
- return -errno;
+ return libbpf_err(-errno);
res = btf_get_from_fd(btf_fd, NULL);
+ err = libbpf_get_error(res);
+
close(btf_fd);
- if (IS_ERR(res))
- return PTR_ERR(res);
+
+ if (err)
+ return libbpf_err(err);
*btf = res;
return 0;
@@ -1415,31 +1415,30 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
__s64 key_size, value_size;
__s32 container_id;
- if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
- max_name) {
+ if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) {
pr_warn("map:%s length of '____btf_map_%s' is too long\n",
map_name, map_name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
container_id = btf__find_by_name(btf, container_name);
if (container_id < 0) {
pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
map_name, container_name);
- return container_id;
+ return libbpf_err(container_id);
}
container_type = btf__type_by_id(btf, container_id);
if (!container_type) {
pr_warn("map:%s cannot find BTF type for container_id:%u\n",
map_name, container_id);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
pr_warn("map:%s container_name:%s is an invalid container struct\n",
map_name, container_name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
key = btf_members(container_type);
@@ -1448,25 +1447,25 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
key_size = btf__resolve_size(btf, key->type);
if (key_size < 0) {
pr_warn("map:%s invalid BTF key_type_size\n", map_name);
- return key_size;
+ return libbpf_err(key_size);
}
if (expected_key_size != key_size) {
pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
map_name, (__u32)key_size, expected_key_size);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
value_size = btf__resolve_size(btf, value->type);
if (value_size < 0) {
pr_warn("map:%s invalid BTF value_type_size\n", map_name);
- return value_size;
+ return libbpf_err(value_size);
}
if (expected_value_size != value_size) {
pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
map_name, (__u32)value_size, expected_value_size);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
*key_type_id = key->type;
@@ -1563,11 +1562,11 @@ int btf__find_str(struct btf *btf, const char *s)
/* BTF needs to be in a modifiable state to build string lookup index */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
off = strset__find_str(btf->strs_set, s);
if (off < 0)
- return off;
+ return libbpf_err(off);
return btf->start_str_off + off;
}
@@ -1588,11 +1587,11 @@ int btf__add_str(struct btf *btf, const char *s)
}
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
off = strset__add_str(btf->strs_set, s);
if (off < 0)
- return off;
+ return libbpf_err(off);
btf->hdr->str_len = strset__data_size(btf->strs_set);
@@ -1616,7 +1615,7 @@ static int btf_commit_type(struct btf *btf, int data_sz)
err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
if (err)
- return err;
+ return libbpf_err(err);
btf->hdr->type_len += data_sz;
btf->hdr->str_off += data_sz;
@@ -1653,21 +1652,21 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t
sz = btf_type_size(src_type);
if (sz < 0)
- return sz;
+ return libbpf_err(sz);
/* deconstruct BTF, if necessary, and invalidate raw_data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
memcpy(t, src_type, sz);
err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
if (err)
- return err;
+ return libbpf_err(err);
return btf_commit_type(btf, sz);
}
@@ -1688,21 +1687,21 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* byte_sz must be power of 2 */
if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* deconstruct BTF, if necessary, and invalidate raw_data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type) + sizeof(int);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
/* if something goes wrong later, we might end up with an extra string,
* but that shouldn't be a problem, because BTF can't be constructed
@@ -1736,20 +1735,20 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* byte_sz must be one of the explicitly allowed values */
if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
byte_sz != 16)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
name_off = btf__add_str(btf, name);
if (name_off < 0)
@@ -1780,15 +1779,15 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
int sz, name_off = 0;
if (validate_type_id(ref_type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -1831,15 +1830,15 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
int sz;
if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type) + sizeof(struct btf_array);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
t->name_off = 0;
t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0);
@@ -1860,12 +1859,12 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
int sz, name_off = 0;
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -1943,30 +1942,30 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
/* last type should be union/struct */
if (btf->nr_types == 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
t = btf_last_type(btf);
if (!btf_is_composite(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (validate_type_id(type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* best-effort bit field offset/size enforcement */
is_bitfield = bit_size || (bit_offset % 8 != 0);
if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* only offset 0 is allowed for unions */
if (btf_is_union(t) && bit_offset)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_member);
m = btf_add_type_mem(btf, sz);
if (!m)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -2008,15 +2007,15 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
/* byte_sz must be power of 2 */
if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -2048,25 +2047,25 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
/* last type should be BTF_KIND_ENUM */
if (btf->nr_types == 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
t = btf_last_type(btf);
if (!btf_is_enum(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (value < INT_MIN || value > UINT_MAX)
- return -E2BIG;
+ return libbpf_err(-E2BIG);
/* decompose and invalidate raw data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_enum);
v = btf_add_type_mem(btf, sz);
if (!v)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
name_off = btf__add_str(btf, name);
if (name_off < 0)
@@ -2096,7 +2095,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
{
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
switch (fwd_kind) {
case BTF_FWD_STRUCT:
@@ -2117,7 +2116,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
*/
return btf__add_enum(btf, name, sizeof(int));
default:
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
}
@@ -2132,7 +2131,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
{
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
}
@@ -2187,10 +2186,10 @@ int btf__add_func(struct btf *btf, const char *name,
int id;
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
linkage != BTF_FUNC_EXTERN)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
if (id > 0) {
@@ -2198,7 +2197,7 @@ int btf__add_func(struct btf *btf, const char *name,
t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0);
}
- return id;
+ return libbpf_err(id);
}
/*
@@ -2219,15 +2218,15 @@ int btf__add_func_proto(struct btf *btf, int ret_type_id)
int sz;
if (validate_type_id(ret_type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
/* start out with vlen=0; this will be adjusted when adding enum
* values, if necessary
@@ -2254,23 +2253,23 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
int sz, name_off = 0;
if (validate_type_id(type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* last type should be BTF_KIND_FUNC_PROTO */
if (btf->nr_types == 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
t = btf_last_type(btf);
if (!btf_is_func_proto(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_param);
p = btf_add_type_mem(btf, sz);
if (!p)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -2308,21 +2307,21 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
linkage != BTF_VAR_GLOBAL_EXTERN)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (validate_type_id(type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* deconstruct BTF, if necessary, and invalidate raw_data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type) + sizeof(struct btf_var);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
name_off = btf__add_str(btf, name);
if (name_off < 0)
@@ -2357,15 +2356,15 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
name_off = btf__add_str(btf, name);
if (name_off < 0)
@@ -2397,22 +2396,22 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
/* last type should be BTF_KIND_DATASEC */
if (btf->nr_types == 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
t = btf_last_type(btf);
if (!btf_is_datasec(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (validate_type_id(var_type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_var_secinfo);
v = btf_add_type_mem(btf, sz);
if (!v)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
v->type = var_type_id;
v->offset = offset;
@@ -2614,11 +2613,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
err = btf_ext_parse_hdr(data, size);
if (err)
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
btf_ext = calloc(1, sizeof(struct btf_ext));
if (!btf_ext)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
btf_ext->data_size = size;
btf_ext->data = malloc(size);
@@ -2628,9 +2627,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
}
memcpy(btf_ext->data, data, size);
- if (btf_ext->hdr->hdr_len <
- offsetofend(struct btf_ext_header, line_info_len))
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) {
+ err = -EINVAL;
goto done;
+ }
+
err = btf_ext_setup_func_info(btf_ext);
if (err)
goto done;
@@ -2639,8 +2640,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
if (err)
goto done;
- if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) {
+ err = -EINVAL;
goto done;
+ }
+
err = btf_ext_setup_core_relos(btf_ext);
if (err)
goto done;
@@ -2648,7 +2652,7 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
done:
if (err) {
btf_ext__free(btf_ext);
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
}
return btf_ext;
@@ -2687,7 +2691,7 @@ static int btf_ext_reloc_info(const struct btf *btf,
existing_len = (*cnt) * record_size;
data = realloc(*info, existing_len + records_len);
if (!data)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
memcpy(data + existing_len, sinfo->data, records_len);
/* adjust insn_off only, the rest data will be passed
@@ -2697,15 +2701,14 @@ static int btf_ext_reloc_info(const struct btf *btf,
__u32 *insn_off;
insn_off = data + existing_len + (i * record_size);
- *insn_off = *insn_off / sizeof(struct bpf_insn) +
- insns_cnt;
+ *insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt;
}
*info = data;
*cnt += sinfo->num_info;
return 0;
}
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
int btf_ext__reloc_func_info(const struct btf *btf,
@@ -2894,11 +2897,13 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
if (IS_ERR(d)) {
pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
-if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+if (btf_ensure_modifiable(btf)) {
+ /* leave through the common exit so the dedup state 'd' is freed */
+ err = -ENOMEM;
+ goto done;
+}
err = btf_dedup_prep(d);
if (err) {
done:
btf_dedup_free(d);
- return err;
+ return libbpf_err(err);
}
#define BTF_UNPROCESSED_ID ((__u32)-1)
@@ -4411,7 +4414,7 @@ struct btf *libbpf_find_kernel_btf(void)
char path[PATH_MAX + 1];
struct utsname buf;
struct btf *btf;
- int i;
+ int i, err;
uname(&buf);
@@ -4425,17 +4428,16 @@ struct btf *libbpf_find_kernel_btf(void)
btf = btf__parse_raw(path);
else
btf = btf__parse_elf(path, NULL);
-
- pr_debug("loading kernel BTF '%s': %ld\n",
- path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
- if (IS_ERR(btf))
+ err = libbpf_get_error(btf);
+ pr_debug("loading kernel BTF '%s': %d\n", path, err);
+ if (err)
continue;
return btf;
}
pr_warn("failed to find valid kernel BTF\n");
- return ERR_PTR(-ESRCH);
+ return libbpf_err_ptr(-ESRCH);
}
int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 5e2809d685bf..5dc6b5172bb3 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -128,7 +128,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
d = calloc(1, sizeof(struct btf_dump));
if (!d)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
d->btf = btf;
d->btf_ext = btf_ext;
@@ -156,7 +156,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
return d;
err:
btf_dump__free(d);
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
}
static int btf_dump_resize(struct btf_dump *d)
@@ -236,16 +236,16 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
int err, i;
if (id > btf__get_nr_types(d->btf))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
err = btf_dump_resize(d);
if (err)
- return err;
+ return libbpf_err(err);
d->emit_queue_cnt = 0;
err = btf_dump_order_type(d, id, false);
if (err < 0)
- return err;
+ return libbpf_err(err);
for (i = 0; i < d->emit_queue_cnt; i++)
btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
@@ -1075,11 +1075,11 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
int lvl, err;
if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
err = btf_dump_resize(d);
if (err)
- return -EINVAL;
+ return libbpf_err(err);
fname = OPTS_GET(opts, field_name, "");
lvl = OPTS_GET(opts, indent_level, 0);
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
new file mode 100644
index 000000000000..8df718a6b142
--- /dev/null
+++ b/tools/lib/bpf/gen_loader.c
@@ -0,0 +1,729 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/filter.h>
+#include "btf.h"
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+#include "bpf_gen_internal.h"
+#include "skel_internal.h"
+
+#define MAX_USED_MAPS 64
+#define MAX_USED_PROGS 32
+
+/* The following structure describes the stack layout of the loader program.
+ * In addition R6 contains the pointer to context.
+ * R7 contains the result of the last sys_bpf command (typically error or FD).
+ * R9 contains the result of the last sys_close command.
+ *
+ * Naming convention:
+ * ctx - bpf program context
+ * stack - bpf program stack
+ * blob - bpf_attr-s, strings, insns, map data.
+ * All the bytes that loader prog will use for read/write.
+ */
+/* Every member is a 4-byte slot. The prologue emitted by bpf_gen__init()
+ * zeroes the whole struct, and its cleanup code walks the struct in 4-byte
+ * steps and calls sys_close on each slot that holds a positive value.
+ */
+struct loader_stack {
+ __u32 btf_fd;
+ __u32 map_fd[MAX_USED_MAPS];
+ __u32 prog_fd[MAX_USED_PROGS];
+ __u32 inner_map_fd;
+};
+
+/* Offset of a loader_stack member relative to the frame pointer (R10):
+ * the struct occupies the sizeof(struct loader_stack) bytes just below it,
+ * so the result is negative and is narrowed to the 16-bit insn offset type.
+ */
+#define stack_off(field) \
+ (__s16)(-sizeof(struct loader_stack) + offsetof(struct loader_stack, field))
+
+/* Blob offset of @field inside the bpf_attr that starts at blob offset @attr.
+ * @attr is parenthesized so that an expression argument keeps its meaning.
+ */
+#define attr_field(attr, field) ((attr) + offsetof(union bpf_attr, field))
+
+/* Grow the instruction buffer so that @size more bytes fit after insn_cur.
+ * Returns 0 on success or a negative errno. A failure is also stored in
+ * gen->error, and any later call returns that stored error immediately.
+ */
+static int realloc_insn_buf(struct bpf_gen *gen, __u32 size)
+{
+	size_t used = gen->insn_cur - gen->insn_start;
+	void *grown;
+
+	if (gen->error)
+		return gen->error;
+
+	/* keep the total size representable as a signed 32-bit value */
+	if (size > INT32_MAX || used + size > INT32_MAX) {
+		gen->error = -ERANGE;
+		return -ERANGE;
+	}
+
+	grown = realloc(gen->insn_start, used + size);
+	if (grown) {
+		gen->insn_start = grown;
+		gen->insn_cur = grown + used;
+		return 0;
+	}
+
+	/* allocation failed: release the old buffer and record the error */
+	gen->error = -ENOMEM;
+	free(gen->insn_start);
+	gen->insn_start = NULL;
+	return -ENOMEM;
+}
+
+/* Grow the data blob so that @size more bytes fit after data_cur.
+ * Returns 0 on success or a negative errno. A failure is also stored in
+ * gen->error, and any later call returns that stored error immediately.
+ */
+static int realloc_data_buf(struct bpf_gen *gen, __u32 size)
+{
+	size_t used = gen->data_cur - gen->data_start;
+	void *grown;
+
+	if (gen->error)
+		return gen->error;
+
+	/* keep the total size representable as a signed 32-bit value */
+	if (size > INT32_MAX || used + size > INT32_MAX) {
+		gen->error = -ERANGE;
+		return -ERANGE;
+	}
+
+	grown = realloc(gen->data_start, used + size);
+	if (grown) {
+		gen->data_start = grown;
+		gen->data_cur = grown + used;
+		return 0;
+	}
+
+	/* allocation failed: release the old blob and record the error */
+	gen->error = -ENOMEM;
+	free(gen->data_start);
+	gen->data_start = NULL;
+	return -ENOMEM;
+}
+
+/* Append one instruction to the program; silently does nothing once the
+ * instruction buffer can no longer be grown.
+ */
+static void emit(struct bpf_gen *gen, struct bpf_insn insn)
+{
+	const size_t insn_sz = sizeof(insn);
+
+	if (realloc_insn_buf(gen, insn_sz) != 0)
+		return;
+
+	memcpy(gen->insn_cur, &insn, insn_sz);
+	gen->insn_cur += insn_sz;
+}
+
+/* Append two instructions in order, e.g. both halves of a 64-bit immediate load. */
+static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn insn2)
+{
+	const struct bpf_insn pair[2] = { insn1, insn2 };
+	int k;
+
+	for (k = 0; k < 2; k++)
+		emit(gen, pair[k]);
+}
+
+/* Start the loader program: record the log level, emit the prologue (save
+ * ctx, zero the loader_stack slots) and emit the shared cleanup block that
+ * error branches jump to.
+ */
+void bpf_gen__init(struct bpf_gen *gen, int log_level)
+{
+	size_t stack_sz = sizeof(struct loader_stack);
+	/* cleanup block length: 3 insns per 4-byte slot + 2 insns to return R7 */
+	size_t cleanup_insn_cnt = (stack_sz / 4) * 3 + 2;
+	int fd_off = 0;
+
+	gen->log_level = log_level;
+
+	/* R6 = ctx */
+	emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1));
+
+	/* bzero stack: probe_read_kernel(R10 - stack_sz, stack_sz, 0) */
+	emit(gen, BPF_MOV64_REG(BPF_REG_1, BPF_REG_10));
+	emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -stack_sz));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_2, stack_sz));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel));
+
+	/* the normal path jumps over the cleanup block emitted below */
+	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, cleanup_insn_cnt));
+
+	/* byte offset of the cleanup block: all error branches land here */
+	gen->cleanup_label = gen->insn_cur - gen->insn_start;
+
+	/* cleanup: load each slot and close it unless it is <= 0 */
+	while (fd_off < stack_sz) {
+		emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + fd_off));
+		emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1));
+		emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close));
+		fd_off += 4;
+	}
+
+	/* return the sys_bpf error code held in R7 */
+	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+	emit(gen, BPF_EXIT_INSN());
+}
+
+/* Copy @size bytes of @data to the end of the blob and return the blob
+ * offset at which they were stored. Returns 0 when the blob cannot be
+ * grown; the failure itself is recorded in gen->error.
+ */
+static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
+{
+	void *dst;
+
+	if (realloc_data_buf(gen, size) != 0)
+		return 0;
+
+	dst = gen->data_cur;
+	memcpy(dst, data, size);
+	gen->data_cur += size;
+
+	return dst - gen->data_start;
+}
+
+/* Translate an access width in bytes (1, 2, 4 or 8) into the matching BPF
+ * size modifier; any other width yields -1.
+ */
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+	if (sz == 1)
+		return BPF_B;
+	if (sz == 2)
+		return BPF_H;
+	if (sz == 4)
+		return BPF_W;
+	if (sz == 8)
+		return BPF_DW;
+	return -1;
+}
+
+/* Store the address of blob offset @data into the 8 bytes at blob offset @off:
+ *   *(u64 *)(blob + off) = (u64)(void *)(blob + data)
+ */
+static void emit_rel_store(struct bpf_gen *gen, int off, int data)
+{
+	/* R0 = blob + data */
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, data));
+	/* R1 = blob + off */
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, off));
+	/* *(u64 *)R1 = R0 */
+	emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
+}
+
+/* Store the address of a stack slot into the 8 bytes at blob offset @off:
+ *   *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off)
+ */
+static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off)
+{
+	/* R0 = R10 + stack_off */
+	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10));
+	emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off));
+	/* R1 = blob + off */
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, off));
+	/* *(u64 *)R1 = R0 */
+	emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
+}
+
+/* Copy @size bytes from ctx offset @ctx_off to blob offset @off. With
+ * @check_non_zero set, a zero value in ctx leaves the blob untouched, so
+ * the default already stored there is kept (for example max_entries).
+ */
+static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off,
+			  bool check_non_zero)
+{
+	const int bpf_sz = insn_bytes_to_bpf_size(size);
+
+	/* R0 = *(ctx + ctx_off) */
+	emit(gen, BPF_LDX_MEM(bpf_sz, BPF_REG_0, BPF_REG_6, ctx_off));
+	if (check_non_zero) {
+		/* skip the 3 insns below (2-insn address load + store) when R0 == 0 */
+		emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3));
+	}
+	/* R1 = blob + off; *R1 = R0 */
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, off));
+	emit(gen, BPF_STX_MEM(bpf_sz, BPF_REG_1, BPF_REG_0, 0));
+}
+
+/* Copy @size bytes from the stack slot at R10 + @stack_off to blob offset @off. */
+static void move_stack2blob(struct bpf_gen *gen, int off, int size, int stack_off)
+{
+	const int bpf_sz = insn_bytes_to_bpf_size(size);
+
+	/* R0 = *(R10 + stack_off) */
+	emit(gen, BPF_LDX_MEM(bpf_sz, BPF_REG_0, BPF_REG_10, stack_off));
+	/* R1 = blob + off; *R1 = R0 */
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, off));
+	emit(gen, BPF_STX_MEM(bpf_sz, BPF_REG_1, BPF_REG_0, 0));
+}
+
+/* Copy @size bytes from the stack slot at R10 + @stack_off to ctx offset @ctx_off. */
+static void move_stack2ctx(struct bpf_gen *gen, int ctx_off, int size, int stack_off)
+{
+	const int bpf_sz = insn_bytes_to_bpf_size(size);
+
+	/* R0 = *(R10 + stack_off); *(R6 + ctx_off) = R0 */
+	emit(gen, BPF_LDX_MEM(bpf_sz, BPF_REG_0, BPF_REG_10, stack_off));
+	emit(gen, BPF_STX_MEM(bpf_sz, BPF_REG_6, BPF_REG_0, ctx_off));
+}
+
+/* Emit a call to the sys_bpf helper with command @cmd and the bpf_attr of
+ * @attr_size bytes stored at blob offset @attr; the result is kept in R7.
+ */
+static void emit_sys_bpf(struct bpf_gen *gen, int cmd, int attr, int attr_size)
+{
+	/* R1 = cmd, R2 = blob + attr, R3 = attr_size */
+	emit(gen, BPF_MOV64_IMM(BPF_REG_1, cmd));
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, attr));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_3, attr_size));
+
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_bpf));
+
+	/* R7 = R0, so the result survives later helper calls */
+	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+}
+
+/* True when @value survives a round trip through a signed 16-bit integer. */
+static bool is_simm16(__s64 value)
+{
+	__s16 narrowed = (__s16)value;
+
+	return (__s64)narrowed == value;
+}
+
+/* Emit "if (R7 < 0) goto cleanup", where R7 holds the result of the last
+ * sys_bpf command. When the backward distance to the cleanup label does not
+ * fit a 16-bit jump offset, the generator is put into the -ERANGE state.
+ */
+static void emit_check_err(struct bpf_gen *gen)
+{
+	/* distance back to the cleanup label, in insns (presumably 8 bytes each),
+	 * counted from the insn after the jump
+	 */
+	__s64 off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1;
+
+	if (!is_simm16(off)) {
+		gen->error = -ERANGE;
+		emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1));
+		return;
+	}
+
+	emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, off));
+}
+
+/* Emit a trace_printk() call that prints up to two registers.
+ *
+ * @fmt/@args are expanded at generation time; the resulting string is put
+ * into the blob and becomes the trace_printk format. Nothing is emitted when
+ * gen->log_level is zero. With logging enabled the number of emitted insns
+ * depends only on @reg1/@reg2, which callers computing jump offsets rely on.
+ *
+ * reg1 and reg2 should not be R1 - R5. They can be R0, R6 - R10.
+ * A negative register number means "no such argument".
+ */
+static void emit_debug(struct bpf_gen *gen, int reg1, int reg2,
+		       const char *fmt, va_list args)
+{
+	char buf[1024];
+	int addr, len, ret;
+
+	if (!gen->log_level)
+		return;
+	ret = vsnprintf(buf, sizeof(buf), fmt, args);
+	if (ret < 0)
+		/* buf is unspecified after a formatting error: fall back to an
+		 * empty string rather than returning, so the insn count is kept
+		 */
+		buf[0] = '\0';
+	if (ret < (int)sizeof(buf) - 7 && reg1 >= 0 && reg2 < 0)
+		/* The special case to accommodate common debug_ret():
+		 * to avoid specifying BPF_REG_7 and adding " r=%%d" to
+		 * prints explicitly.
+		 */
+		strcat(buf, " r=%d");
+	len = strlen(buf) + 1;
+	addr = add_data(gen, buf, len);
+
+	/* R1 = format string in the blob, R2 = its size including the NUL */
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, addr));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+	if (reg1 >= 0)
+		emit(gen, BPF_MOV64_REG(BPF_REG_3, reg1));
+	if (reg2 >= 0)
+		emit(gen, BPF_MOV64_REG(BPF_REG_4, reg2));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_trace_printk));
+}
+
+/* Variadic front end for emit_debug() that prints up to two registers
+ * (reg1, reg2) with the given printf-style message.
+ */
+static void debug_regs(struct bpf_gen *gen, int reg1, int reg2, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	emit_debug(gen, reg1, reg2, fmt, ap);
+	va_end(ap);
+}
+
+/* Variadic front end for emit_debug() that prints the message together
+ * with R7 as the only register argument.
+ */
+static void debug_ret(struct bpf_gen *gen, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	emit_debug(gen, BPF_REG_7, -1, fmt, ap);
+	va_end(ap);
+}
+
+/* Emit insns that call the sys_close helper on the FD held in R1.
+ * The call is jumped over when R1 <= 0. R9 keeps a copy of the FD so that
+ * it can be printed after the call.
+ */
+static void __emit_sys_close(struct bpf_gen *gen)
+{
+	/* Insns to jump over when R1 <= 0: the mov and the call below,
+	 * plus the 6 insns debug_regs() emits when logging is enabled.
+	 */
+	int skip = gen->log_level ? 2 + 6 : 2;
+
+	emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, skip));
+	emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_1));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close));
+	debug_regs(gen, BPF_REG_9, BPF_REG_0, "close(%%d) = %%d");
+}
+
+/* Emit insns that close the FD stored in the 32-bit stack slot at
+ * R10 + 'stack_off'.
+ */
+static void emit_sys_close_stack(struct bpf_gen *gen, int stack_off)
+{
+	/* R1 = *(u32 *)(R10 + stack_off) */
+	emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, stack_off));
+	__emit_sys_close(gen);
+}
+
+/* Emit insns that close the FD stored as a 32-bit value at blob
+ * location 'blob_off'.
+ */
+static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off)
+{
+	/* R0 = blob location 'blob_off' */
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, blob_off));
+	/* R1 = *(u32 *)(R0 + 0) */
+	emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0));
+	__emit_sys_close(gen);
+}
+
+/* Emit the tail of the loader program and hand the result to the caller.
+ *
+ * The tail closes the BTF FD kept on the stack, copies every prog and map FD
+ * from the stack to its descriptor (addressed relative to R6) and exits
+ * with R0 = 0. When no error was recorded, the generated insns and data are
+ * published through gen->opts.
+ *
+ * Returns gen->error (0 on success).
+ */
+int bpf_gen__finish(struct bpf_gen *gen)
+{
+	int i, ctx_off;
+
+	emit_sys_close_stack(gen, stack_off(btf_fd));
+
+	/* prog descriptors are laid out after all map descriptors */
+	for (i = 0; i < gen->nr_progs; i++) {
+		ctx_off = sizeof(struct bpf_loader_ctx) +
+			  sizeof(struct bpf_map_desc) * gen->nr_maps +
+			  sizeof(struct bpf_prog_desc) * i +
+			  offsetof(struct bpf_prog_desc, prog_fd);
+		move_stack2ctx(gen, ctx_off, 4, stack_off(prog_fd[i]));
+	}
+
+	/* map descriptors directly follow struct bpf_loader_ctx */
+	for (i = 0; i < gen->nr_maps; i++) {
+		ctx_off = sizeof(struct bpf_loader_ctx) +
+			  sizeof(struct bpf_map_desc) * i +
+			  offsetof(struct bpf_map_desc, map_fd);
+		move_stack2ctx(gen, ctx_off, 4, stack_off(map_fd[i]));
+	}
+
+	/* return 0; */
+	emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
+	emit(gen, BPF_EXIT_INSN());
+
+	pr_debug("gen: finish %d\n", gen->error);
+	if (gen->error)
+		return gen->error;
+
+	gen->opts->insns = gen->insn_start;
+	gen->opts->insns_sz = gen->insn_cur - gen->insn_start;
+	gen->opts->data = gen->data_start;
+	gen->opts->data_sz = gen->data_cur - gen->data_start;
+	return gen->error;
+}
+
+/* Release the data buffer, the insn buffer and the generator itself.
+ * A NULL 'gen' is accepted and ignored.
+ */
+void bpf_gen__free(struct bpf_gen *gen)
+{
+	if (gen) {
+		free(gen->data_start);
+		free(gen->insn_start);
+		free(gen);
+	}
+}
+
+/* Emit code that loads the given raw BTF with the BPF_BTF_LOAD command.
+ *
+ * The raw BTF and a pre-filled union bpf_attr are appended to the blob.
+ * The generated code copies the log settings from the loader ctx into the
+ * attr, stores a pointer to the BTF data in it, issues the command and,
+ * after the error check, saves the result (R7) in the btf_fd stack slot.
+ */
+void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,
+		       __u32 btf_raw_size)
+{
+	int attr_size = offsetofend(union bpf_attr, btf_log_level);
+	union bpf_attr attr;
+	int data_off, attr_off;
+
+	memset(&attr, 0, attr_size);
+	pr_debug("gen: load_btf: size %d\n", btf_raw_size);
+
+	/* blob layout: raw BTF first, then the attr describing it */
+	data_off = add_data(gen, btf_raw_data, btf_raw_size);
+	attr.btf_size = btf_raw_size;
+	attr_off = add_data(gen, &attr, attr_size);
+
+	/* attr.btf_log_{level,size,buf} come from the user supplied ctx */
+	move_ctx2blob(gen, attr_field(attr_off, btf_log_level), 4,
+		      offsetof(struct bpf_loader_ctx, log_level), false);
+	move_ctx2blob(gen, attr_field(attr_off, btf_log_size), 4,
+		      offsetof(struct bpf_loader_ctx, log_size), false);
+	move_ctx2blob(gen, attr_field(attr_off, btf_log_buf), 8,
+		      offsetof(struct bpf_loader_ctx, log_buf), false);
+
+	/* attr.btf = pointer to the BTF data */
+	emit_rel_store(gen, attr_field(attr_off, btf), data_off);
+
+	/* issue BTF_LOAD and bail out to cleanup on failure */
+	emit_sys_bpf(gen, BPF_BTF_LOAD, attr_off, attr_size);
+	debug_ret(gen, "btf_load size %d", btf_raw_size);
+	emit_check_err(gen);
+
+	/* on success R7 is the btf_fd; keep it in the stack */
+	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, stack_off(btf_fd)));
+}
+
+/* Emit code that creates one map with the BPF_MAP_CREATE command.
+ *
+ * map_idx >= 0 identifies a map tracked by libbpf's loading logic and must be
+ * equal to the number of maps generated so far; otherwise -EDOM is recorded in
+ * gen->error. map_idx < 0 creates an inner map whose FD is kept in the
+ * inner_map_fd stack slot for the map-in-map created next.
+ */
+void bpf_gen__map_create(struct bpf_gen *gen,
+			 struct bpf_create_map_attr *map_attr, int map_idx)
+{
+	int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id);
+	bool is_map_in_map;
+	union bpf_attr attr;
+	int attr_off;
+
+	/* build the attr as known at generation time */
+	memset(&attr, 0, attr_size);
+	attr.map_type = map_attr->map_type;
+	attr.key_size = map_attr->key_size;
+	attr.value_size = map_attr->value_size;
+	attr.map_flags = map_attr->map_flags;
+	memcpy(attr.map_name, map_attr->name,
+	       min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1));
+	attr.numa_node = map_attr->numa_node;
+	attr.map_ifindex = map_attr->map_ifindex;
+	attr.max_entries = map_attr->max_entries;
+
+	/* BTF key/value type ids are left at zero for the listed map types */
+	switch (attr.map_type) {
+	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+	case BPF_MAP_TYPE_CGROUP_ARRAY:
+	case BPF_MAP_TYPE_STACK_TRACE:
+	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+	case BPF_MAP_TYPE_HASH_OF_MAPS:
+	case BPF_MAP_TYPE_DEVMAP:
+	case BPF_MAP_TYPE_DEVMAP_HASH:
+	case BPF_MAP_TYPE_CPUMAP:
+	case BPF_MAP_TYPE_XSKMAP:
+	case BPF_MAP_TYPE_SOCKMAP:
+	case BPF_MAP_TYPE_SOCKHASH:
+	case BPF_MAP_TYPE_QUEUE:
+	case BPF_MAP_TYPE_STACK:
+	case BPF_MAP_TYPE_RINGBUF:
+		break;
+	default:
+		attr.btf_key_type_id = map_attr->btf_key_type_id;
+		attr.btf_value_type_id = map_attr->btf_value_type_id;
+	}
+
+	pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n",
+		 attr.map_name, map_idx, map_attr->map_type, attr.btf_value_type_id);
+
+	attr_off = add_data(gen, &attr, attr_size);
+
+	/* attr.btf_fd = btf_fd saved in the stack earlier */
+	if (attr.btf_value_type_id)
+		move_stack2blob(gen, attr_field(attr_off, btf_fd), 4,
+				stack_off(btf_fd));
+
+	/* map-in-map types take the inner map FD saved in the stack */
+	is_map_in_map = attr.map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+			attr.map_type == BPF_MAP_TYPE_HASH_OF_MAPS;
+	if (is_map_in_map)
+		move_stack2blob(gen, attr_field(attr_off, inner_map_fd), 4,
+				stack_off(inner_map_fd));
+
+	/* conditionally update max_entries from the map descriptor */
+	if (map_idx >= 0)
+		move_ctx2blob(gen, attr_field(attr_off, max_entries), 4,
+			      sizeof(struct bpf_loader_ctx) +
+			      sizeof(struct bpf_map_desc) * map_idx +
+			      offsetof(struct bpf_map_desc, max_entries),
+			      true /* check that max_entries != 0 */);
+
+	/* issue MAP_CREATE and bail out to cleanup on failure */
+	emit_sys_bpf(gen, BPF_MAP_CREATE, attr_off, attr_size);
+	debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d",
+		  attr.map_name, map_idx, map_attr->map_type, attr.value_size,
+		  attr.btf_value_type_id);
+	emit_check_err(gen);
+
+	/* on success R7 is the map FD; remember it in the stack */
+	if (map_idx < 0) {
+		/* This function is called with map_idx >= 0 for all maps
+		 * that libbpf loading logic tracks, and with -1 to create
+		 * an inner map.
+		 */
+		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+				      stack_off(inner_map_fd)));
+	} else if (map_idx != gen->nr_maps) {
+		gen->error = -EDOM; /* internal bug */
+		return;
+	} else {
+		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+				      stack_off(map_fd[map_idx])));
+		gen->nr_maps++;
+	}
+
+	/* the inner map FD is no longer needed once the outer map exists */
+	if (is_map_in_map)
+		emit_sys_close_stack(gen, stack_off(inner_map_fd));
+}
+
+/* Remember the attach target of the program that will be loaded next.
+ *
+ * The BTF kind and name prefix are derived from the attach 'type', and
+ * "<prefix><attach_name>" is stored in gen->attach_target. If the name does
+ * not fit (or formatting fails), -ENOSPC is recorded in gen->error.
+ */
+void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
+				   enum bpf_attach_type type)
+{
+	const char *prefix;
+	int kind, ret;
+
+	btf_get_kernel_prefix_kind(type, &prefix, &kind);
+	gen->attach_kind = kind;
+	ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s",
+		       prefix, attach_name);
+	/* snprintf() returns the length the full string would have had, so
+	 * every value >= the buffer size means the name was truncated.
+	 */
+	if (ret < 0 || ret >= (int)sizeof(gen->attach_target))
+		gen->error = -ENOSPC;
+}
+
+/* Emit a btf_find_by_name_kind() call for the attach target recorded in
+ * gen->attach_target / gen->attach_kind. The result is left in R7 and
+ * checked for an error.
+ */
+static void emit_find_attach_target(struct bpf_gen *gen)
+{
+	int name_len = strlen(gen->attach_target) + 1;
+	int name_off;
+
+	pr_debug("gen: find_attach_tgt %s %d\n", gen->attach_target, gen->attach_kind);
+	name_off = add_data(gen, gen->attach_target, name_len);
+
+	/* R1 = name, R2 = name size, R3 = kind, R4 = 0 */
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, name_off));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_2, name_len));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_3, gen->attach_kind));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind));
+
+	/* R7 = helper result */
+	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+	debug_ret(gen, "find_by_name_kind(%s,%d)",
+		  gen->attach_target, gen->attach_kind);
+	emit_check_err(gen);
+	/* if successful, btf_id is in lower 32-bit of R7 and
+	 * btf_obj_fd is in upper 32-bit
+	 */
+}
+
+/* Append an extern ksym relocation (name, BTF kind, insn index) to
+ * gen->relos. The 'name' pointer is stored as is, not copied.
+ * On allocation failure -ENOMEM is recorded in gen->error and the
+ * existing array is left untouched.
+ */
+void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind,
+			    int insn_idx)
+{
+	struct ksym_relo_desc *grown, *slot;
+
+	grown = libbpf_reallocarray(gen->relos, gen->relo_cnt + 1, sizeof(*grown));
+	if (!grown) {
+		gen->error = -ENOMEM;
+		return;
+	}
+	gen->relos = grown;
+
+	/* fill in the newly added last element */
+	slot = &grown[gen->relo_cnt];
+	slot->name = name;
+	slot->kind = kind;
+	slot->insn_idx = insn_idx;
+	gen->relo_cnt++;
+}
+
+/* Emit code that resolves one extern ksym relocation.
+ *
+ * The symbol is looked up with btf_find_by_name_kind() and the lower 32 bits
+ * of the result are stored into insn[insn_idx].imm of the program insns kept
+ * at blob location 'insns'. For BTF_KIND_VAR the upper 32 bits are
+ * additionally stored into insn[insn_idx + 1].imm.
+ */
+static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns)
+{
+	int name_len = strlen(relo->name) + 1;
+	int name_off, imm_off;
+
+	pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx);
+	name_off = add_data(gen, relo->name, name_len);
+
+	/* R1 = name, R2 = name size, R3 = kind, R4 = 0 */
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, name_off));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_2, name_len));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind));
+
+	/* R7 = helper result */
+	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+	debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind);
+	emit_check_err(gen);
+
+	/* insn[insn_idx].imm = btf_id */
+	imm_off = insns + sizeof(struct bpf_insn) * relo->insn_idx +
+		  offsetof(struct bpf_insn, imm);
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, imm_off));
+	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, 0));
+
+	if (relo->kind != BTF_KIND_VAR)
+		return;
+
+	/* insn[insn_idx + 1].imm = btf_obj_fd (upper half of R7) */
+	emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));
+	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
+			      sizeof(struct bpf_insn)));
+}
+
+/* Emit resolution code for every recorded extern relocation, in the
+ * order they were recorded.
+ */
+static void emit_relos(struct bpf_gen *gen, int insns)
+{
+	int idx;
+
+	for (idx = 0; idx < gen->relo_cnt; idx++)
+		emit_relo(gen, &gen->relos[idx], insns);
+}
+
+/* Emit code that closes the FD stored in insn[insn_idx + 1].imm for every
+ * BTF_KIND_VAR relocation, then drop the recorded relocations.
+ */
+static void cleanup_relos(struct bpf_gen *gen, int insns)
+{
+	int idx, imm_off;
+
+	for (idx = 0; idx < gen->relo_cnt; idx++) {
+		if (gen->relos[idx].kind == BTF_KIND_VAR) {
+			/* close fd recorded in insn[insn_idx + 1].imm */
+			imm_off = insns +
+				  sizeof(struct bpf_insn) * (gen->relos[idx].insn_idx + 1) +
+				  offsetof(struct bpf_insn, imm);
+			emit_sys_close_blob(gen, imm_off);
+		}
+	}
+
+	if (!gen->relo_cnt)
+		return;
+
+	free(gen->relos);
+	gen->relos = NULL;
+	gen->relo_cnt = 0;
+}
+
+/* Emit code that loads one program with the BPF_PROG_LOAD command.
+ *
+ * License, insns, func_info, line_info and the pre-filled union bpf_attr are
+ * appended to the blob, in that order. The generated code patches pointers,
+ * log settings, BTF FD, the attach target (if one was recorded) and extern
+ * relocations into the blob, issues the command, closes the FDs obtained
+ * while resolving relocations and the attach target, and finally saves the
+ * prog FD in the next prog_fd stack slot.
+ */
+void bpf_gen__prog_load(struct bpf_gen *gen,
+			struct bpf_prog_load_params *load_attr, int prog_idx)
+{
+	int attr_size = offsetofend(union bpf_attr, fd_array);
+	int attr_off, license_off, insns_off, func_info_off, line_info_off;
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_size);
+	pr_debug("gen: prog_load: type %d insns_cnt %zd\n",
+		 load_attr->prog_type, load_attr->insn_cnt);
+
+	/* license string and insns go into the blob first */
+	license_off = add_data(gen, load_attr->license, strlen(load_attr->license) + 1);
+	insns_off = add_data(gen, load_attr->insns,
+			     load_attr->insn_cnt * sizeof(struct bpf_insn));
+
+	attr.prog_type = load_attr->prog_type;
+	attr.expected_attach_type = load_attr->expected_attach_type;
+	attr.attach_btf_id = load_attr->attach_btf_id;
+	attr.prog_ifindex = load_attr->prog_ifindex;
+	attr.kern_version = 0;
+	attr.insn_cnt = (__u32)load_attr->insn_cnt;
+	attr.prog_flags = load_attr->prog_flags;
+
+	/* func_info records */
+	attr.func_info_rec_size = load_attr->func_info_rec_size;
+	attr.func_info_cnt = load_attr->func_info_cnt;
+	func_info_off = add_data(gen, load_attr->func_info,
+				 attr.func_info_cnt * attr.func_info_rec_size);
+
+	/* line_info records */
+	attr.line_info_rec_size = load_attr->line_info_rec_size;
+	attr.line_info_cnt = load_attr->line_info_cnt;
+	line_info_off = add_data(gen, load_attr->line_info,
+				 attr.line_info_cnt * attr.line_info_rec_size);
+
+	memcpy(attr.prog_name, load_attr->name,
+	       min((unsigned)strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
+	attr_off = add_data(gen, &attr, attr_size);
+
+	/* attr.{license,insns,func_info,line_info} = pointers into the blob */
+	emit_rel_store(gen, attr_field(attr_off, license), license_off);
+	emit_rel_store(gen, attr_field(attr_off, insns), insns_off);
+	emit_rel_store(gen, attr_field(attr_off, func_info), func_info_off);
+	emit_rel_store(gen, attr_field(attr_off, line_info), line_info_off);
+
+	/* attr.fd_array = pointer to the stack where map_fds are saved */
+	emit_rel_store_sp(gen, attr_field(attr_off, fd_array),
+			  stack_off(map_fd[0]));
+
+	/* attr.log_{level,size,buf} come from the user supplied ctx */
+	move_ctx2blob(gen, attr_field(attr_off, log_level), 4,
+		      offsetof(struct bpf_loader_ctx, log_level), false);
+	move_ctx2blob(gen, attr_field(attr_off, log_size), 4,
+		      offsetof(struct bpf_loader_ctx, log_size), false);
+	move_ctx2blob(gen, attr_field(attr_off, log_buf), 8,
+		      offsetof(struct bpf_loader_ctx, log_buf), false);
+
+	/* attr.prog_btf_fd = btf_fd saved in the stack earlier */
+	move_stack2blob(gen, attr_field(attr_off, prog_btf_fd), 4,
+			stack_off(btf_fd));
+
+	if (gen->attach_kind) {
+		emit_find_attach_target(gen);
+		/* R7 low half -> attr.attach_btf_id,
+		 * R7 high half -> attr.attach_btf_obj_fd
+		 */
+		emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, attr_off));
+		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
+				      offsetof(union bpf_attr, attach_btf_id)));
+		emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));
+		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
+				      offsetof(union bpf_attr, attach_btf_obj_fd)));
+	}
+	emit_relos(gen, insns_off);
+
+	/* issue PROG_LOAD */
+	emit_sys_bpf(gen, BPF_PROG_LOAD, attr_off, attr_size);
+	debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt);
+
+	/* successful or not, close btf module FDs used in extern ksyms
+	 * and attach_btf_obj_fd; only then check the PROG_LOAD result
+	 */
+	cleanup_relos(gen, insns_off);
+	if (gen->attach_kind)
+		emit_sys_close_blob(gen,
+				    attr_field(attr_off, attach_btf_obj_fd));
+	emit_check_err(gen);
+
+	/* on success R7 is the prog FD; remember it in the stack */
+	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+			      stack_off(prog_fd[gen->nr_progs])));
+	gen->nr_progs++;
+}
+
+/* Emit code that writes 'pvalue' (value_size bytes) into key 0 of map
+ * 'map_idx' with the BPF_MAP_UPDATE_ELEM command.
+ *
+ * If the map descriptor carries a non-zero initial_value, the generated
+ * code first overwrites the value in the blob via copy_from_user().
+ */
+void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue,
+			      __u32 value_size)
+{
+	int attr_size = offsetofend(union bpf_attr, flags);
+	int attr_off, value_off, key_off;
+	union bpf_attr attr;
+	int zero = 0;
+
+	memset(&attr, 0, attr_size);
+	pr_debug("gen: map_update_elem: idx %d\n", map_idx);
+
+	/* blob layout: value first, then the (zero) key */
+	value_off = add_data(gen, pvalue, value_size);
+	key_off = add_data(gen, &zero, sizeof(zero));
+
+	/* if (map_desc[map_idx].initial_value)
+	 *    copy_from_user(value, initial_value, value_size);
+	 *
+	 * R3 = initial_value; when it is 0 the next 4 insns are skipped.
+	 */
+	emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6,
+			      sizeof(struct bpf_loader_ctx) +
+			      sizeof(struct bpf_map_desc) * map_idx +
+			      offsetof(struct bpf_map_desc, initial_value)));
+	emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 4));
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, value_off));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user));
+
+	/* attr.map_fd comes from the stack, key/value point into the blob */
+	attr_off = add_data(gen, &attr, attr_size);
+	move_stack2blob(gen, attr_field(attr_off, map_fd), 4,
+			stack_off(map_fd[map_idx]));
+	emit_rel_store(gen, attr_field(attr_off, key), key_off);
+	emit_rel_store(gen, attr_field(attr_off, value), value_off);
+
+	/* issue MAP_UPDATE_ELEM and bail out to cleanup on failure */
+	emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, attr_off, attr_size);
+	debug_ret(gen, "update_elem idx %d value_size %d", map_idx, value_size);
+	emit_check_err(gen);
+}
+
+/* Emit code that freezes map 'map_idx' with the BPF_MAP_FREEZE command,
+ * using the map FD saved in the stack.
+ */
+void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx)
+{
+	int attr_size = offsetofend(union bpf_attr, map_fd);
+	union bpf_attr attr;
+	int attr_off;
+
+	memset(&attr, 0, attr_size);
+	pr_debug("gen: map_freeze: idx %d\n", map_idx);
+
+	/* zeroed attr in the blob; attr.map_fd is filled in at run time */
+	attr_off = add_data(gen, &attr, attr_size);
+	move_stack2blob(gen, attr_field(attr_off, map_fd), 4,
+			stack_off(map_fd[map_idx]));
+
+	/* issue MAP_FREEZE and bail out to cleanup on failure */
+	emit_sys_bpf(gen, BPF_MAP_FREEZE, attr_off, attr_size);
+	debug_ret(gen, "map_freeze");
+	emit_check_err(gen);
+}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e2a3cf437814..1e04ce724240 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -54,6 +54,7 @@
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
+#include "bpf_gen_internal.h"
#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
@@ -150,6 +151,23 @@ static inline __u64 ptr_to_u64(const void *ptr)
return (__u64) (unsigned long) ptr;
}
+/* this goes away in libbpf 1.0 */
+enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;
+
+/* Set the global libbpf strictness mode.
+ *
+ * Accepts LIBBPF_STRICT_ALL or any combination of the known strict-mode
+ * bits. For anything else errno is set to EINVAL, -EINVAL is returned and
+ * the current mode is left unchanged. Returns 0 on success.
+ */
+int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
+{
+	/* __LIBBPF_STRICT_LAST is one past the highest power-of-2 flag in
+	 * use. Subtracting 1 recovers that flag, and (2*x - 1) turns it
+	 * into a mask of all valid bits; anything outside it is unknown.
+	 */
+	bool has_unknown_bits = mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1);
+
+	if (mode != LIBBPF_STRICT_ALL && has_unknown_bits) {
+		errno = EINVAL;
+		return -EINVAL;
+	}
+
+	libbpf_mode = mode;
+	return 0;
+}
+
enum kern_feature_id {
/* v4.14: kernel support for program & map names. */
FEAT_PROG_NAME,
@@ -178,7 +196,7 @@ enum kern_feature_id {
__FEAT_CNT,
};
-static bool kernel_supports(enum kern_feature_id feat_id);
+static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
enum reloc_type {
RELO_LD64,
@@ -432,6 +450,8 @@ struct bpf_object {
bool loaded;
bool has_subcalls;
+ struct bpf_gen *gen_loader;
+
/*
* Information when doing elf related work. Only valid if fd
* is valid.
@@ -677,6 +697,11 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
return -LIBBPF_ERRNO__FORMAT;
}
+ if (sec_idx != obj->efile.text_shndx && GELF_ST_BIND(sym.st_info) == STB_LOCAL) {
+ pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
+ return -ENOTSUP;
+ }
+
pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
@@ -700,13 +725,14 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
if (err)
return err;
- /* if function is a global/weak symbol, but has hidden
- * visibility (STV_HIDDEN), mark its BTF FUNC as static to
- * enable more permissive BPF verification mode with more
- * outside context available to BPF verifier
+ /* if function is a global/weak symbol, but has restricted
+ * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
+ * as static to enable more permissive BPF verification mode
+ * with more outside context available to BPF verifier
*/
if (GELF_ST_BIND(sym.st_info) != STB_LOCAL
- && GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN)
+ && (GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN
+ || GELF_ST_VISIBILITY(sym.st_other) == STV_INTERNAL))
prog->mark_btf_static = true;
nr_progs++;
@@ -1794,7 +1820,6 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
if (!symbols)
return -EINVAL;
-
scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
data = elf_sec_data(obj, scn);
if (!scn || !data) {
@@ -1854,6 +1879,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
return -LIBBPF_ERRNO__FORMAT;
}
+ if (GELF_ST_TYPE(sym.st_info) == STT_SECTION
+ || GELF_ST_BIND(sym.st_info) == STB_LOCAL) {
+ pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
+ return -ENOTSUP;
+ }
+
map->libbpf_type = LIBBPF_MAP_UNSPEC;
map->sec_idx = sym.st_shndx;
map->sec_offset = sym.st_value;
@@ -2261,6 +2292,16 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def
pr_debug("map '%s': found inner map definition.\n", map->name);
}
+/* Map a BTF variable linkage value to a short name for log messages;
+ * values without a name yield "unknown".
+ */
+static const char *btf_var_linkage_str(__u32 linkage)
+{
+	if (linkage == BTF_VAR_STATIC)
+		return "static";
+	if (linkage == BTF_VAR_GLOBAL_ALLOCATED)
+		return "global";
+	if (linkage == BTF_VAR_GLOBAL_EXTERN)
+		return "extern";
+	return "unknown";
+}
+
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
const struct btf_type *sec,
int var_idx, int sec_idx,
@@ -2293,10 +2334,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
map_name, btf_kind_str(var));
return -EINVAL;
}
- if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
- var_extra->linkage != BTF_VAR_STATIC) {
- pr_warn("map '%s': unsupported var linkage %u.\n",
- map_name, var_extra->linkage);
+ if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
+ pr_warn("map '%s': unsupported map linkage %s.\n",
+ map_name, btf_var_linkage_str(var_extra->linkage));
return -EOPNOTSUPP;
}
@@ -2425,10 +2465,8 @@ static int bpf_object__init_maps(struct bpf_object *obj,
err = err ?: bpf_object__init_global_data_maps(obj);
err = err ?: bpf_object__init_kconfig_map(obj);
err = err ?: bpf_object__init_struct_ops_maps(obj);
- if (err)
- return err;
- return 0;
+ return err;
}
static bool section_have_execinstr(struct bpf_object *obj, int idx)
@@ -2443,20 +2481,20 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx)
static bool btf_needs_sanitization(struct bpf_object *obj)
{
- bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
- bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
- bool has_float = kernel_supports(FEAT_BTF_FLOAT);
- bool has_func = kernel_supports(FEAT_BTF_FUNC);
+ bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
+ bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
+ bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
+ bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
return !has_func || !has_datasec || !has_func_global || !has_float;
}
static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
- bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
- bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
- bool has_float = kernel_supports(FEAT_BTF_FLOAT);
- bool has_func = kernel_supports(FEAT_BTF_FUNC);
+ bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
+ bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
+ bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
+ bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
struct btf_type *t;
int i, j, vlen;
@@ -2539,16 +2577,14 @@ static int bpf_object__init_btf(struct bpf_object *obj,
if (btf_data) {
obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
- if (IS_ERR(obj->btf)) {
- err = PTR_ERR(obj->btf);
+ err = libbpf_get_error(obj->btf);
+ if (err) {
obj->btf = NULL;
- pr_warn("Error loading ELF section %s: %d.\n",
- BTF_ELF_SEC, err);
+ pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
goto out;
}
/* enforce 8-byte pointers for BPF-targeted BTFs */
btf__set_pointer_size(obj->btf, 8);
- err = 0;
}
if (btf_ext_data) {
if (!obj->btf) {
@@ -2556,11 +2592,11 @@ static int bpf_object__init_btf(struct bpf_object *obj,
BTF_EXT_ELF_SEC, BTF_ELF_SEC);
goto out;
}
- obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
- btf_ext_data->d_size);
- if (IS_ERR(obj->btf_ext)) {
- pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n",
- BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
+ obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
+ err = libbpf_get_error(obj->btf_ext);
+ if (err) {
+ pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
+ BTF_EXT_ELF_SEC, err);
obj->btf_ext = NULL;
goto out;
}
@@ -2637,15 +2673,15 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
int err;
/* btf_vmlinux could be loaded earlier */
- if (obj->btf_vmlinux)
+ if (obj->btf_vmlinux || obj->gen_loader)
return 0;
if (!force && !obj_needs_vmlinux_btf(obj))
return 0;
obj->btf_vmlinux = libbpf_find_kernel_btf();
- if (IS_ERR(obj->btf_vmlinux)) {
- err = PTR_ERR(obj->btf_vmlinux);
+ err = libbpf_get_error(obj->btf_vmlinux);
+ if (err) {
pr_warn("Error loading vmlinux BTF: %d\n", err);
obj->btf_vmlinux = NULL;
return err;
@@ -2662,7 +2698,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
if (!obj->btf)
return 0;
- if (!kernel_supports(FEAT_BTF)) {
+ if (!kernel_supports(obj, FEAT_BTF)) {
if (kernel_needs_btf(obj)) {
err = -EOPNOTSUPP;
goto report;
@@ -2711,15 +2747,29 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
/* clone BTF to sanitize a copy and leave the original intact */
raw_data = btf__get_raw_data(obj->btf, &sz);
kern_btf = btf__new(raw_data, sz);
- if (IS_ERR(kern_btf))
- return PTR_ERR(kern_btf);
+ err = libbpf_get_error(kern_btf);
+ if (err)
+ return err;
/* enforce 8-byte pointers for BPF-targeted BTFs */
btf__set_pointer_size(obj->btf, 8);
bpf_object__sanitize_btf(obj, kern_btf);
}
- err = btf__load(kern_btf);
+ if (obj->gen_loader) {
+ __u32 raw_size = 0;
+ const void *raw_data = btf__get_raw_data(kern_btf, &raw_size);
+
+ if (!raw_data)
+ return -ENOMEM;
+ bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
+ /* Pretend to have valid FD to pass various fd >= 0 checks.
+ * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
+ */
+ btf__set_fd(kern_btf, 0);
+ } else {
+ err = btf__load(kern_btf);
+ }
if (sanitize) {
if (!err) {
/* move fd to libbpf's BTF */
@@ -3216,6 +3266,9 @@ static int add_dummy_ksym_var(struct btf *btf)
const struct btf_var_secinfo *vs;
const struct btf_type *sec;
+ if (!btf)
+ return 0;
+
sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
BTF_KIND_DATASEC);
if (sec_btf_id < 0)
@@ -3470,7 +3523,7 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
if (pos->sec_name && !strcmp(pos->sec_name, title))
return pos;
}
- return NULL;
+ return errno = ENOENT, NULL;
}
static bool prog_is_subprog(const struct bpf_object *obj,
@@ -3503,7 +3556,7 @@ bpf_object__find_program_by_name(const struct bpf_object *obj,
if (!strcmp(prog->name, name))
return prog;
}
- return NULL;
+ return errno = ENOENT, NULL;
}
static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
@@ -3850,11 +3903,11 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
err = bpf_obj_get_info_by_fd(fd, &info, &len);
if (err)
- return err;
+ return libbpf_err(err);
new_name = strdup(info.name);
if (!new_name)
- return -errno;
+ return libbpf_err(-errno);
new_fd = open("/", O_RDONLY | O_CLOEXEC);
if (new_fd < 0) {
@@ -3892,7 +3945,7 @@ err_close_new_fd:
close(new_fd);
err_free_new_name:
free(new_name);
- return err;
+ return libbpf_err(err);
}
__u32 bpf_map__max_entries(const struct bpf_map *map)
@@ -3903,7 +3956,7 @@ __u32 bpf_map__max_entries(const struct bpf_map *map)
struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
{
if (!bpf_map_type__is_map_in_map(map->def.type))
- return NULL;
+ return errno = EINVAL, NULL;
return map->inner_map;
}
@@ -3911,7 +3964,7 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->def.max_entries = max_entries;
return 0;
}
@@ -3919,7 +3972,7 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
{
if (!map || !max_entries)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
return bpf_map__set_max_entries(map, max_entries);
}
@@ -3935,6 +3988,9 @@ bpf_object__probe_loading(struct bpf_object *obj)
};
int ret;
+ if (obj->gen_loader)
+ return 0;
+
/* make sure basic loading works */
memset(&attr, 0, sizeof(attr));
@@ -3945,6 +4001,10 @@ bpf_object__probe_loading(struct bpf_object *obj)
ret = bpf_load_program_xattr(&attr, NULL, 0);
if (ret < 0) {
+ attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
+ ret = bpf_load_program_xattr(&attr, NULL, 0);
+ }
+ if (ret < 0) {
ret = errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
@@ -4290,11 +4350,17 @@ static struct kern_feature_desc {
},
};
-static bool kernel_supports(enum kern_feature_id feat_id)
+static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
{
struct kern_feature_desc *feat = &feature_probes[feat_id];
int ret;
+ if (obj->gen_loader)
+ /* To generate loader program assume the latest kernel
+ * to avoid doing extra prog_load, map_create syscalls.
+ */
+ return true;
+
if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
ret = feat->probe();
if (ret > 0) {
@@ -4377,6 +4443,13 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
char *cp, errmsg[STRERR_BUFSIZE];
int err, zero = 0;
+ if (obj->gen_loader) {
+ bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
+ map->mmaped, map->def.value_size);
+ if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
+ bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
+ return 0;
+ }
err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
if (err) {
err = -errno;
@@ -4402,14 +4475,14 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
static void bpf_map__destroy(struct bpf_map *map);
-static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
+static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
{
struct bpf_create_map_attr create_attr;
struct bpf_map_def *def = &map->def;
memset(&create_attr, 0, sizeof(create_attr));
- if (kernel_supports(FEAT_PROG_NAME))
+ if (kernel_supports(obj, FEAT_PROG_NAME))
create_attr.name = map->name;
create_attr.map_ifindex = map->map_ifindex;
create_attr.map_type = def->type;
@@ -4450,7 +4523,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
if (map->inner_map) {
int err;
- err = bpf_object__create_map(obj, map->inner_map);
+ err = bpf_object__create_map(obj, map->inner_map, true);
if (err) {
pr_warn("map '%s': failed to create inner map: %d\n",
map->name, err);
@@ -4462,7 +4535,15 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
create_attr.inner_map_fd = map->inner_map_fd;
}
- map->fd = bpf_create_map_xattr(&create_attr);
+ if (obj->gen_loader) {
+ bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps);
+ /* Pretend to have valid FD to pass various fd >= 0 checks.
+ * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
+ */
+ map->fd = 0;
+ } else {
+ map->fd = bpf_create_map_xattr(&create_attr);
+ }
if (map->fd < 0 && (create_attr.btf_key_type_id ||
create_attr.btf_value_type_id)) {
char *cp, errmsg[STRERR_BUFSIZE];
@@ -4483,6 +4564,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
return -errno;
if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
+ if (obj->gen_loader)
+ map->inner_map->fd = -1;
bpf_map__destroy(map->inner_map);
zfree(&map->inner_map);
}
@@ -4490,11 +4573,11 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
return 0;
}
-static int init_map_slots(struct bpf_map *map)
+static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
{
const struct bpf_map *targ_map;
unsigned int i;
- int fd, err;
+ int fd, err = 0;
for (i = 0; i < map->init_slots_sz; i++) {
if (!map->init_slots[i])
@@ -4502,7 +4585,13 @@ static int init_map_slots(struct bpf_map *map)
targ_map = map->init_slots[i];
fd = bpf_map__fd(targ_map);
- err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+ if (obj->gen_loader) {
+ pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n",
+ map - obj->maps, i, targ_map - obj->maps);
+ return -ENOTSUP;
+ } else {
+ err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+ }
if (err) {
err = -errno;
pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
@@ -4544,7 +4633,7 @@ bpf_object__create_maps(struct bpf_object *obj)
pr_debug("map '%s': skipping creation (preset fd=%d)\n",
map->name, map->fd);
} else {
- err = bpf_object__create_map(obj, map);
+ err = bpf_object__create_map(obj, map, false);
if (err)
goto err_out;
@@ -4560,7 +4649,7 @@ bpf_object__create_maps(struct bpf_object *obj)
}
if (map->init_slots_sz) {
- err = init_map_slots(map);
+ err = init_map_slots(obj, map);
if (err < 0) {
zclose(map->fd);
goto err_out;
@@ -4970,11 +5059,14 @@ static int load_module_btfs(struct bpf_object *obj)
if (obj->btf_modules_loaded)
return 0;
+ if (obj->gen_loader)
+ return 0;
+
/* don't do this again, even if we find no module BTFs */
obj->btf_modules_loaded = true;
/* kernel too old to support module BTFs */
- if (!kernel_supports(FEAT_MODULE_BTF))
+ if (!kernel_supports(obj, FEAT_MODULE_BTF))
return 0;
while (true) {
@@ -5015,10 +5107,10 @@ static int load_module_btfs(struct bpf_object *obj)
}
btf = btf_get_from_fd(fd, obj->btf_vmlinux);
- if (IS_ERR(btf)) {
- pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n",
- name, id, PTR_ERR(btf));
- err = PTR_ERR(btf);
+ err = libbpf_get_error(btf);
+ if (err) {
+ pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
+ name, id, err);
goto err_out;
}
@@ -6117,6 +6209,12 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
if (str_is_empty(spec_str))
return -EINVAL;
+ if (prog->obj->gen_loader) {
+ pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
+ prog - prog->obj->programs, relo->insn_off / 8,
+ local_name, spec_str, relo->kind);
+ return -ENOTSUP;
+ }
err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
if (err) {
pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
@@ -6272,8 +6370,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
if (targ_btf_path) {
obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
- if (IS_ERR_OR_NULL(obj->btf_vmlinux_override)) {
- err = PTR_ERR(obj->btf_vmlinux_override);
+ err = libbpf_get_error(obj->btf_vmlinux_override);
+ if (err) {
pr_warn("failed to parse target BTF: %d\n", err);
return err;
}
@@ -6368,19 +6466,34 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
switch (relo->type) {
case RELO_LD64:
- insn[0].src_reg = BPF_PSEUDO_MAP_FD;
- insn[0].imm = obj->maps[relo->map_idx].fd;
+ if (obj->gen_loader) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
+ insn[0].imm = relo->map_idx;
+ } else {
+ insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+ insn[0].imm = obj->maps[relo->map_idx].fd;
+ }
break;
case RELO_DATA:
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
insn[1].imm = insn[0].imm + relo->sym_off;
- insn[0].imm = obj->maps[relo->map_idx].fd;
+ if (obj->gen_loader) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
+ insn[0].imm = relo->map_idx;
+ } else {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[relo->map_idx].fd;
+ }
break;
case RELO_EXTERN_VAR:
ext = &obj->externs[relo->sym_off];
if (ext->type == EXT_KCFG) {
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
- insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ if (obj->gen_loader) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
+ insn[0].imm = obj->kconfig_map_idx;
+ } else {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ }
insn[1].imm = ext->kcfg.data_off;
} else /* EXT_KSYM */ {
if (ext->ksym.type_id) { /* typed ksyms */
@@ -6399,11 +6512,15 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
insn[0].imm = ext->ksym.kernel_btf_id;
break;
case RELO_SUBPROG_ADDR:
- insn[0].src_reg = BPF_PSEUDO_FUNC;
- /* will be handled as a follow up pass */
+ if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
+ pr_warn("prog '%s': relo #%d: bad insn\n",
+ prog->name, i);
+ return -EINVAL;
+ }
+ /* handled already */
break;
case RELO_CALL:
- /* will be handled as a follow up pass */
+ /* handled already */
break;
default:
pr_warn("prog '%s': relo #%d: bad relo type %d\n",
@@ -6494,7 +6611,7 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj,
/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
* support func/line info
*/
- if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
+ if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
return 0;
/* only attempt func info relocation if main program's func_info
@@ -6572,6 +6689,30 @@ static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, si
sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
}
+/* Append a copy of 'subprog's relocation descriptors to the end of
+ * 'main_prog's relocation table, shifting the insn_idx of every copied
+ * entry by subprog->sub_insn_off.
+ *
+ * Returns 0 on success (also when there is nothing to append), or -ENOMEM
+ * if the table could not be grown; in that case main_prog's reloc_desc and
+ * nr_reloc are left untouched.
+ */
+static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
+{
+	int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
+	struct reloc_desc *relos;
+	int i;
+
+	if (main_prog == subprog)
+		return 0;
+	/* Nothing to copy: bail out before touching the allocator. A
+	 * zero-sized reallocation may return NULL without that being an
+	 * error, which would otherwise be misreported as -ENOMEM, and
+	 * memcpy() must not be handed a NULL source even for zero bytes.
+	 */
+	if (subprog->nr_reloc == 0)
+		return 0;
+	relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
+	if (!relos)
+		return -ENOMEM;
+	memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
+	       sizeof(*relos) * subprog->nr_reloc);
+
+	/* only the freshly copied tail needs its instruction index rebased */
+	for (i = main_prog->nr_reloc; i < new_cnt; i++)
+		relos[i].insn_idx += subprog->sub_insn_off;
+	/* After insn_idx adjustment the 'relos' array is still sorted
+	 * by insn_idx and doesn't break bsearch.
+	 */
+	main_prog->reloc_desc = relos;
+	main_prog->nr_reloc = new_cnt;
+	return 0;
+}
+
static int
bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
struct bpf_program *prog)
@@ -6592,6 +6733,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
continue;
relo = find_prog_insn_relo(prog, insn_idx);
+ if (relo && relo->type == RELO_EXTERN_FUNC)
+ /* kfunc relocations will be handled later
+ * in bpf_object__relocate_data()
+ */
+ continue;
if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
prog->name, insn_idx, relo->type);
@@ -6666,6 +6812,10 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
main_prog->name, subprog->insns_cnt, subprog->name);
+ /* The subprog insns are now appended. Append its relos too. */
+ err = append_subprog_relos(main_prog, subprog);
+ if (err)
+ return err;
err = bpf_object__reloc_code(obj, main_prog, subprog);
if (err)
return err;
@@ -6795,11 +6945,25 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
return 0;
}
+/* Release the relocation descriptor table of every program in 'obj'
+ * and reset each program's relocation count to zero.
+ */
+static void
+bpf_object__free_relocs(struct bpf_object *obj)
+{
+	int idx;
+
+	for (idx = 0; idx < obj->nr_programs; idx++) {
+		struct bpf_program *cur = &obj->programs[idx];
+
+		cur->nr_reloc = 0;
+		zfree(&cur->reloc_desc);
+	}
+}
+
static int
bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
{
struct bpf_program *prog;
- size_t i;
+ size_t i, j;
int err;
if (obj->btf_ext) {
@@ -6810,23 +6974,32 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
return err;
}
}
- /* relocate data references first for all programs and sub-programs,
- * as they don't change relative to code locations, so subsequent
- * subprogram processing won't need to re-calculate any of them
+
+ /* Before relocating calls pre-process relocations and mark
+ * the few ld_imm64 instructions that point to subprogs.
+ * Otherwise bpf_object__reloc_code() later would have to consider
+ * all ld_imm64 insns as relocation candidates. That would
+ * reduce relocation speed, since the number of find_prog_insn_relo()
+ * calls would increase and most of them would fail to find a relo.
+ */
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- err = bpf_object__relocate_data(obj, prog);
- if (err) {
- pr_warn("prog '%s': failed to relocate data references: %d\n",
- prog->name, err);
- return err;
+ for (j = 0; j < prog->nr_reloc; j++) {
+ struct reloc_desc *relo = &prog->reloc_desc[j];
+ struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+
+ /* mark the insn, so it's recognized by insn_is_pseudo_func() */
+ if (relo->type == RELO_SUBPROG_ADDR)
+ insn[0].src_reg = BPF_PSEUDO_FUNC;
}
}
- /* now relocate subprogram calls and append used subprograms to main
+
+ /* relocate subprogram calls and append used subprograms to main
* programs; each copy of subprogram code needs to be relocated
* differently for each main program, because its code location might
- * have changed
+ * have changed.
+ * Append subprog relos to main programs to allow data relos to be
+ * processed after text is completely relocated.
*/
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
@@ -6843,12 +7016,20 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
return err;
}
}
- /* free up relocation descriptors */
+ /* Process data relos for main programs */
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- zfree(&prog->reloc_desc);
- prog->nr_reloc = 0;
+ if (prog_is_subprog(obj, prog))
+ continue;
+ err = bpf_object__relocate_data(obj, prog);
+ if (err) {
+ pr_warn("prog '%s': failed to relocate data references: %d\n",
+ prog->name, err);
+ return err;
+ }
}
+ if (!obj->gen_loader)
+ bpf_object__free_relocs(obj);
return 0;
}
@@ -7037,6 +7218,9 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program
enum bpf_func_id func_id;
int i;
+ if (obj->gen_loader)
+ return 0;
+
for (i = 0; i < prog->insns_cnt; i++, insn++) {
if (!insn_is_helper_call(insn, &func_id))
continue;
@@ -7048,12 +7232,12 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program
switch (func_id) {
case BPF_FUNC_probe_read_kernel:
case BPF_FUNC_probe_read_user:
- if (!kernel_supports(FEAT_PROBE_READ_KERN))
+ if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
insn->imm = BPF_FUNC_probe_read;
break;
case BPF_FUNC_probe_read_kernel_str:
case BPF_FUNC_probe_read_user_str:
- if (!kernel_supports(FEAT_PROBE_READ_KERN))
+ if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
insn->imm = BPF_FUNC_probe_read_str;
break;
default:
@@ -7088,12 +7272,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.prog_type = prog->type;
/* old kernels might not support specifying expected_attach_type */
- if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
+ if (!kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
prog->sec_def->is_exp_attach_type_optional)
load_attr.expected_attach_type = 0;
else
load_attr.expected_attach_type = prog->expected_attach_type;
- if (kernel_supports(FEAT_PROG_NAME))
+ if (kernel_supports(prog->obj, FEAT_PROG_NAME))
load_attr.name = prog->name;
load_attr.insns = insns;
load_attr.insn_cnt = insns_cnt;
@@ -7109,7 +7293,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
/* specify func_info/line_info only if kernel supports them */
btf_fd = bpf_object__btf_fd(prog->obj);
- if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
+ if (btf_fd >= 0 && kernel_supports(prog->obj, FEAT_BTF_FUNC)) {
load_attr.prog_btf_fd = btf_fd;
load_attr.func_info = prog->func_info;
load_attr.func_info_rec_size = prog->func_info_rec_size;
@@ -7121,6 +7305,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.log_level = prog->log_level;
load_attr.prog_flags = prog->prog_flags;
+ if (prog->obj->gen_loader) {
+ bpf_gen__prog_load(prog->obj->gen_loader, &load_attr,
+ prog - prog->obj->programs);
+ *pfd = -1;
+ return 0;
+ }
retry_load:
if (log_buf_size) {
log_buf = malloc(log_buf_size);
@@ -7139,7 +7329,7 @@ retry_load:
pr_debug("verifier log:\n%s", log_buf);
if (prog->obj->rodata_map_idx >= 0 &&
- kernel_supports(FEAT_PROG_BIND_MAP)) {
+ kernel_supports(prog->obj, FEAT_PROG_BIND_MAP)) {
struct bpf_map *rodata_map =
&prog->obj->maps[prog->obj->rodata_map_idx];
@@ -7198,6 +7388,38 @@ out:
return ret;
}
+/* Walk 'prog's relocations and report every kernel symbol reference to the
+ * object's loader generator via bpf_gen__record_extern():
+ *  - RELO_EXTERN_VAR pointing at an EXT_KSYM extern is recorded as
+ *    BTF_KIND_VAR; other extern variable kinds are skipped;
+ *  - RELO_EXTERN_FUNC is recorded as BTF_KIND_FUNC;
+ *  - all other relocation types are ignored.
+ *
+ * Returns 0 on success or -ENOTSUP when a ksym variable has no BTF type id.
+ */
+static int bpf_program__record_externs(struct bpf_program *prog)
+{
+	struct bpf_object *obj = prog->obj;
+	int i;
+
+	for (i = 0; i < prog->nr_reloc; i++) {
+		struct reloc_desc *relo = &prog->reloc_desc[i];
+		struct extern_desc *ext;
+		int kind;
+
+		/* sym_off is an index into obj->externs only for extern
+		 * relocations, so the extern is looked up inside those
+		 * cases rather than up front for every relocation type.
+		 */
+		switch (relo->type) {
+		case RELO_EXTERN_VAR:
+			ext = &obj->externs[relo->sym_off];
+			if (ext->type != EXT_KSYM)
+				continue;
+			if (!ext->ksym.type_id) {
+				pr_warn("typeless ksym %s is not supported yet\n",
+					ext->name);
+				return -ENOTSUP;
+			}
+			kind = BTF_KIND_VAR;
+			break;
+		case RELO_EXTERN_FUNC:
+			ext = &obj->externs[relo->sym_off];
+			kind = BTF_KIND_FUNC;
+			break;
+		default:
+			continue;
+		}
+		bpf_gen__record_extern(obj->gen_loader, ext->name, kind,
+				       relo->insn_idx);
+	}
+	return 0;
+}
+
static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id);
int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
@@ -7206,7 +7428,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
if (prog->obj->loaded) {
pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if ((prog->type == BPF_PROG_TYPE_TRACING ||
@@ -7216,7 +7438,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id);
if (err)
- return err;
+ return libbpf_err(err);
prog->attach_btf_obj_fd = btf_obj_fd;
prog->attach_btf_id = btf_type_id;
@@ -7226,13 +7448,13 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
if (prog->preprocessor) {
pr_warn("Internal error: can't load program '%s'\n",
prog->name);
- return -LIBBPF_ERRNO__INTERNAL;
+ return libbpf_err(-LIBBPF_ERRNO__INTERNAL);
}
prog->instances.fds = malloc(sizeof(int));
if (!prog->instances.fds) {
pr_warn("Not enough memory for BPF fds\n");
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
}
prog->instances.nr = 1;
prog->instances.fds[0] = -1;
@@ -7243,6 +7465,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
prog->name, prog->instances.nr);
}
+ if (prog->obj->gen_loader)
+ bpf_program__record_externs(prog);
err = load_program(prog, prog->insns, prog->insns_cnt,
license, kern_ver, &fd);
if (!err)
@@ -7289,7 +7513,7 @@ out:
pr_warn("failed to load program '%s'\n", prog->name);
zfree(&prog->insns);
prog->insns_cnt = 0;
- return err;
+ return libbpf_err(err);
}
static int
@@ -7319,6 +7543,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
if (err)
return err;
}
+ if (obj->gen_loader)
+ bpf_object__free_relocs(obj);
return 0;
}
@@ -7420,7 +7646,7 @@ __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
{
- return __bpf_object__open_xattr(attr, 0);
+ return libbpf_ptr(__bpf_object__open_xattr(attr, 0));
}
struct bpf_object *bpf_object__open(const char *path)
@@ -7430,18 +7656,18 @@ struct bpf_object *bpf_object__open(const char *path)
.prog_type = BPF_PROG_TYPE_UNSPEC,
};
- return bpf_object__open_xattr(&attr);
+ return libbpf_ptr(__bpf_object__open_xattr(&attr, 0));
}
struct bpf_object *
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
{
if (!path)
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
pr_debug("loading %s\n", path);
- return __bpf_object__open(path, NULL, 0, opts);
+ return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts));
}
struct bpf_object *
@@ -7449,9 +7675,9 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts)
{
if (!obj_buf || obj_buf_sz == 0)
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
- return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
+ return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts));
}
struct bpf_object *
@@ -7466,9 +7692,9 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
/* returning NULL is wrong, but backwards-compatible */
if (!obj_buf || obj_buf_sz == 0)
- return NULL;
+ return errno = EINVAL, NULL;
- return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
+ return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts));
}
int bpf_object__unload(struct bpf_object *obj)
@@ -7476,7 +7702,7 @@ int bpf_object__unload(struct bpf_object *obj)
size_t i;
if (!obj)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
for (i = 0; i < obj->nr_maps; i++) {
zclose(obj->maps[i].fd);
@@ -7497,11 +7723,11 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
bpf_object__for_each_map(m, obj) {
if (!bpf_map__is_internal(m))
continue;
- if (!kernel_supports(FEAT_GLOBAL_DATA)) {
+ if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) {
pr_warn("kernel doesn't support global data\n");
return -ENOTSUP;
}
- if (!kernel_supports(FEAT_ARRAY_MMAP))
+ if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
m->def.map_flags ^= BPF_F_MMAPABLE;
}
@@ -7699,6 +7925,12 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
if (ext->type != EXT_KSYM || !ext->ksym.type_id)
continue;
+ if (obj->gen_loader) {
+ ext->is_set = true;
+ ext->ksym.kernel_btf_obj_fd = 0;
+ ext->ksym.kernel_btf_id = 0;
+ continue;
+ }
t = btf__type_by_id(obj->btf, ext->btf_id);
if (btf_is_var(t))
err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
@@ -7803,16 +8035,19 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
int err, i;
if (!attr)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
obj = attr->obj;
if (!obj)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (obj->loaded) {
pr_warn("object '%s': load can't be attempted twice\n", obj->name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
+ if (obj->gen_loader)
+ bpf_gen__init(obj->gen_loader, attr->log_level);
+
err = bpf_object__probe_loading(obj);
err = err ? : bpf_object__load_vmlinux_btf(obj, false);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
@@ -7823,6 +8058,15 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
err = err ? : bpf_object__load_progs(obj, attr->log_level);
+ if (obj->gen_loader) {
+ /* reset FDs */
+ btf__set_fd(obj->btf, -1);
+ for (i = 0; i < obj->nr_maps; i++)
+ obj->maps[i].fd = -1;
+ if (!err)
+ err = bpf_gen__finish(obj->gen_loader);
+ }
+
/* clean up module BTFs */
for (i = 0; i < obj->btf_module_cnt; i++) {
close(obj->btf_modules[i].fd);
@@ -7849,7 +8093,7 @@ out:
bpf_object__unload(obj);
pr_warn("failed to load object '%s'\n", obj->path);
- return err;
+ return libbpf_err(err);
}
int bpf_object__load(struct bpf_object *obj)
@@ -7921,28 +8165,28 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
err = make_parent_dir(path);
if (err)
- return err;
+ return libbpf_err(err);
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
if (prog == NULL) {
pr_warn("invalid program pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (instance < 0 || instance >= prog->instances.nr) {
pr_warn("invalid prog instance %d of prog %s (max %d)\n",
instance, prog->name, prog->instances.nr);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (bpf_obj_pin(prog->instances.fds[instance], path)) {
err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("failed to pin program: %s\n", cp);
- return err;
+ return libbpf_err(err);
}
pr_debug("pinned program '%s'\n", path);
@@ -7956,22 +8200,23 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
if (prog == NULL) {
pr_warn("invalid program pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (instance < 0 || instance >= prog->instances.nr) {
pr_warn("invalid prog instance %d of prog %s (max %d)\n",
instance, prog->name, prog->instances.nr);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
err = unlink(path);
if (err != 0)
- return -errno;
+ return libbpf_err(-errno);
+
pr_debug("unpinned program '%s'\n", path);
return 0;
@@ -7983,20 +8228,20 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
err = make_parent_dir(path);
if (err)
- return err;
+ return libbpf_err(err);
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
if (prog == NULL) {
pr_warn("invalid program pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (prog->instances.nr <= 0) {
pr_warn("no instances of prog %s to pin\n", prog->name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (prog->instances.nr == 1) {
@@ -8040,7 +8285,7 @@ err_unpin:
rmdir(path);
- return err;
+ return libbpf_err(err);
}
int bpf_program__unpin(struct bpf_program *prog, const char *path)
@@ -8049,16 +8294,16 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
if (prog == NULL) {
pr_warn("invalid program pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (prog->instances.nr <= 0) {
pr_warn("no instances of prog %s to pin\n", prog->name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (prog->instances.nr == 1) {
@@ -8072,9 +8317,9 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
if (len < 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
else if (len >= PATH_MAX)
- return -ENAMETOOLONG;
+ return libbpf_err(-ENAMETOOLONG);
err = bpf_program__unpin_instance(prog, buf, i);
if (err)
@@ -8083,7 +8328,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
err = rmdir(path);
if (err)
- return -errno;
+ return libbpf_err(-errno);
return 0;
}
@@ -8095,14 +8340,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
if (map == NULL) {
pr_warn("invalid map pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (map->pin_path) {
if (path && strcmp(path, map->pin_path)) {
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
bpf_map__name(map), map->pin_path, path);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
} else if (map->pinned) {
pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
bpf_map__name(map), map->pin_path);
@@ -8112,10 +8357,10 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
if (!path) {
pr_warn("missing a path to pin map '%s' at\n",
bpf_map__name(map));
- return -EINVAL;
+ return libbpf_err(-EINVAL);
} else if (map->pinned) {
pr_warn("map '%s' already pinned\n", bpf_map__name(map));
- return -EEXIST;
+ return libbpf_err(-EEXIST);
}
map->pin_path = strdup(path);
@@ -8127,11 +8372,11 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
err = make_parent_dir(map->pin_path);
if (err)
- return err;
+ return libbpf_err(err);
err = check_path(map->pin_path);
if (err)
- return err;
+ return libbpf_err(err);
if (bpf_obj_pin(map->fd, map->pin_path)) {
err = -errno;
@@ -8146,7 +8391,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
out_err:
cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
pr_warn("failed to pin map: %s\n", cp);
- return err;
+ return libbpf_err(err);
}
int bpf_map__unpin(struct bpf_map *map, const char *path)
@@ -8155,29 +8400,29 @@ int bpf_map__unpin(struct bpf_map *map, const char *path)
if (map == NULL) {
pr_warn("invalid map pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (map->pin_path) {
if (path && strcmp(path, map->pin_path)) {
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
bpf_map__name(map), map->pin_path, path);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
path = map->pin_path;
} else if (!path) {
pr_warn("no path to unpin map '%s' from\n",
bpf_map__name(map));
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
err = unlink(path);
if (err != 0)
- return -errno;
+ return libbpf_err(-errno);
map->pinned = false;
pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
@@ -8192,7 +8437,7 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
if (path) {
new = strdup(path);
if (!new)
- return -errno;
+ return libbpf_err(-errno);
}
free(map->pin_path);
@@ -8226,11 +8471,11 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
int err;
if (!obj)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
if (!obj->loaded) {
pr_warn("object not yet loaded; load it first\n");
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
bpf_object__for_each_map(map, obj) {
@@ -8270,7 +8515,7 @@ err_unpin_maps:
bpf_map__unpin(map, NULL);
}
- return err;
+ return libbpf_err(err);
}
int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
@@ -8279,7 +8524,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
int err;
if (!obj)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
bpf_object__for_each_map(map, obj) {
char *pin_path = NULL;
@@ -8291,9 +8536,9 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
len = snprintf(buf, PATH_MAX, "%s/%s", path,
bpf_map__name(map));
if (len < 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
else if (len >= PATH_MAX)
- return -ENAMETOOLONG;
+ return libbpf_err(-ENAMETOOLONG);
sanitize_pin_path(buf);
pin_path = buf;
} else if (!map->pin_path) {
@@ -8302,7 +8547,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
err = bpf_map__unpin(map, pin_path);
if (err)
- return err;
+ return libbpf_err(err);
}
return 0;
@@ -8314,11 +8559,11 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
int err;
if (!obj)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
if (!obj->loaded) {
pr_warn("object not yet loaded; load it first\n");
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
bpf_object__for_each_program(prog, obj) {
@@ -8357,7 +8602,7 @@ err_unpin_programs:
bpf_program__unpin(prog, buf);
}
- return err;
+ return libbpf_err(err);
}
int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
@@ -8366,7 +8611,7 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
int err;
if (!obj)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
bpf_object__for_each_program(prog, obj) {
char buf[PATH_MAX];
@@ -8375,13 +8620,13 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
len = snprintf(buf, PATH_MAX, "%s/%s", path,
prog->pin_name);
if (len < 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
else if (len >= PATH_MAX)
- return -ENAMETOOLONG;
+ return libbpf_err(-ENAMETOOLONG);
err = bpf_program__unpin(prog, buf);
if (err)
- return err;
+ return libbpf_err(err);
}
return 0;
@@ -8393,12 +8638,12 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
err = bpf_object__pin_maps(obj, path);
if (err)
- return err;
+ return libbpf_err(err);
err = bpf_object__pin_programs(obj, path);
if (err) {
bpf_object__unpin_maps(obj, path);
- return err;
+ return libbpf_err(err);
}
return 0;
@@ -8448,6 +8693,7 @@ void bpf_object__close(struct bpf_object *obj)
if (obj->clear_priv)
obj->clear_priv(obj, obj->priv);
+ bpf_gen__free(obj->gen_loader);
bpf_object__elf_finish(obj);
bpf_object__unload(obj);
btf__free(obj->btf);
@@ -8494,7 +8740,7 @@ bpf_object__next(struct bpf_object *prev)
const char *bpf_object__name(const struct bpf_object *obj)
{
- return obj ? obj->name : ERR_PTR(-EINVAL);
+ return obj ? obj->name : libbpf_err_ptr(-EINVAL);
}
unsigned int bpf_object__kversion(const struct bpf_object *obj)
@@ -8515,7 +8761,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
{
if (obj->loaded)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
obj->kern_version = kern_version;
@@ -8535,7 +8781,23 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
void *bpf_object__priv(const struct bpf_object *obj)
{
- return obj ? obj->priv : ERR_PTR(-EINVAL);
+ return obj ? obj->priv : libbpf_err_ptr(-EINVAL);
+}
+
+/* Attach a loader generator to 'obj'.
+ *
+ * 'opts' is stored by pointer, not copied, so the caller presumably has to
+ * keep it alive for as long as the generator is in use (TODO confirm against
+ * the bpf_gen implementation).
+ *
+ * Returns 0 on success, -EFAULT if 'opts' is NULL, -EINVAL if 'opts' fails
+ * OPTS_VALID(), or -ENOMEM if the generator cannot be allocated. Errors go
+ * through libbpf_err() like the other public entry points in this file.
+ */
+int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
+{
+	struct bpf_gen *gen;
+
+	if (!opts)
+		return libbpf_err(-EFAULT);
+	if (!OPTS_VALID(opts, gen_loader_opts))
+		return libbpf_err(-EINVAL);
+	gen = calloc(1, sizeof(*gen));
+	if (!gen)
+		return libbpf_err(-ENOMEM);
+	gen->opts = opts;
+	/* Release a generator installed by an earlier call instead of
+	 * leaking it; bpf_object__close() calls bpf_gen__free() on this
+	 * field unconditionally, so it is expected to accept NULL.
+	 */
+	bpf_gen__free(obj->gen_loader);
+	obj->gen_loader = gen;
+	return 0;
}
static struct bpf_program *
@@ -8555,7 +8817,7 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
if (p->obj != obj) {
pr_warn("error: program handler doesn't match object\n");
- return NULL;
+ return errno = EINVAL, NULL;
}
idx = (p - obj->programs) + (forward ? 1 : -1);
@@ -8601,7 +8863,7 @@ int bpf_program__set_priv(struct bpf_program *prog, void *priv,
void *bpf_program__priv(const struct bpf_program *prog)
{
- return prog ? prog->priv : ERR_PTR(-EINVAL);
+ return prog ? prog->priv : libbpf_err_ptr(-EINVAL);
}
void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
@@ -8628,7 +8890,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
title = strdup(title);
if (!title) {
pr_warn("failed to strdup program title\n");
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
}
}
@@ -8643,7 +8905,7 @@ bool bpf_program__autoload(const struct bpf_program *prog)
int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
{
if (prog->obj->loaded)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
prog->load = autoload;
return 0;
@@ -8665,17 +8927,17 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
int *instances_fds;
if (nr_instances <= 0 || !prep)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (prog->instances.nr > 0 || prog->instances.fds) {
pr_warn("Can't set pre-processor after loading\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
instances_fds = malloc(sizeof(int) * nr_instances);
if (!instances_fds) {
pr_warn("alloc memory failed for fds\n");
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
}
/* fill all fd with -1 */
@@ -8692,19 +8954,19 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
int fd;
if (!prog)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (n >= prog->instances.nr || n < 0) {
pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
n, prog->name, prog->instances.nr);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
fd = prog->instances.fds[n];
if (fd < 0) {
pr_warn("%dth instance of program '%s' is invalid\n",
n, prog->name);
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
return fd;
@@ -8730,7 +8992,7 @@ static bool bpf_program__is_type(const struct bpf_program *prog,
int bpf_program__set_##NAME(struct bpf_program *prog) \
{ \
if (!prog) \
- return -EINVAL; \
+ return libbpf_err(-EINVAL); \
bpf_program__set_type(prog, TYPE); \
return 0; \
} \
@@ -8820,7 +9082,10 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
static const struct bpf_sec_def section_defs[] = {
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
- BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT),
+ BPF_EAPROG_SEC("sk_reuseport/migrate", BPF_PROG_TYPE_SK_REUSEPORT,
+ BPF_SK_REUSEPORT_SELECT_OR_MIGRATE),
+ BPF_EAPROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT,
+ BPF_SK_REUSEPORT_SELECT),
SEC_DEF("kprobe/", KPROBE,
.attach_fn = attach_kprobe),
BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE),
@@ -8884,6 +9149,8 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_ITER,
.is_attach_btf = true,
.attach_fn = attach_iter),
+ SEC_DEF("syscall", SYSCALL,
+ .is_sleepable = true),
BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP,
BPF_XDP_DEVMAP),
BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
@@ -9015,7 +9282,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
char *type_names;
if (!name)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
sec_def = find_sec_def(name);
if (sec_def) {
@@ -9031,7 +9298,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
free(type_names);
}
- return -ESRCH;
+ return libbpf_err(-ESRCH);
}
static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
@@ -9173,6 +9440,28 @@ invalid_prog:
#define BTF_ITER_PREFIX "bpf_iter_"
#define BTF_MAX_NAME_SIZE 128
+void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
+ const char **prefix, int *kind)
+{
+ switch (attach_type) { /* choose the BTF name prefix and BTF kind paired with attach_type; both outputs are written on every path */
+ case BPF_TRACE_RAW_TP:
+ *prefix = BTF_TRACE_PREFIX;
+ *kind = BTF_KIND_TYPEDEF; /* the only case here that selects a typedef instead of a func */
+ break;
+ case BPF_LSM_MAC:
+ *prefix = BTF_LSM_PREFIX;
+ *kind = BTF_KIND_FUNC;
+ break;
+ case BPF_TRACE_ITER:
+ *prefix = BTF_ITER_PREFIX;
+ *kind = BTF_KIND_FUNC;
+ break;
+ default: /* any other attach type: empty prefix, kind is a func */
+ *prefix = "";
+ *kind = BTF_KIND_FUNC;
+ }
+}
+
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
const char *name, __u32 kind)
{
@@ -9193,21 +9482,11 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
static inline int find_attach_btf_id(struct btf *btf, const char *name,
enum bpf_attach_type attach_type)
{
- int err;
+ const char *prefix;
+ int kind;
- if (attach_type == BPF_TRACE_RAW_TP)
- err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
- BTF_KIND_TYPEDEF);
- else if (attach_type == BPF_LSM_MAC)
- err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
- BTF_KIND_FUNC);
- else if (attach_type == BPF_TRACE_ITER)
- err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
- BTF_KIND_FUNC);
- else
- err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
-
- return err;
+ btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
+ return find_btf_by_prefix_kind(btf, prefix, name, kind);
}
int libbpf_find_vmlinux_btf_id(const char *name,
@@ -9217,9 +9496,10 @@ int libbpf_find_vmlinux_btf_id(const char *name,
int err;
btf = libbpf_find_kernel_btf();
- if (IS_ERR(btf)) {
+ err = libbpf_get_error(btf);
+ if (err) {
pr_warn("vmlinux BTF is not found\n");
- return -EINVAL;
+ return libbpf_err(err);
}
err = find_attach_btf_id(btf, name, attach_type);
@@ -9227,7 +9507,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
pr_warn("%s is not found in vmlinux BTF\n", name);
btf__free(btf);
- return err;
+ return libbpf_err(err);
}
static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
@@ -9238,10 +9518,11 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
int err = -EINVAL;
info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
- if (IS_ERR_OR_NULL(info_linear)) {
+ err = libbpf_get_error(info_linear);
+ if (err) {
pr_warn("failed get_prog_info_linear for FD %d\n",
attach_prog_fd);
- return -EINVAL;
+ return err;
}
info = &info_linear->info;
if (!info->btf_id) {
@@ -9306,7 +9587,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd,
__u32 attach_prog_fd = prog->attach_prog_fd;
const char *name = prog->sec_name, *attach_name;
const struct bpf_sec_def *sec = NULL;
- int i, err;
+ int i, err = 0;
if (!name)
return -EINVAL;
@@ -9341,7 +9622,13 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd,
}
/* kernel/module BTF ID */
- err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
+ if (prog->obj->gen_loader) {
+ bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
+ *btf_obj_fd = 0;
+ *btf_type_id = 1;
+ } else {
+ err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
+ }
if (err) {
pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
return err;
@@ -9356,13 +9643,13 @@ int libbpf_attach_type_by_name(const char *name,
int i;
if (!name)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
if (strncmp(name, section_defs[i].sec, section_defs[i].len))
continue;
if (!section_defs[i].is_attachable)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
*attach_type = section_defs[i].expected_attach_type;
return 0;
}
@@ -9373,17 +9660,17 @@ int libbpf_attach_type_by_name(const char *name,
free(type_names);
}
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
int bpf_map__fd(const struct bpf_map *map)
{
- return map ? map->fd : -EINVAL;
+ return map ? map->fd : libbpf_err(-EINVAL);
}
const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
{
- return map ? &map->def : ERR_PTR(-EINVAL);
+ return map ? &map->def : libbpf_err_ptr(-EINVAL);
}
const char *bpf_map__name(const struct bpf_map *map)
@@ -9399,7 +9686,7 @@ enum bpf_map_type bpf_map__type(const struct bpf_map *map)
int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->def.type = type;
return 0;
}
@@ -9412,7 +9699,7 @@ __u32 bpf_map__map_flags(const struct bpf_map *map)
int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->def.map_flags = flags;
return 0;
}
@@ -9425,7 +9712,7 @@ __u32 bpf_map__numa_node(const struct bpf_map *map)
int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->numa_node = numa_node;
return 0;
}
@@ -9438,7 +9725,7 @@ __u32 bpf_map__key_size(const struct bpf_map *map)
int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->def.key_size = size;
return 0;
}
@@ -9451,7 +9738,7 @@ __u32 bpf_map__value_size(const struct bpf_map *map)
int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->def.value_size = size;
return 0;
}
@@ -9470,7 +9757,7 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
bpf_map_clear_priv_t clear_priv)
{
if (!map)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (map->priv) {
if (map->clear_priv)
@@ -9484,7 +9771,7 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
void *bpf_map__priv(const struct bpf_map *map)
{
- return map ? map->priv : ERR_PTR(-EINVAL);
+ return map ? map->priv : libbpf_err_ptr(-EINVAL);
}
int bpf_map__set_initial_value(struct bpf_map *map,
@@ -9492,12 +9779,20 @@ int bpf_map__set_initial_value(struct bpf_map *map,
{
if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
size != map->def.value_size || map->fd >= 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memcpy(map->mmaped, data, size);
return 0;
}
+const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
+{
+ if (!map->mmaped) /* NOTE(review): map is dereferenced without a NULL check */
+ return NULL; /* *psize is not written and errno is not set on this path */
+ *psize = map->def.value_size; /* size reported to the caller is the map's value_size */
+ return map->mmaped;
+}
+
bool bpf_map__is_offload_neutral(const struct bpf_map *map)
{
return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
@@ -9516,7 +9811,7 @@ __u32 bpf_map__ifindex(const struct bpf_map *map)
int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->map_ifindex = ifindex;
return 0;
}
@@ -9525,11 +9820,11 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
{
if (!bpf_map_type__is_map_in_map(map->def.type)) {
pr_warn("error: unsupported map type\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (map->inner_map_fd != -1) {
pr_warn("error: inner_map_fd already specified\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
zfree(&map->inner_map);
map->inner_map_fd = fd;
@@ -9543,7 +9838,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
struct bpf_map *s, *e;
if (!obj || !obj->maps)
- return NULL;
+ return errno = EINVAL, NULL;
s = obj->maps;
e = obj->maps + obj->nr_maps;
@@ -9551,7 +9846,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
if ((m < s) || (m >= e)) {
pr_warn("error in %s: map handler doesn't belong to object\n",
__func__);
- return NULL;
+ return errno = EINVAL, NULL;
}
idx = (m - obj->maps) + i;
@@ -9590,7 +9885,7 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
if (pos->name && !strcmp(pos->name, name))
return pos;
}
- return NULL;
+ return errno = ENOENT, NULL;
}
int
@@ -9602,12 +9897,23 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
struct bpf_map *
bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
{
- return ERR_PTR(-ENOTSUP);
+ return libbpf_err_ptr(-ENOTSUP);
}
long libbpf_get_error(const void *ptr)
{
- return PTR_ERR_OR_ZERO(ptr);
+ if (!IS_ERR_OR_NULL(ptr))
+ return 0;
+
+ if (IS_ERR(ptr))
+ errno = -PTR_ERR(ptr);
+
+ /* If ptr == NULL, then errno should be already set by the failing
+ * API, because libbpf never returns NULL on success and it now always
+ * sets errno on error. So no extra errno handling for ptr == NULL
+ * case.
+ */
+ return -errno;
}
int bpf_prog_load(const char *file, enum bpf_prog_type type,
@@ -9633,16 +9939,17 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
int err;
if (!attr)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (!attr->file)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
open_attr.file = attr->file;
open_attr.prog_type = attr->prog_type;
obj = bpf_object__open_xattr(&open_attr);
- if (IS_ERR_OR_NULL(obj))
- return -ENOENT;
+ err = libbpf_get_error(obj);
+ if (err)
+ return libbpf_err(-ENOENT);
bpf_object__for_each_program(prog, obj) {
enum bpf_attach_type attach_type = attr->expected_attach_type;
@@ -9662,7 +9969,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
* didn't provide a fallback type, too bad...
*/
bpf_object__close(obj);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
prog->prog_ifindex = attr->ifindex;
@@ -9680,13 +9987,13 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
if (!first_prog) {
pr_warn("object file doesn't contain bpf program\n");
bpf_object__close(obj);
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
err = bpf_object__load(obj);
if (err) {
bpf_object__close(obj);
- return err;
+ return libbpf_err(err);
}
*pobj = obj;
@@ -9705,7 +10012,10 @@ struct bpf_link {
/* Replace link's underlying BPF program with the new one */
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
- return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+ int ret;
+
+ ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+ return libbpf_err_errno(ret);
}
/* Release "ownership" of underlying BPF resource (typically, BPF program
@@ -9738,7 +10048,7 @@ int bpf_link__destroy(struct bpf_link *link)
free(link->pin_path);
free(link);
- return err;
+ return libbpf_err(err);
}
int bpf_link__fd(const struct bpf_link *link)
@@ -9753,7 +10063,7 @@ const char *bpf_link__pin_path(const struct bpf_link *link)
static int bpf_link__detach_fd(struct bpf_link *link)
{
- return close(link->fd);
+ return libbpf_err_errno(close(link->fd));
}
struct bpf_link *bpf_link__open(const char *path)
@@ -9765,13 +10075,13 @@ struct bpf_link *bpf_link__open(const char *path)
if (fd < 0) {
fd = -errno;
pr_warn("failed to open link at %s: %d\n", path, fd);
- return ERR_PTR(fd);
+ return libbpf_err_ptr(fd);
}
link = calloc(1, sizeof(*link));
if (!link) {
close(fd);
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
}
link->detach = &bpf_link__detach_fd;
link->fd = fd;
@@ -9779,7 +10089,7 @@ struct bpf_link *bpf_link__open(const char *path)
link->pin_path = strdup(path);
if (!link->pin_path) {
bpf_link__destroy(link);
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
}
return link;
@@ -9795,22 +10105,22 @@ int bpf_link__pin(struct bpf_link *link, const char *path)
int err;
if (link->pin_path)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
err = make_parent_dir(path);
if (err)
- return err;
+ return libbpf_err(err);
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
link->pin_path = strdup(path);
if (!link->pin_path)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (bpf_obj_pin(link->fd, link->pin_path)) {
err = -errno;
zfree(&link->pin_path);
- return err;
+ return libbpf_err(err);
}
pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
@@ -9822,11 +10132,11 @@ int bpf_link__unpin(struct bpf_link *link)
int err;
if (!link->pin_path)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
err = unlink(link->pin_path);
if (err != 0)
- return -errno;
+ return libbpf_err_errno(err);
pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
zfree(&link->pin_path);
@@ -9842,11 +10152,10 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
err = -errno;
close(link->fd);
- return err;
+ return libbpf_err(err);
}
-struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
- int pfd)
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
{
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
@@ -9855,18 +10164,18 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
if (pfd < 0) {
pr_warn("prog '%s': invalid perf event FD %d\n",
prog->name, pfd);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_perf_event;
link->fd = pfd;
@@ -9878,14 +10187,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
if (err == -EPROTO)
pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
prog->name, pfd);
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
}
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
err = -errno;
free(link);
pr_warn("prog '%s': failed to enable pfd %d: %s\n",
prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
}
return link;
}
@@ -10009,16 +10318,16 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
link = bpf_program__attach_perf_event(prog, pfd);
- if (IS_ERR(link)) {
+ err = libbpf_get_error(link);
+ if (err) {
close(pfd);
- err = PTR_ERR(link);
pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return link;
+ return libbpf_err_ptr(err);
}
return link;
}
@@ -10051,17 +10360,17 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
link = bpf_program__attach_perf_event(prog, pfd);
- if (IS_ERR(link)) {
+ err = libbpf_get_error(link);
+ if (err) {
close(pfd);
- err = PTR_ERR(link);
pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return link;
+ return libbpf_err_ptr(err);
}
return link;
}
@@ -10129,16 +10438,16 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
prog->name, tp_category, tp_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
link = bpf_program__attach_perf_event(prog, pfd);
- if (IS_ERR(link)) {
+ err = libbpf_get_error(link);
+ if (err) {
close(pfd);
- err = PTR_ERR(link);
pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
prog->name, tp_category, tp_name,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return link;
+ return libbpf_err_ptr(err);
}
return link;
}
@@ -10151,20 +10460,19 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
sec_name = strdup(prog->sec_name);
if (!sec_name)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
/* extract "tp/<category>/<name>" */
tp_cat = sec_name + sec->len;
tp_name = strchr(tp_cat, '/');
if (!tp_name) {
- link = ERR_PTR(-EINVAL);
- goto out;
+ free(sec_name);
+ return libbpf_err_ptr(-EINVAL);
}
*tp_name = '\0';
tp_name++;
link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
-out:
free(sec_name);
return link;
}
@@ -10179,12 +10487,12 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
@@ -10193,7 +10501,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
free(link);
pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
link->fd = pfd;
return link;
@@ -10217,12 +10525,12 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
@@ -10231,7 +10539,7 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
free(link);
pr_warn("prog '%s': failed to attach: %s\n",
prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
link->fd = pfd;
return (struct bpf_link *)link;
@@ -10259,12 +10567,6 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
return bpf_program__attach_lsm(prog);
}
-static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
- struct bpf_program *prog)
-{
- return bpf_program__attach_iter(prog, NULL);
-}
-
static struct bpf_link *
bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
const char *target_name)
@@ -10279,12 +10581,12 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
attach_type = bpf_program__get_expected_attach_type(prog);
@@ -10295,7 +10597,7 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
pr_warn("prog '%s': failed to attach to %s: %s\n",
prog->name, target_name,
libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
- return ERR_PTR(link_fd);
+ return libbpf_err_ptr(link_fd);
}
link->fd = link_fd;
return link;
@@ -10328,19 +10630,19 @@ struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
if (!!target_fd != !!attach_func_name) {
pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
if (prog->type != BPF_PROG_TYPE_EXT) {
pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
if (target_fd) {
btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
if (btf_id < 0)
- return ERR_PTR(btf_id);
+ return libbpf_err_ptr(btf_id);
return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
} else {
@@ -10362,7 +10664,7 @@ bpf_program__attach_iter(struct bpf_program *prog,
__u32 target_fd = 0;
if (!OPTS_VALID(opts, bpf_iter_attach_opts))
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
@@ -10370,12 +10672,12 @@ bpf_program__attach_iter(struct bpf_program *prog,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
@@ -10385,19 +10687,25 @@ bpf_program__attach_iter(struct bpf_program *prog,
free(link);
pr_warn("prog '%s': failed to attach to iterator: %s\n",
prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
- return ERR_PTR(link_fd);
+ return libbpf_err_ptr(link_fd);
}
link->fd = link_fd;
return link;
}
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ return bpf_program__attach_iter(prog, NULL);
+}
+
struct bpf_link *bpf_program__attach(struct bpf_program *prog)
{
const struct bpf_sec_def *sec_def;
sec_def = find_sec_def(prog->sec_name);
if (!sec_def || !sec_def->attach_fn)
- return ERR_PTR(-ESRCH);
+ return libbpf_err_ptr(-ESRCH);
return sec_def->attach_fn(sec_def, prog);
}
@@ -10420,11 +10728,11 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
int err;
if (!bpf_map__is_struct_ops(map) || map->fd == -1)
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
st_ops = map->st_ops;
for (i = 0; i < btf_vlen(st_ops->type); i++) {
@@ -10444,7 +10752,7 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
if (err) {
err = -errno;
free(link);
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
}
link->detach = bpf_link__detach_struct_ops;
@@ -10498,7 +10806,7 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
}
ring_buffer_write_tail(header, data_tail);
- return ret;
+ return libbpf_err(ret);
}
struct perf_buffer;
@@ -10651,7 +10959,7 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
p.lost_cb = opts ? opts->lost_cb : NULL;
p.ctx = opts ? opts->ctx : NULL;
- return __perf_buffer__new(map_fd, page_cnt, &p);
+ return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
struct perf_buffer *
@@ -10667,7 +10975,7 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
p.cpus = opts->cpus;
p.map_keys = opts->map_keys;
- return __perf_buffer__new(map_fd, page_cnt, &p);
+ return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
@@ -10888,16 +11196,19 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
int i, cnt, err;
cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
+ if (cnt < 0)
+ return libbpf_err_errno(cnt);
+
for (i = 0; i < cnt; i++) {
struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
err = perf_buffer__process_records(pb, cpu_buf);
if (err) {
pr_warn("error while processing records: %d\n", err);
- return err;
+ return libbpf_err(err);
}
}
- return cnt < 0 ? -errno : cnt;
+ return cnt;
}
/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
@@ -10918,11 +11229,11 @@ int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
struct perf_cpu_buf *cpu_buf;
if (buf_idx >= pb->cpu_cnt)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
cpu_buf = pb->cpu_bufs[buf_idx];
if (!cpu_buf)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
return cpu_buf->fd;
}
@@ -10940,11 +11251,11 @@ int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
struct perf_cpu_buf *cpu_buf;
if (buf_idx >= pb->cpu_cnt)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
cpu_buf = pb->cpu_bufs[buf_idx];
if (!cpu_buf)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
return perf_buffer__process_records(pb, cpu_buf);
}
@@ -10962,7 +11273,7 @@ int perf_buffer__consume(struct perf_buffer *pb)
err = perf_buffer__process_records(pb, cpu_buf);
if (err) {
pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
- return err;
+ return libbpf_err(err);
}
}
return 0;
@@ -11074,13 +11385,13 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
void *ptr;
if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
/* step 1: get array dimensions */
err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
if (err) {
pr_debug("can't get prog info: %s", strerror(errno));
- return ERR_PTR(-EFAULT);
+ return libbpf_err_ptr(-EFAULT);
}
/* step 2: calculate total size of all arrays */
@@ -11112,7 +11423,7 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
data_len = roundup(data_len, sizeof(__u64));
info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
if (!info_linear)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
/* step 4: fill data to info_linear->info */
info_linear->arrays = arrays;
@@ -11144,7 +11455,7 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
if (err) {
pr_debug("can't get prog info: %s", strerror(errno));
free(info_linear);
- return ERR_PTR(-EFAULT);
+ return libbpf_err_ptr(-EFAULT);
}
/* step 6: verify the data */
@@ -11223,26 +11534,26 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
int btf_obj_fd = 0, btf_id = 0, err;
if (!prog || attach_prog_fd < 0 || !attach_func_name)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (prog->obj->loaded)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (attach_prog_fd) {
btf_id = libbpf_find_prog_btf_id(attach_func_name,
attach_prog_fd);
if (btf_id < 0)
- return btf_id;
+ return libbpf_err(btf_id);
} else {
/* load btf_vmlinux, if not yet */
err = bpf_object__load_vmlinux_btf(prog->obj, true);
if (err)
- return err;
+ return libbpf_err(err);
err = find_kernel_btf_id(prog->obj, attach_func_name,
prog->expected_attach_type,
&btf_obj_fd, &btf_id);
if (err)
- return err;
+ return libbpf_err(err);
}
prog->attach_btf_id = btf_id;
@@ -11341,7 +11652,7 @@ int libbpf_num_possible_cpus(void)
err = parse_cpu_mask_file(fcpu, &mask, &n);
if (err)
- return err;
+ return libbpf_err(err);
tmp_cpus = 0;
for (i = 0; i < n; i++) {
@@ -11361,7 +11672,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
.object_name = s->name,
);
struct bpf_object *obj;
- int i;
+ int i, err;
/* Attempt to preserve opts->object_name, unless overriden by user
* explicitly. Overwriting object name for skeletons is discouraged,
@@ -11376,10 +11687,11 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
}
obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
- if (IS_ERR(obj)) {
- pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
- s->name, PTR_ERR(obj));
- return PTR_ERR(obj);
+ err = libbpf_get_error(obj);
+ if (err) {
+ pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
+ s->name, err);
+ return libbpf_err(err);
}
*s->obj = obj;
@@ -11392,7 +11704,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
*map = bpf_object__find_map_by_name(obj, name);
if (!*map) {
pr_warn("failed to find skeleton map '%s'\n", name);
- return -ESRCH;
+ return libbpf_err(-ESRCH);
}
/* externs shouldn't be pre-setup from user code */
@@ -11407,7 +11719,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
*prog = bpf_object__find_program_by_name(obj, name);
if (!*prog) {
pr_warn("failed to find skeleton program '%s'\n", name);
- return -ESRCH;
+ return libbpf_err(-ESRCH);
}
}
@@ -11421,7 +11733,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
err = bpf_object__load(*s->obj);
if (err) {
pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
- return err;
+ return libbpf_err(err);
}
for (i = 0; i < s->map_cnt; i++) {
@@ -11460,7 +11772,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
*mmaped = NULL;
pr_warn("failed to re-mmap() map '%s': %d\n",
bpf_map__name(map), err);
- return err;
+ return libbpf_err(err);
}
}
@@ -11469,7 +11781,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
{
- int i;
+ int i, err;
for (i = 0; i < s->prog_cnt; i++) {
struct bpf_program *prog = *s->progs[i].prog;
@@ -11484,10 +11796,11 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
continue;
*link = sec_def->attach_fn(sec_def, prog);
- if (IS_ERR(*link)) {
- pr_warn("failed to auto-attach program '%s': %ld\n",
- bpf_program__name(prog), PTR_ERR(*link));
- return PTR_ERR(*link);
+ err = libbpf_get_error(*link);
+ if (err) {
+ pr_warn("failed to auto-attach program '%s': %d\n",
+ bpf_program__name(prog), err);
+ return libbpf_err(err);
}
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index bec4e6a6e31d..6e61342ba56c 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -18,6 +18,7 @@
#include <linux/bpf.h>
#include "libbpf_common.h"
+#include "libbpf_legacy.h"
#ifdef __cplusplus
extern "C" {
@@ -471,6 +472,7 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
+LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
@@ -498,6 +500,7 @@ LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd);
+/* XDP related API */
struct xdp_link_info {
__u32 prog_id;
__u32 drv_prog_id;
@@ -520,6 +523,49 @@ LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags);
+/* TC related API */
+enum bpf_tc_attach_point {
+ BPF_TC_INGRESS = 1 << 0, /* each enumerator below occupies its own bit */
+ BPF_TC_EGRESS = 1 << 1,
+ BPF_TC_CUSTOM = 1 << 2,
+};
+
+#define BPF_TC_PARENT(a, b) \
+ ((((a) << 16) & 0xFFFF0000U) | ((b) & 0x0000FFFFU)) /* low 16 bits of a go to bits 31..16, low 16 bits of b to bits 15..0 */
+
+enum bpf_tc_flags {
+ BPF_TC_F_REPLACE = 1 << 0, /* single flag bit (bit 0) */
+};
+
+struct bpf_tc_hook {
+ size_t sz; /* presumably the size of this struct, as documented for gen_loader_opts.sz -- TODO confirm */
+ int ifindex;
+ enum bpf_tc_attach_point attach_point;
+ __u32 parent; /* presumably a value built with BPF_TC_PARENT() -- TODO confirm */
+ size_t :0; /* unnamed zero-width bit-field: adds no member of its own */
+};
+#define bpf_tc_hook__last_field parent /* parent is the last named member of struct bpf_tc_hook */
+
+struct bpf_tc_opts {
+ size_t sz; /* presumably the size of this struct, as documented for gen_loader_opts.sz -- TODO confirm */
+ int prog_fd;
+ __u32 flags; /* presumably a mask of enum bpf_tc_flags values -- TODO confirm */
+ __u32 prog_id;
+ __u32 handle;
+ __u32 priority;
+ size_t :0; /* unnamed zero-width bit-field: adds no member of its own */
+};
+#define bpf_tc_opts__last_field priority /* priority is the last named member of struct bpf_tc_opts */
+
+LIBBPF_API int bpf_tc_hook_create(struct bpf_tc_hook *hook);
+LIBBPF_API int bpf_tc_hook_destroy(struct bpf_tc_hook *hook);
+LIBBPF_API int bpf_tc_attach(const struct bpf_tc_hook *hook,
+ struct bpf_tc_opts *opts);
+LIBBPF_API int bpf_tc_detach(const struct bpf_tc_hook *hook,
+ const struct bpf_tc_opts *opts);
+LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook,
+ struct bpf_tc_opts *opts);
+
/* Ring buffer APIs */
struct ring_buffer;
@@ -756,6 +802,18 @@ LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s);
LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s);
LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s);
+struct gen_loader_opts {
+ size_t sz; /* size of this struct, for forward/backward compatibility */
+ const char *data;
+ const char *insns;
+ __u32 data_sz;
+ __u32 insns_sz;
+};
+
+#define gen_loader_opts__last_field insns_sz
+LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj,
+ struct gen_loader_opts *opts);
+
enum libbpf_tristate {
TRI_NO = 0,
TRI_YES = 1,
@@ -768,10 +826,18 @@ struct bpf_linker_opts {
};
#define bpf_linker_opts__last_field sz
+/* Per-file options for bpf_linker__add_file(); currently carries only sz. */
+struct bpf_linker_file_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+};
+#define bpf_linker_file_opts__last_field sz
+
struct bpf_linker;
LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts);
-LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filename);
+LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker,
+ const char *filename,
+ const struct bpf_linker_file_opts *opts);
LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker);
LIBBPF_API void bpf_linker__free(struct bpf_linker *linker);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index b9b29baf1df8..944c99d1ded3 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -361,4 +361,17 @@ LIBBPF_0.4.0 {
bpf_linker__new;
bpf_map__inner_map;
bpf_object__set_kversion;
+ bpf_tc_attach;
+ bpf_tc_detach;
+ bpf_tc_hook_create;
+ bpf_tc_hook_destroy;
+ bpf_tc_query;
} LIBBPF_0.3.0;
+
+LIBBPF_0.5.0 {
+ global:
+ bpf_map__initial_value;
+ bpf_map_lookup_and_delete_elem_flags;
+ bpf_object__gen_loader;
+ libbpf_set_strict_mode;
+} LIBBPF_0.4.0;
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
index 0afb51f7a919..96f67a772a1b 100644
--- a/tools/lib/bpf/libbpf_errno.c
+++ b/tools/lib/bpf/libbpf_errno.c
@@ -12,6 +12,7 @@
#include <string.h>
#include "libbpf.h"
+#include "libbpf_internal.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -39,7 +40,7 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {
int libbpf_strerror(int err, char *buf, size_t size)
{
if (!buf || !size)
- return -1;
+ return libbpf_err(-EINVAL);
err = err > 0 ? err : -err;
@@ -48,7 +49,7 @@ int libbpf_strerror(int err, char *buf, size_t size)
ret = strerror_r(err, buf, size);
buf[size - 1] = '\0';
- return ret;
+ return libbpf_err_errno(ret);
}
if (err < __LIBBPF_ERRNO__END) {
@@ -62,5 +63,5 @@ int libbpf_strerror(int err, char *buf, size_t size)
snprintf(buf, size, "Unknown libbpf error %d", err);
buf[size - 1] = '\0';
- return -1;
+ return libbpf_err(-ENOENT);
}
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index ee426226928f..016ca7cb4f8a 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -11,6 +11,9 @@
#include <stdlib.h>
#include <limits.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "libbpf_legacy.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -28,6 +31,12 @@
#ifndef R_BPF_64_64
#define R_BPF_64_64 1
#endif
+#ifndef R_BPF_64_ABS64
+#define R_BPF_64_ABS64 2
+#endif
+#ifndef R_BPF_64_ABS32
+#define R_BPF_64_ABS32 3
+#endif
#ifndef R_BPF_64_32
#define R_BPF_64_32 10
#endif
@@ -41,6 +50,11 @@
#define ELF_C_READ_MMAP ELF_C_READ
#endif
+/* Older libelf all end up in this expression, for both 32 and 64 bit */
+#ifndef GELF_ST_VISIBILITY
+#define GELF_ST_VISIBILITY(o) ((o) & 0x03)
+#endif
+
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
@@ -258,6 +272,8 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
__u32 *off);
struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
+void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
+ const char **prefix, int *kind);
struct btf_ext_info {
/*
@@ -428,4 +444,54 @@ int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ct
int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx);
int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx);
+extern enum libbpf_strict_mode libbpf_mode;
+
+/* handle direct returned errors: if ret is negative, store -ret in errno;
+ * ret itself is always returned unchanged
+ */
+static inline int libbpf_err(int ret)
+{
+ if (ret < 0)
+ errno = -ret;
+ return ret;
+}
+
+/* handle errno-based (e.g., syscall or libc) errors according to libbpf's
+ * strict mode settings: with LIBBPF_STRICT_DIRECT_ERRS set, a negative ret
+ * is converted to -errno; otherwise ret is passed through untouched
+ */
+static inline int libbpf_err_errno(int ret)
+{
+ if (libbpf_mode & LIBBPF_STRICT_DIRECT_ERRS)
+ /* errno is already assumed to be set on error */
+ return ret < 0 ? -errno : ret;
+
+ /* legacy: on error return -1 directly and don't touch errno */
+ return ret;
+}
+
+/* handle error for pointer-returning APIs, err is assumed to be < 0 always;
+ * returns NULL under LIBBPF_STRICT_CLEAN_PTRS, ERR_PTR(err) otherwise, and
+ * sets errno to -err in both cases
+ */
+static inline void *libbpf_err_ptr(int err)
+{
+ /* set errno on error, this doesn't break anything */
+ errno = -err;
+
+ if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
+ return NULL;
+
+ /* legacy: encode err as ptr */
+ return ERR_PTR(err);
+}
+
+/* handle pointer-returning APIs' error handling: ret may be a valid pointer
+ * or an ERR_PTR-encoded error; valid pointers are always returned as is
+ */
+static inline void *libbpf_ptr(void *ret)
+{
+ /* set errno on error, this doesn't break anything */
+ if (IS_ERR(ret))
+ errno = -PTR_ERR(ret);
+
+ /* strict: error pointers are replaced with NULL */
+ if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
+ return IS_ERR(ret) ? NULL : ret;
+
+ /* legacy: pass-through original pointer */
+ return ret;
+}
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
new file mode 100644
index 000000000000..df0d03dcffab
--- /dev/null
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Libbpf legacy APIs (either discouraged or deprecated, as mentioned in [0])
+ *
+ * [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY
+ *
+ * Copyright (C) 2021 Facebook
+ */
+#ifndef __LIBBPF_LEGACY_BPF_H
+#define __LIBBPF_LEGACY_BPF_H
+
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "libbpf_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Bit flags passed to libbpf_set_strict_mode(); individual
+ * LIBBPF_STRICT_* bits can be tested with a bitwise AND.
+ */
+enum libbpf_strict_mode {
+ /* Turn on all supported strict features of libbpf to simulate libbpf
+ * v1.0 behavior.
+ * This will be the default behavior in libbpf v1.0.
+ */
+ LIBBPF_STRICT_ALL = 0xffffffff,
+
+ /*
+ * Disable any libbpf 1.0 behaviors. This is the default before libbpf
+ * v1.0. It won't be supported anymore in v1.0, please update your
+ * code so that it handles LIBBPF_STRICT_ALL mode before libbpf v1.0.
+ */
+ LIBBPF_STRICT_NONE = 0x00,
+ /*
+ * Return NULL pointers on error, not ERR_PTR(err).
+ * Additionally, libbpf also always sets errno to corresponding Exx
+ * (positive) error code.
+ */
+ LIBBPF_STRICT_CLEAN_PTRS = 0x01,
+ /*
+ * Return actual error codes from low-level APIs directly, not just -1.
+ * Additionally, libbpf also always sets errno to corresponding Exx
+ * (positive) error code.
+ */
+ LIBBPF_STRICT_DIRECT_ERRS = 0x02,
+
+ /* NOTE(review): presumably an end-of-list sentinel, not a mode bit — confirm */
+ __LIBBPF_STRICT_LAST,
+};
+
+LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_LEGACY_BPF_H */
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 9de084b1c699..10911a8cad0f 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -158,7 +158,9 @@ struct bpf_linker {
static int init_output_elf(struct bpf_linker *linker, const char *file);
-static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj);
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
+ const struct bpf_linker_file_opts *opts,
+ struct src_obj *obj);
static int linker_sanity_check_elf(struct src_obj *obj);
static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec);
static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec);
@@ -218,16 +220,16 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts
int err;
if (!OPTS_VALID(opts, bpf_linker_opts))
- return NULL;
+ return errno = EINVAL, NULL;
if (elf_version(EV_CURRENT) == EV_NONE) {
pr_warn_elf("libelf initialization failed");
- return NULL;
+ return errno = EINVAL, NULL;
}
linker = calloc(1, sizeof(*linker));
if (!linker)
- return NULL;
+ return errno = ENOMEM, NULL;
linker->fd = -1;
@@ -239,7 +241,7 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts
err_out:
bpf_linker__free(linker);
- return NULL;
+ return errno = -err, NULL;
}
static struct dst_sec *add_dst_sec(struct bpf_linker *linker, const char *sec_name)
@@ -435,15 +437,19 @@ static int init_output_elf(struct bpf_linker *linker, const char *file)
return 0;
}
-int bpf_linker__add_file(struct bpf_linker *linker, const char *filename)
+int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
+ const struct bpf_linker_file_opts *opts)
{
struct src_obj obj = {};
int err = 0;
+ if (!OPTS_VALID(opts, bpf_linker_file_opts))
+ return libbpf_err(-EINVAL);
+
if (!linker->elf)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
- err = err ?: linker_load_obj_file(linker, filename, &obj);
+ err = err ?: linker_load_obj_file(linker, filename, opts, &obj);
err = err ?: linker_append_sec_data(linker, &obj);
err = err ?: linker_append_elf_syms(linker, &obj);
err = err ?: linker_append_elf_relos(linker, &obj);
@@ -461,7 +467,7 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename)
if (obj.fd >= 0)
close(obj.fd);
- return err;
+ return libbpf_err(err);
}
static bool is_dwarf_sec_name(const char *name)
@@ -529,7 +535,9 @@ static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name)
return sec;
}
-static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj)
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
+ const struct bpf_linker_file_opts *opts,
+ struct src_obj *obj)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
const int host_endianness = ELFDATA2LSB;
@@ -884,7 +892,8 @@ static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *se
size_t sym_idx = ELF64_R_SYM(relo->r_info);
size_t sym_type = ELF64_R_TYPE(relo->r_info);
- if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
+ if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32 &&
+ sym_type != R_BPF_64_ABS64 && sym_type != R_BPF_64_ABS32) {
pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
i, sec->sec_idx, sym_type, obj->filename);
return -EINVAL;
@@ -1780,7 +1789,7 @@ static void sym_update_visibility(Elf64_Sym *sym, int sym_vis)
/* libelf doesn't provide setters for ST_VISIBILITY,
* but it is stored in the lower 2 bits of st_other
*/
- sym->st_other &= 0x03;
+ sym->st_other &= ~0x03;
sym->st_other |= sym_vis;
}
@@ -2539,11 +2548,11 @@ int bpf_linker__finalize(struct bpf_linker *linker)
int err, i;
if (!linker->elf)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
err = finalize_btf(linker);
if (err)
- return err;
+ return libbpf_err(err);
/* Finalize strings */
strs_sz = strset__data_size(linker->strtab_strs);
@@ -2575,14 +2584,14 @@ int bpf_linker__finalize(struct bpf_linker *linker)
if (elf_update(linker->elf, ELF_C_NULL) < 0) {
err = -errno;
pr_warn_elf("failed to finalize ELF layout");
- return err;
+ return libbpf_err(err);
}
/* Write out final ELF contents */
if (elf_update(linker->elf, ELF_C_WRITE) < 0) {
err = -errno;
pr_warn_elf("failed to write ELF contents");
- return err;
+ return libbpf_err(err);
}
elf_end(linker->elf);
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index d2cb28e9ef52..39f25e09b51e 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -4,7 +4,10 @@
#include <stdlib.h>
#include <memory.h>
#include <unistd.h>
+#include <arpa/inet.h>
#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
#include <linux/rtnetlink.h>
#include <sys/socket.h>
#include <errno.h>
@@ -73,9 +76,20 @@ cleanup:
return ret;
}
-static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
- __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
- void *cookie)
+/* Close the netlink socket descriptor; the result of close() is ignored. */
+static void libbpf_netlink_close(int sock)
+{
+ close(sock);
+}
+
+/* Return codes a message callback can hand back to libbpf_netlink_recv();
+ * any other value is returned to the caller as is.
+ */
+enum {
+ /* keep processing the messages of the current buffer */
+ NL_CONT,
+ /* drop the rest of the current buffer and receive again */
+ NL_NEXT,
+ /* stop receiving and report success (0) */
+ NL_DONE,
+};
+
+static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
+ __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
+ void *cookie)
{
bool multipart = true;
struct nlmsgerr *err;
@@ -84,6 +98,7 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
int len, ret;
while (multipart) {
+start:
multipart = false;
len = recv(sock, buf, sizeof(buf), 0);
if (len < 0) {
@@ -121,8 +136,16 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
}
if (_fn) {
ret = _fn(nh, fn, cookie);
- if (ret)
+ switch (ret) {
+ case NL_CONT:
+ break;
+ case NL_NEXT:
+ goto start;
+ case NL_DONE:
+ return 0;
+ default:
return ret;
+ }
}
}
}
@@ -131,95 +154,92 @@ done:
return ret;
}
-static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
- __u32 flags)
+static int libbpf_netlink_send_recv(struct libbpf_nla_req *req,
+ __dump_nlmsg_t parse_msg,
+ libbpf_dump_nlmsg_t parse_attr,
+ void *cookie)
{
- int sock, seq = 0, ret;
- struct nlattr *nla, *nla_xdp;
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifinfo;
- char attrbuf[64];
- } req;
__u32 nl_pid = 0;
+ int sock, ret;
sock = libbpf_netlink_open(&nl_pid);
if (sock < 0)
return sock;
+ req->nh.nlmsg_pid = 0;
+ req->nh.nlmsg_seq = time(NULL);
+
+ if (send(sock, req, req->nh.nlmsg_len, 0) < 0) {
+ ret = -errno;
+ goto out;
+ }
+
+ ret = libbpf_netlink_recv(sock, nl_pid, req->nh.nlmsg_seq,
+ parse_msg, parse_attr, cookie);
+out:
+ libbpf_netlink_close(sock);
+ return ret;
+}
+
+static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
+ __u32 flags)
+{
+ struct nlattr *nla;
+ int ret;
+ struct libbpf_nla_req req;
+
memset(&req, 0, sizeof(req));
- req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
- req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
- req.nh.nlmsg_type = RTM_SETLINK;
- req.nh.nlmsg_pid = 0;
- req.nh.nlmsg_seq = ++seq;
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
req.ifinfo.ifi_family = AF_UNSPEC;
- req.ifinfo.ifi_index = ifindex;
-
- /* started nested attribute for XDP */
- nla = (struct nlattr *)(((char *)&req)
- + NLMSG_ALIGN(req.nh.nlmsg_len));
- nla->nla_type = NLA_F_NESTED | IFLA_XDP;
- nla->nla_len = NLA_HDRLEN;
-
- /* add XDP fd */
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = IFLA_XDP_FD;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
- nla->nla_len += nla_xdp->nla_len;
-
- /* if user passed in any flags, add those too */
+ req.ifinfo.ifi_index = ifindex;
+
+ nla = nlattr_begin_nested(&req, IFLA_XDP);
+ if (!nla)
+ return -EMSGSIZE;
+ ret = nlattr_add(&req, IFLA_XDP_FD, &fd, sizeof(fd));
+ if (ret < 0)
+ return ret;
if (flags) {
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = IFLA_XDP_FLAGS;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
- nla->nla_len += nla_xdp->nla_len;
+ ret = nlattr_add(&req, IFLA_XDP_FLAGS, &flags, sizeof(flags));
+ if (ret < 0)
+ return ret;
}
-
if (flags & XDP_FLAGS_REPLACE) {
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = IFLA_XDP_EXPECTED_FD;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(old_fd);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &old_fd, sizeof(old_fd));
- nla->nla_len += nla_xdp->nla_len;
+ ret = nlattr_add(&req, IFLA_XDP_EXPECTED_FD, &old_fd,
+ sizeof(old_fd));
+ if (ret < 0)
+ return ret;
}
+ nlattr_end_nested(&req, nla);
- req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
- if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
- ret = -errno;
- goto cleanup;
- }
- ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL);
-
-cleanup:
- close(sock);
- return ret;
+ return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
}
int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
const struct bpf_xdp_set_link_opts *opts)
{
- int old_fd = -1;
+ int old_fd = -1, ret;
if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (OPTS_HAS(opts, old_fd)) {
old_fd = OPTS_GET(opts, old_fd, -1);
flags |= XDP_FLAGS_REPLACE;
}
- return __bpf_set_link_xdp_fd_replace(ifindex, fd,
- old_fd,
- flags);
+ ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
+ return libbpf_err(ret);
}
int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
{
- return __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+ int ret;
+
+ ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+ return libbpf_err(ret);
}
static int __dump_link_nlmsg(struct nlmsghdr *nlh,
@@ -231,6 +251,7 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh,
len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
+
if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
return -LIBBPF_ERRNO__NLPARSE;
@@ -282,34 +303,33 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
return 0;
}
-static int libbpf_nl_get_link(int sock, unsigned int nl_pid,
- libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
-
int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags)
{
struct xdp_id_md xdp_id = {};
- int sock, ret;
- __u32 nl_pid = 0;
__u32 mask;
+ int ret;
+ struct libbpf_nla_req req = {
+ .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ .nh.nlmsg_type = RTM_GETLINK,
+ .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .ifinfo.ifi_family = AF_PACKET,
+ };
if (flags & ~XDP_FLAGS_MASK || !info_size)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* Check whether the single {HW,DRV,SKB} mode is set */
flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
mask = flags - 1;
if (flags && flags & mask)
- return -EINVAL;
-
- sock = libbpf_netlink_open(&nl_pid);
- if (sock < 0)
- return sock;
+ return libbpf_err(-EINVAL);
xdp_id.ifindex = ifindex;
xdp_id.flags = flags;
- ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id);
+ ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
+ get_xdp_info, &xdp_id);
if (!ret) {
size_t sz = min(info_size, sizeof(xdp_id.info));
@@ -317,8 +337,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
memset((void *) info + sz, 0, info_size - sz);
}
- close(sock);
- return ret;
+ return libbpf_err(ret);
}
static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
@@ -346,27 +365,394 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
if (!ret)
*prog_id = get_xdp_id(&info, flags);
- return ret;
+ return libbpf_err(ret);
}
-int libbpf_nl_get_link(int sock, unsigned int nl_pid,
- libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
+
+static int clsact_config(struct libbpf_nla_req *req)
{
- struct {
- struct nlmsghdr nlh;
- struct ifinfomsg ifm;
- } req = {
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
- .nlh.nlmsg_type = RTM_GETLINK,
- .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .ifm.ifi_family = AF_PACKET,
- };
- int seq = time(NULL);
+ req->tc.tcm_parent = TC_H_CLSACT;
+ req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
+
+ return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
+}
+
+/* Select the qdisc configuration callback for hook->attach_point.
+ *
+ * For BPF_TC_INGRESS, BPF_TC_EGRESS or both combined, hook->parent must be
+ * 0 (else -EINVAL) and *config is set to clsact_config. BPF_TC_CUSTOM
+ * yields -EOPNOTSUPP, any other value -EINVAL. Returns 0 on success.
+ */
+static int attach_point_to_config(struct bpf_tc_hook *hook,
+ qdisc_config_t *config)
+{
+ switch (OPTS_GET(hook, attach_point, 0)) {
+ case BPF_TC_INGRESS:
+ case BPF_TC_EGRESS:
+ case BPF_TC_INGRESS | BPF_TC_EGRESS:
+ if (OPTS_GET(hook, parent, 0))
+ return -EINVAL;
+ *config = &clsact_config;
+ return 0;
+ case BPF_TC_CUSTOM:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
+/* Resolve the tcm_parent value for a filter request.
+ *
+ * For BPF_TC_INGRESS/BPF_TC_EGRESS, *parent must be 0 on entry and is
+ * overwritten with the clsact ingress/egress handle. For BPF_TC_CUSTOM,
+ * *parent must already be non-zero and is left untouched. Every other
+ * attach point (including INGRESS | EGRESS combined) is -EINVAL.
+ * Returns 0 on success.
+ */
+static int tc_get_tcm_parent(enum bpf_tc_attach_point attach_point,
+ __u32 *parent)
+{
+ switch (attach_point) {
+ case BPF_TC_INGRESS:
+ case BPF_TC_EGRESS:
+ if (*parent)
+ return -EINVAL;
+ *parent = TC_H_MAKE(TC_H_CLSACT,
+ attach_point == BPF_TC_INGRESS ?
+ TC_H_MIN_INGRESS : TC_H_MIN_EGRESS);
+ break;
+ case BPF_TC_CUSTOM:
+ if (!*parent)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Build and send a qdisc netlink request for the given hook.
+ *
+ * cmd becomes the netlink message type and flags is OR-ed into
+ * NLM_F_REQUEST | NLM_F_ACK. The qdisc-specific part of the request is
+ * filled in by the callback chosen via attach_point_to_config().
+ * Returns the first negative error hit, else the send/recv result.
+ */
+static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
+{
+ qdisc_config_t config;
+ int ret;
+ struct libbpf_nla_req req;
+
+ ret = attach_point_to_config(hook, &config);
+ if (ret < 0)
+ return ret;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
+ req.nh.nlmsg_type = cmd;
+ req.tc.tcm_family = AF_UNSPEC;
+ req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
+
+ ret = config(&req);
+ if (ret < 0)
+ return ret;
+
+ return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
+}
+
+/* Send RTM_NEWQDISC for the hook with NLM_F_CREATE | NLM_F_EXCL set. */
+static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
+{
+ return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL);
+}
+
+/* Send RTM_DELQDISC for the hook with no extra netlink flags. */
+static int tc_qdisc_delete(struct bpf_tc_hook *hook)
+{
+ return tc_qdisc_modify(hook, RTM_DELQDISC, 0);
+}
+
+/* Create the qdisc backing a TC hook.
+ *
+ * Fails with -EINVAL when hook is NULL, fails OPTS_VALID(), or has
+ * ifindex <= 0. Otherwise returns the result of the exclusive qdisc
+ * creation request; negative results are also mirrored into errno via
+ * libbpf_err().
+ */
+int bpf_tc_hook_create(struct bpf_tc_hook *hook)
+{
+ int ret;
+
+ if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
+ OPTS_GET(hook, ifindex, 0) <= 0)
+ return libbpf_err(-EINVAL);
+
+ ret = tc_qdisc_create_excl(hook);
+ return libbpf_err(ret);
+}
+
+static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
+ const struct bpf_tc_opts *opts,
+ const bool flush);
+
+/* Tear down a TC hook.
+ *
+ * With a single direction (BPF_TC_INGRESS or BPF_TC_EGRESS) only a
+ * flushing detach is issued for that direction; the qdisc itself is
+ * deleted only when both directions are given together. BPF_TC_CUSTOM is
+ * -EOPNOTSUPP; an invalid hook or attach point is -EINVAL.
+ */
+int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
+{
+ if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
+ OPTS_GET(hook, ifindex, 0) <= 0)
+ return libbpf_err(-EINVAL);
+
+ switch (OPTS_GET(hook, attach_point, 0)) {
+ case BPF_TC_INGRESS:
+ case BPF_TC_EGRESS:
+ return libbpf_err(__bpf_tc_detach(hook, NULL, true));
+ case BPF_TC_INGRESS | BPF_TC_EGRESS:
+ return libbpf_err(tc_qdisc_delete(hook));
+ case BPF_TC_CUSTOM:
+ return libbpf_err(-EOPNOTSUPP);
+ default:
+ return libbpf_err(-EINVAL);
+ }
+}
+
+/* Cookie handed to the netlink reply callback by attach/query. */
+struct bpf_cb_ctx {
+ /* user options that the callback fills in (prog_id, handle, priority) */
+ struct bpf_tc_opts *opts;
+ /* set once a reply carrying a BPF program id has been consumed */
+ bool processed;
+};
+
+/* Extract filter info from one parsed TC message into the cookie's opts.
+ *
+ * Returns -EINVAL when the cookie or its opts is missing, when a unicast
+ * reply arrives after one was already processed, or when the nested
+ * options carry no TCA_BPF_ID. Messages without TCA_OPTIONS are skipped
+ * with NL_CONT. On success prog_id, handle and priority are stored,
+ * processed is set, and NL_NEXT (unicast) or NL_DONE is returned.
+ */
+static int __get_tc_info(void *cookie, struct tcmsg *tc, struct nlattr **tb,
+ bool unicast)
+{
+ struct nlattr *tbb[TCA_BPF_MAX + 1];
+ struct bpf_cb_ctx *info = cookie;
+
+ if (!info || !info->opts)
+ return -EINVAL;
+ if (unicast && info->processed)
+ return -EINVAL;
+ if (!tb[TCA_OPTIONS])
+ return NL_CONT;
+
+ libbpf_nla_parse_nested(tbb, TCA_BPF_MAX, tb[TCA_OPTIONS], NULL);
+ if (!tbb[TCA_BPF_ID])
+ return -EINVAL;
+
+ OPTS_SET(info->opts, prog_id, libbpf_nla_getattr_u32(tbb[TCA_BPF_ID]));
+ OPTS_SET(info->opts, handle, tc->tcm_handle);
+ /* priority is taken from the upper 16 bits of tcm_info */
+ OPTS_SET(info->opts, priority, TC_H_MAJ(tc->tcm_info) >> 16);
+
+ info->processed = true;
+ return unicast ? NL_NEXT : NL_DONE;
+}
+
+/* Netlink message callback for TC replies: parses the attributes that
+ * follow the tcmsg header and forwards them to __get_tc_info(). Messages
+ * without TCA_KIND are skipped with NL_CONT. The fn argument is unused
+ * here. The NLM_F_ECHO bit of the message flags is passed on as the
+ * "unicast" indicator.
+ */
+static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
+ void *cookie)
+{
+ struct tcmsg *tc = NLMSG_DATA(nh);
+ struct nlattr *tb[TCA_MAX + 1];
+
+ libbpf_nla_parse(tb, TCA_MAX,
+ (struct nlattr *)((void *)tc + NLMSG_ALIGN(sizeof(*tc))),
+ NLMSG_PAYLOAD(nh, sizeof(*tc)), NULL);
+ if (!tb[TCA_KIND])
+ return NL_CONT;
+ return __get_tc_info(cookie, tc, tb, nh->nlmsg_flags & NLM_F_ECHO);
+}
+
+static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ char name[256];
+ int len, ret;
+
+ ret = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+ if (ret < 0)
+ return ret;
- req.nlh.nlmsg_seq = seq;
- if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ ret = nlattr_add(req, TCA_BPF_FD, &fd, sizeof(fd));
+ if (ret < 0)
+ return ret;
+ len = snprintf(name, sizeof(name), "%s:[%u]", info.name, info.id);
+ if (len < 0)
return -errno;
+ if (len >= sizeof(name))
+ return -ENAMETOOLONG;
+ return nlattr_add(req, TCA_BPF_NAME, name, len + 1);
+}
+
+/* Attach the BPF program opts->prog_fd as a "bpf" TC filter on hook.
+ *
+ * Input rules (violations return -EINVAL): hook and opts must be non-NULL
+ * and pass OPTS_VALID(); ifindex > 0; prog_fd non-zero; prog_id zero;
+ * priority <= UINT16_MAX; flags may only contain BPF_TC_F_REPLACE.
+ *
+ * The request is RTM_NEWTFILTER with NLM_F_CREATE | NLM_F_ECHO plus
+ * NLM_F_REPLACE (when BPF_TC_F_REPLACE is set) or NLM_F_EXCL (otherwise).
+ * The echoed reply is parsed by get_tc_info(), which stores prog_id,
+ * handle and priority into opts. Returns -ENOENT if no reply was
+ * processed; negative results are mirrored into errno via libbpf_err().
+ */
+int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
+{
+ __u32 protocol, bpf_flags, handle, priority, parent, prog_id, flags;
+ int ret, ifindex, attach_point, prog_fd;
+ struct bpf_cb_ctx info = {};
+ struct libbpf_nla_req req;
+ struct nlattr *nla;
+
+ if (!hook || !opts ||
+ !OPTS_VALID(hook, bpf_tc_hook) ||
+ !OPTS_VALID(opts, bpf_tc_opts))
+ return libbpf_err(-EINVAL);
+
+ ifindex = OPTS_GET(hook, ifindex, 0);
+ parent = OPTS_GET(hook, parent, 0);
+ attach_point = OPTS_GET(hook, attach_point, 0);
+
+ handle = OPTS_GET(opts, handle, 0);
+ priority = OPTS_GET(opts, priority, 0);
+ prog_fd = OPTS_GET(opts, prog_fd, 0);
+ prog_id = OPTS_GET(opts, prog_id, 0);
+ flags = OPTS_GET(opts, flags, 0);
+
+ if (ifindex <= 0 || !prog_fd || prog_id)
+ return libbpf_err(-EINVAL);
+ if (priority > UINT16_MAX)
+ return libbpf_err(-EINVAL);
+ if (flags & ~BPF_TC_F_REPLACE)
+ return libbpf_err(-EINVAL);
+
+ /* from here on, flags holds netlink flags rather than BPF_TC_F_* */
+ flags = (flags & BPF_TC_F_REPLACE) ? NLM_F_REPLACE : NLM_F_EXCL;
+ protocol = ETH_P_ALL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE |
+ NLM_F_ECHO | flags;
+ req.nh.nlmsg_type = RTM_NEWTFILTER;
+ req.tc.tcm_family = AF_UNSPEC;
+ req.tc.tcm_ifindex = ifindex;
+ req.tc.tcm_handle = handle;
+ /* priority goes in the upper 16 bits, protocol in the lower 16 */
+ req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
+
+ ret = tc_get_tcm_parent(attach_point, &parent);
+ if (ret < 0)
+ return libbpf_err(ret);
+ req.tc.tcm_parent = parent;
+
+ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
+ if (ret < 0)
+ return libbpf_err(ret);
+ nla = nlattr_begin_nested(&req, TCA_OPTIONS);
+ if (!nla)
+ return libbpf_err(-EMSGSIZE);
+ ret = tc_add_fd_and_name(&req, prog_fd);
+ if (ret < 0)
+ return libbpf_err(ret);
+ bpf_flags = TCA_BPF_FLAG_ACT_DIRECT;
+ ret = nlattr_add(&req, TCA_BPF_FLAGS, &bpf_flags, sizeof(bpf_flags));
+ if (ret < 0)
+ return libbpf_err(ret);
+ nlattr_end_nested(&req, nla);
+
+ info.opts = opts;
+
+ ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
+ if (ret < 0)
+ return libbpf_err(ret);
+ if (!info.processed)
+ return libbpf_err(-ENOENT);
+ return ret;
+}
+
+static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
+ const struct bpf_tc_opts *opts,
+ const bool flush)
+{
+ __u32 protocol = 0, handle, priority, parent, prog_id, flags;
+ int ret, ifindex, attach_point, prog_fd;
+ struct libbpf_nla_req req;
+
+ if (!hook ||
+ !OPTS_VALID(hook, bpf_tc_hook) ||
+ !OPTS_VALID(opts, bpf_tc_opts))
+ return -EINVAL;
+
+ ifindex = OPTS_GET(hook, ifindex, 0);
+ parent = OPTS_GET(hook, parent, 0);
+ attach_point = OPTS_GET(hook, attach_point, 0);
+
+ handle = OPTS_GET(opts, handle, 0);
+ priority = OPTS_GET(opts, priority, 0);
+ prog_fd = OPTS_GET(opts, prog_fd, 0);
+ prog_id = OPTS_GET(opts, prog_id, 0);
+ flags = OPTS_GET(opts, flags, 0);
+
+ if (ifindex <= 0 || flags || prog_fd || prog_id)
+ return -EINVAL;
+ if (priority > UINT16_MAX)
+ return -EINVAL;
+ if (!flush) {
+ if (!handle || !priority)
+ return -EINVAL;
+ protocol = ETH_P_ALL;
+ } else {
+ if (handle || priority)
+ return -EINVAL;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_DELTFILTER;
+ req.tc.tcm_family = AF_UNSPEC;
+ req.tc.tcm_ifindex = ifindex;
+ if (!flush) {
+ req.tc.tcm_handle = handle;
+ req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
+ }
+
+ ret = tc_get_tcm_parent(attach_point, &parent);
+ if (ret < 0)
+ return ret;
+ req.tc.tcm_parent = parent;
- return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
- dump_link_nlmsg, cookie);
+ if (!flush) {
+ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
+ if (ret < 0)
+ return ret;
+ }
+
+ return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
+}
+
+/* Detach a single TC filter identified by opts on the given hook.
+ *
+ * opts must be non-NULL (-EINVAL otherwise); the remaining validation and
+ * the request itself are done by __bpf_tc_detach() in non-flush mode.
+ * Negative results are mirrored into errno via libbpf_err().
+ */
+int bpf_tc_detach(const struct bpf_tc_hook *hook,
+ const struct bpf_tc_opts *opts)
+{
+ int ret;
+
+ if (!opts)
+ return libbpf_err(-EINVAL);
+
+ ret = __bpf_tc_detach(hook, opts, false);
+ return libbpf_err(ret);
+}
+
+/* Look up the "bpf" TC filter identified by opts->handle and
+ * opts->priority on hook.
+ *
+ * Input rules (violations return -EINVAL): hook and opts must be non-NULL
+ * and pass OPTS_VALID(); ifindex > 0; flags, prog_fd and prog_id zero;
+ * handle and priority non-zero; priority <= UINT16_MAX.
+ *
+ * Sends RTM_GETTFILTER and lets get_tc_info() store prog_id, handle and
+ * priority into opts. Returns -ENOENT if no reply was processed; negative
+ * results are mirrored into errno via libbpf_err().
+ */
+int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
+{
+ __u32 protocol, handle, priority, parent, prog_id, flags;
+ int ret, ifindex, attach_point, prog_fd;
+ struct bpf_cb_ctx info = {};
+ struct libbpf_nla_req req;
+
+ if (!hook || !opts ||
+ !OPTS_VALID(hook, bpf_tc_hook) ||
+ !OPTS_VALID(opts, bpf_tc_opts))
+ return libbpf_err(-EINVAL);
+
+ ifindex = OPTS_GET(hook, ifindex, 0);
+ parent = OPTS_GET(hook, parent, 0);
+ attach_point = OPTS_GET(hook, attach_point, 0);
+
+ handle = OPTS_GET(opts, handle, 0);
+ priority = OPTS_GET(opts, priority, 0);
+ prog_fd = OPTS_GET(opts, prog_fd, 0);
+ prog_id = OPTS_GET(opts, prog_id, 0);
+ flags = OPTS_GET(opts, flags, 0);
+
+ if (ifindex <= 0 || flags || prog_fd || prog_id ||
+ !handle || !priority)
+ return libbpf_err(-EINVAL);
+ if (priority > UINT16_MAX)
+ return libbpf_err(-EINVAL);
+
+ protocol = ETH_P_ALL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_type = RTM_GETTFILTER;
+ req.tc.tcm_family = AF_UNSPEC;
+ req.tc.tcm_ifindex = ifindex;
+ req.tc.tcm_handle = handle;
+ /* priority goes in the upper 16 bits, protocol in the lower 16 */
+ req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
+
+ ret = tc_get_tcm_parent(attach_point, &parent);
+ if (ret < 0)
+ return libbpf_err(ret);
+ req.tc.tcm_parent = parent;
+
+ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
+ if (ret < 0)
+ return libbpf_err(ret);
+
+ info.opts = opts;
+
+ ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
+ if (ret < 0)
+ return libbpf_err(ret);
+ if (!info.processed)
+ return libbpf_err(-ENOENT);
+ return ret;
}
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index b607fa9852b1..f57e77a6e40f 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -27,7 +27,7 @@ static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
int totlen = NLA_ALIGN(nla->nla_len);
*remaining -= totlen;
- return (struct nlattr *) ((char *) nla + totlen);
+ return (struct nlattr *)((void *)nla + totlen);
}
static int nla_ok(const struct nlattr *nla, int remaining)
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
index 6cc3ac91690f..4d15ae2ff812 100644
--- a/tools/lib/bpf/nlattr.h
+++ b/tools/lib/bpf/nlattr.h
@@ -10,7 +10,11 @@
#define __LIBBPF_NLATTR_H
#include <stdint.h>
+#include <string.h>
+#include <errno.h>
#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
/* avoid multiple definition of netlink features */
#define __LINUX_NETLINK_H
@@ -49,6 +53,15 @@ struct libbpf_nla_policy {
uint16_t maxlen;
};
+/* Fixed-size netlink request buffer: message header, one of the two
+ * supported family headers, and room for attributes. nlattr_add() refuses
+ * to grow a request beyond sizeof(struct libbpf_nla_req).
+ */
+struct libbpf_nla_req {
+ struct nlmsghdr nh;
+ union {
+ struct ifinfomsg ifinfo;
+ struct tcmsg tc;
+ };
+ /* space for attributes appended after the header */
+ char buf[128];
+};
+
/**
* @ingroup attr
* Iterate over a stream of attributes
@@ -68,7 +81,7 @@ struct libbpf_nla_policy {
*/
static inline void *libbpf_nla_data(const struct nlattr *nla)
{
- return (char *) nla + NLA_HDRLEN;
+ return (void *)nla + NLA_HDRLEN;
}
static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla)
@@ -103,4 +116,49 @@ int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh);
+/* Return the address NLA_HDRLEN bytes past nla, i.e. where the attribute
+ * payload starts (typed as struct nlattr * by this helper).
+ */
+static inline struct nlattr *nla_data(struct nlattr *nla)
+{
+ return (struct nlattr *)((void *)nla + NLA_HDRLEN);
+}
+
+/* Return the position just past the current (aligned) message length,
+ * which is where the next attribute would be written.
+ */
+static inline struct nlattr *req_tail(struct libbpf_nla_req *req)
+{
+ return (struct nlattr *)((void *)req + NLMSG_ALIGN(req->nh.nlmsg_len));
+}
+
+/* Append one attribute of the given type to the request.
+ *
+ * data/len must be both set or both empty (-EINVAL otherwise); the
+ * attribute must fit within sizeof(*req) (-EMSGSIZE otherwise). On success
+ * the payload is copied, nlmsg_len is advanced by the aligned attribute
+ * size, and 0 is returned.
+ */
+static inline int nlattr_add(struct libbpf_nla_req *req, int type,
+ const void *data, int len)
+{
+ struct nlattr *nla;
+
+ if (NLMSG_ALIGN(req->nh.nlmsg_len) + NLA_ALIGN(NLA_HDRLEN + len) > sizeof(*req))
+ return -EMSGSIZE;
+ if (!!data != !!len)
+ return -EINVAL;
+
+ nla = req_tail(req);
+ nla->nla_type = type;
+ nla->nla_len = NLA_HDRLEN + len;
+ if (data)
+ memcpy(nla_data(nla), data, len);
+ req->nh.nlmsg_len = NLMSG_ALIGN(req->nh.nlmsg_len) + NLA_ALIGN(nla->nla_len);
+ return 0;
+}
+
+/* Open a nested attribute: appends an empty attribute with NLA_F_NESTED
+ * set and returns a pointer to it, or NULL if it could not be added. The
+ * returned pointer must later be passed to nlattr_end_nested().
+ */
+static inline struct nlattr *nlattr_begin_nested(struct libbpf_nla_req *req, int type)
+{
+ struct nlattr *tail;
+
+ /* remember where the nested header will land before adding it */
+ tail = req_tail(req);
+ if (nlattr_add(req, type | NLA_F_NESTED, NULL, 0))
+ return NULL;
+ return tail;
+}
+
+/* Close a nested attribute opened with nlattr_begin_nested(): its length
+ * becomes the distance from its header to the current end of the request,
+ * covering everything appended in between.
+ */
+static inline void nlattr_end_nested(struct libbpf_nla_req *req,
+ struct nlattr *tail)
+{
+ tail->nla_len = (void *)req_tail(req) - (void *)tail;
+}
+
#endif /* __LIBBPF_NLATTR_H */
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 1d80ad4e0de8..8bc117bcc7bc 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -69,23 +69,23 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
err = -errno;
pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
map_fd, err);
- return err;
+ return libbpf_err(err);
}
if (info.type != BPF_MAP_TYPE_RINGBUF) {
pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n",
map_fd);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
if (!tmp)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
rb->rings = tmp;
tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
if (!tmp)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
rb->events = tmp;
r = &rb->rings[rb->ring_cnt];
@@ -103,7 +103,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
err = -errno;
pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
map_fd, err);
- return err;
+ return libbpf_err(err);
}
r->consumer_pos = tmp;
@@ -118,7 +118,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
ringbuf_unmap_ring(rb, r);
pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
map_fd, err);
- return err;
+ return libbpf_err(err);
}
r->producer_pos = tmp;
r->data = tmp + rb->page_size;
@@ -133,7 +133,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
ringbuf_unmap_ring(rb, r);
pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
map_fd, err);
- return err;
+ return libbpf_err(err);
}
rb->ring_cnt++;
@@ -165,11 +165,11 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
int err;
if (!OPTS_VALID(opts, ring_buffer_opts))
- return NULL;
+ return errno = EINVAL, NULL;
rb = calloc(1, sizeof(*rb));
if (!rb)
- return NULL;
+ return errno = ENOMEM, NULL;
rb->page_size = getpagesize();
@@ -188,7 +188,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
err_out:
ring_buffer__free(rb);
- return NULL;
+ return errno = -err, NULL;
}
static inline int roundup_len(__u32 len)
@@ -260,7 +260,7 @@ int ring_buffer__consume(struct ring_buffer *rb)
err = ringbuf_process_ring(ring);
if (err < 0)
- return err;
+ return libbpf_err(err);
res += err;
}
if (res > INT_MAX)
@@ -279,7 +279,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
if (cnt < 0)
- return -errno;
+ return libbpf_err(-errno);
for (i = 0; i < cnt; i++) {
__u32 ring_id = rb->events[i].data.fd;
@@ -287,7 +287,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
err = ringbuf_process_ring(ring);
if (err < 0)
- return err;
+ return libbpf_err(err);
res += err;
}
if (res > INT_MAX)
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
new file mode 100644
index 000000000000..b22b50c1b173
--- /dev/null
+++ b/tools/lib/bpf/skel_internal.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021 Facebook */
+#ifndef __SKEL_INTERNAL_H
+#define __SKEL_INTERNAL_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+
+/* This file is a base header for auto-generated *.lskel.h files.
+ * Its contents will change and may become part of auto-generation in the future.
+ *
+ * The layout of bpf_[map|prog]_desc and bpf_loader_ctx is feature dependent
+ * and will change from one version of libbpf to another and features
+ * requested during loader program generation.
+ */
+struct bpf_map_desc {
+ union {
+ /* input for the loader prog */
+ struct {
+ __aligned_u64 initial_value;
+ __u32 max_entries;
+ };
+ /* output of the loader prog */
+ struct {
+ int map_fd;
+ };
+ };
+};
+struct bpf_prog_desc {
+ int prog_fd;
+};
+
+struct bpf_loader_ctx {
+ size_t sz;
+ __u32 log_level;
+ __u32 log_size;
+ __u64 log_buf;
+};
+
+struct bpf_load_and_run_opts {
+ struct bpf_loader_ctx *ctx;
+ const void *data;
+ const void *insns;
+ __u32 data_sz;
+ __u32 insns_sz;
+ const char *errstr;
+};
+
+static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
+ unsigned int size)
+{
+ return syscall(__NR_bpf, cmd, attr, size);
+}
+
+static inline int skel_closenz(int fd)
+{
+ if (fd > 0)
+ return close(fd);
+ return -EINVAL;
+}
+
+static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
+{
+ int map_fd = -1, prog_fd = -1, key = 0, err;
+ union bpf_attr attr;
+
+ map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "__loader.map", 4,
+ opts->data_sz, 1, 0);
+ if (map_fd < 0) {
+ opts->errstr = "failed to create loader map";
+ err = -errno;
+ goto out;
+ }
+
+ err = bpf_map_update_elem(map_fd, &key, opts->data, 0);
+ if (err < 0) {
+ opts->errstr = "failed to update loader map";
+ err = -errno;
+ goto out;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SYSCALL;
+ attr.insns = (long) opts->insns;
+ attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
+ attr.license = (long) "Dual BSD/GPL";
+ memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
+ attr.fd_array = (long) &map_fd;
+ attr.log_level = opts->ctx->log_level;
+ attr.log_size = opts->ctx->log_size;
+ attr.log_buf = opts->ctx->log_buf;
+ attr.prog_flags = BPF_F_SLEEPABLE;
+ prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (prog_fd < 0) {
+ opts->errstr = "failed to load loader prog";
+ err = -errno;
+ goto out;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.test.prog_fd = prog_fd;
+ attr.test.ctx_in = (long) opts->ctx;
+ attr.test.ctx_size_in = opts->ctx->sz;
+ err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr));
+ if (err < 0 || (int)attr.test.retval < 0) {
+ opts->errstr = "failed to execute loader prog";
+ if (err < 0)
+ err = -errno;
+ else
+ err = (int)attr.test.retval;
+ goto out;
+ }
+ err = 0;
+out:
+ if (map_fd >= 0)
+ close(map_fd);
+ if (prog_fd >= 0)
+ close(prog_fd);
+ return err;
+}
+
+#endif
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 6061431ee04c..e9b619aa0cdf 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -1094,7 +1094,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
goto out_put_ctx;
}
if (xsk->fd == umem->fd)
- umem->rx_ring_setup_done = true;
+ umem->tx_ring_setup_done = true;
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c
index 51ceeb9147eb..9ce7b4b68e3f 100644
--- a/tools/lib/traceevent/plugins/plugin_kvm.c
+++ b/tools/lib/traceevent/plugins/plugin_kvm.c
@@ -366,7 +366,7 @@ union kvm_mmu_page_role {
unsigned direct:1;
unsigned access:3;
unsigned invalid:1;
- unsigned nxe:1;
+ unsigned efer_nx:1;
unsigned cr0_wp:1;
unsigned smep_and_not_wp:1;
unsigned smap_and_not_wp:1;
@@ -403,7 +403,7 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
access_str[role.access],
role.invalid ? " invalid" : "",
role.cr4_pae ? "" : "!",
- role.nxe ? "" : "!",
+ role.efer_nx ? "" : "!",
role.cr0_wp ? "" : "!",
role.smep_and_not_wp ? " smep" : "",
role.smap_and_not_wp ? " smap" : "",
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 24295d39713b..bc821056aba9 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -684,7 +684,7 @@ static int elf_add_alternative(struct elf *elf,
sec = find_section_by_name(elf, ".altinstructions");
if (!sec) {
sec = elf_create_section(elf, ".altinstructions",
- SHF_WRITE, size, 0);
+ SHF_ALLOC, size, 0);
if (!sec) {
WARN_ELF("elf_create_section");
@@ -747,6 +747,10 @@ int arch_rewrite_retpolines(struct objtool_file *file)
list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+ insn->type != INSN_CALL_DYNAMIC)
+ continue;
+
if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
continue;
diff --git a/tools/objtool/arch/x86/include/arch/special.h b/tools/objtool/arch/x86/include/arch/special.h
index 14271cca0c74..f2918f789a0a 100644
--- a/tools/objtool/arch/x86/include/arch/special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
@@ -9,6 +9,7 @@
#define JUMP_ENTRY_SIZE 16
#define JUMP_ORIG_OFFSET 0
#define JUMP_NEW_OFFSET 4
+#define JUMP_KEY_OFFSET 8
#define ALT_ENTRY_SIZE 12
#define ALT_ORIG_OFFSET 0
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 9ed1a4cd00dc..e5947fbb9e7a 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1225,15 +1225,41 @@ static int handle_jump_alt(struct objtool_file *file,
struct instruction *orig_insn,
struct instruction **new_insn)
{
- if (orig_insn->type == INSN_NOP)
- return 0;
+ if (orig_insn->type != INSN_JUMP_UNCONDITIONAL &&
+ orig_insn->type != INSN_NOP) {
- if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
WARN_FUNC("unsupported instruction at jump label",
orig_insn->sec, orig_insn->offset);
return -1;
}
+ if (special_alt->key_addend & 2) {
+ struct reloc *reloc = insn_reloc(file, orig_insn);
+
+ if (reloc) {
+ reloc->type = R_NONE;
+ elf_write_reloc(file->elf, reloc);
+ }
+ elf_write_insn(file->elf, orig_insn->sec,
+ orig_insn->offset, orig_insn->len,
+ arch_nop_insn(orig_insn->len));
+ orig_insn->type = INSN_NOP;
+ }
+
+ if (orig_insn->type == INSN_NOP) {
+ if (orig_insn->len == 2)
+ file->jl_nop_short++;
+ else
+ file->jl_nop_long++;
+
+ return 0;
+ }
+
+ if (orig_insn->len == 2)
+ file->jl_short++;
+ else
+ file->jl_long++;
+
*new_insn = list_next_entry(orig_insn, list);
return 0;
}
@@ -1314,6 +1340,12 @@ static int add_special_section_alts(struct objtool_file *file)
free(special_alt);
}
+ if (stats) {
+ printf("jl\\\tNOP\tJMP\n");
+ printf("short:\t%ld\t%ld\n", file->jl_nop_short, file->jl_short);
+ printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long);
+ }
+
out:
return ret;
}
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 743c2e9d0f56..8676c7598728 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -9,6 +9,7 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -27,21 +28,27 @@ static inline u32 str_hash(const char *str)
return jhash(str, strlen(str), 0);
}
-static inline int elf_hash_bits(void)
-{
- return vmlinux ? ELF_HASH_BITS : 16;
-}
+#define __elf_table(name) (elf->name##_hash)
+#define __elf_bits(name) (elf->name##_bits)
-#define elf_hash_add(hashtable, node, key) \
- hlist_add_head(node, &hashtable[hash_min(key, elf_hash_bits())])
+#define elf_hash_add(name, node, key) \
+ hlist_add_head(node, &__elf_table(name)[hash_min(key, __elf_bits(name))])
-static void elf_hash_init(struct hlist_head *table)
-{
- __hash_init(table, 1U << elf_hash_bits());
-}
+#define elf_hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, &__elf_table(name)[hash_min(key, __elf_bits(name))], member)
-#define elf_hash_for_each_possible(name, obj, member, key) \
- hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member)
+#define elf_alloc_hash(name, size) \
+({ \
+ __elf_bits(name) = max(10, ilog2(size)); \
+ __elf_table(name) = mmap(NULL, sizeof(struct hlist_head) << __elf_bits(name), \
+ PROT_READ|PROT_WRITE, \
+ MAP_PRIVATE|MAP_ANON, -1, 0); \
+ if (__elf_table(name) == (void *)-1L) { \
+ WARN("mmap fail " #name); \
+ __elf_table(name) = NULL; \
+ } \
+ __elf_table(name); \
+})
static bool symbol_to_offset(struct rb_node *a, const struct rb_node *b)
{
@@ -80,9 +87,10 @@ struct section *find_section_by_name(const struct elf *elf, const char *name)
{
struct section *sec;
- elf_hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
+ elf_hash_for_each_possible(section_name, sec, name_hash, str_hash(name)) {
if (!strcmp(sec->name, name))
return sec;
+ }
return NULL;
}
@@ -92,9 +100,10 @@ static struct section *find_section_by_index(struct elf *elf,
{
struct section *sec;
- elf_hash_for_each_possible(elf->section_hash, sec, hash, idx)
+ elf_hash_for_each_possible(section, sec, hash, idx) {
if (sec->idx == idx)
return sec;
+ }
return NULL;
}
@@ -103,9 +112,10 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
{
struct symbol *sym;
- elf_hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
+ elf_hash_for_each_possible(symbol, sym, hash, idx) {
if (sym->idx == idx)
return sym;
+ }
return NULL;
}
@@ -170,9 +180,10 @@ struct symbol *find_symbol_by_name(const struct elf *elf, const char *name)
{
struct symbol *sym;
- elf_hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
+ elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(name)) {
if (!strcmp(sym->name, name))
return sym;
+ }
return NULL;
}
@@ -189,8 +200,8 @@ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *se
sec = sec->reloc;
for_offset_range(o, offset, offset + len) {
- elf_hash_for_each_possible(elf->reloc_hash, reloc, hash,
- sec_offset_hash(sec, o)) {
+ elf_hash_for_each_possible(reloc, reloc, hash,
+ sec_offset_hash(sec, o)) {
if (reloc->sec != sec)
continue;
@@ -228,6 +239,10 @@ static int read_sections(struct elf *elf)
return -1;
}
+ if (!elf_alloc_hash(section, sections_nr) ||
+ !elf_alloc_hash(section_name, sections_nr))
+ return -1;
+
for (i = 0; i < sections_nr; i++) {
sec = malloc(sizeof(*sec));
if (!sec) {
@@ -273,13 +288,18 @@ static int read_sections(struct elf *elf)
}
sec->len = sec->sh.sh_size;
+ if (sec->sh.sh_flags & SHF_EXECINSTR)
+ elf->text_size += sec->len;
+
list_add_tail(&sec->list, &elf->sections);
- elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
- elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+ elf_hash_add(section, &sec->hash, sec->idx);
+ elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
}
- if (stats)
+ if (stats) {
printf("nr_sections: %lu\n", (unsigned long)sections_nr);
+ printf("section_bits: %d\n", elf->section_bits);
+ }
/* sanity check, one more call to elf_nextscn() should return NULL */
if (elf_nextscn(elf->elf, s)) {
@@ -308,8 +328,8 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
else
entry = &sym->sec->symbol_list;
list_add(&sym->list, entry);
- elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
- elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+ elf_hash_add(symbol, &sym->hash, sym->idx);
+ elf_hash_add(symbol_name, &sym->name_hash, str_hash(sym->name));
/*
* Don't store empty STT_NOTYPE symbols in the rbtree. They
@@ -329,19 +349,25 @@ static int read_symbols(struct elf *elf)
Elf32_Word shndx;
symtab = find_section_by_name(elf, ".symtab");
- if (!symtab) {
+ if (symtab) {
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ if (symtab_shndx)
+ shndx_data = symtab_shndx->data;
+
+ symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+ } else {
/*
* A missing symbol table is actually possible if it's an empty
- * .o file. This can happen for thunk_64.o.
+ * .o file. This can happen for thunk_64.o. Make sure to at
+ * least allocate the symbol hash tables so we can do symbol
+ * lookups without crashing.
*/
- return 0;
+ symbols_nr = 0;
}
- symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
- if (symtab_shndx)
- shndx_data = symtab_shndx->data;
-
- symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+ if (!elf_alloc_hash(symbol, symbols_nr) ||
+ !elf_alloc_hash(symbol_name, symbols_nr))
+ return -1;
for (i = 0; i < symbols_nr; i++) {
sym = malloc(sizeof(*sym));
@@ -389,8 +415,10 @@ static int read_symbols(struct elf *elf)
elf_add_symbol(elf, sym);
}
- if (stats)
+ if (stats) {
printf("nr_symbols: %lu\n", (unsigned long)symbols_nr);
+ printf("symbol_bits: %d\n", elf->symbol_bits);
+ }
/* Create parent/child links for any cold subfunctions */
list_for_each_entry(sec, &elf->sections, list) {
@@ -479,7 +507,7 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
reloc->addend = addend;
list_add_tail(&reloc->list, &sec->reloc->reloc_list);
- elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+ elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
sec->reloc->changed = true;
@@ -556,6 +584,9 @@ static int read_relocs(struct elf *elf)
unsigned int symndx;
unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
+ if (!elf_alloc_hash(reloc, elf->text_size / 16))
+ return -1;
+
list_for_each_entry(sec, &elf->sections, list) {
if ((sec->sh.sh_type != SHT_RELA) &&
(sec->sh.sh_type != SHT_REL))
@@ -600,7 +631,7 @@ static int read_relocs(struct elf *elf)
}
list_add_tail(&reloc->list, &sec->reloc_list);
- elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+ elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
nr_reloc++;
}
@@ -611,6 +642,7 @@ static int read_relocs(struct elf *elf)
if (stats) {
printf("max_reloc: %lu\n", max_reloc);
printf("tot_reloc: %lu\n", tot_reloc);
+ printf("reloc_bits: %d\n", elf->reloc_bits);
}
return 0;
@@ -632,12 +664,6 @@ struct elf *elf_open_read(const char *name, int flags)
INIT_LIST_HEAD(&elf->sections);
- elf_hash_init(elf->symbol_hash);
- elf_hash_init(elf->symbol_name_hash);
- elf_hash_init(elf->section_hash);
- elf_hash_init(elf->section_name_hash);
- elf_hash_init(elf->reloc_hash);
-
elf->fd = open(name, flags);
if (elf->fd == -1) {
fprintf(stderr, "objtool: Can't open '%s': %s\n",
@@ -717,7 +743,7 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
{
- struct section *symtab;
+ struct section *symtab, *symtab_shndx;
struct symbol *sym;
Elf_Data *data;
Elf_Scn *s;
@@ -769,6 +795,29 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
symtab->len += data->d_size;
symtab->changed = true;
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ if (symtab_shndx) {
+ s = elf_getscn(elf->elf, symtab_shndx->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return NULL;
+ }
+
+ data = elf_newdata(s);
+ if (!data) {
+ WARN_ELF("elf_newdata");
+ return NULL;
+ }
+
+ data->d_buf = &sym->sym.st_size; /* conveniently 0 */
+ data->d_size = sizeof(Elf32_Word);
+ data->d_align = 4;
+ data->d_type = ELF_T_WORD;
+
+ symtab_shndx->len += 4;
+ symtab_shndx->changed = true;
+ }
+
sym->sec = find_section_by_index(elf, 0);
elf_add_symbol(elf, sym);
@@ -851,8 +900,8 @@ struct section *elf_create_section(struct elf *elf, const char *name,
return NULL;
list_add_tail(&sec->list, &elf->sections);
- elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
- elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+ elf_hash_add(section, &sec->hash, sec->idx);
+ elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
elf->changed = true;
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 45e5ede363b0..e34395047530 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -83,12 +83,20 @@ struct elf {
int fd;
bool changed;
char *name;
+ unsigned int text_size;
struct list_head sections;
- DECLARE_HASHTABLE(symbol_hash, ELF_HASH_BITS);
- DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS);
- DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS);
- DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS);
- DECLARE_HASHTABLE(reloc_hash, ELF_HASH_BITS);
+
+ int symbol_bits;
+ int symbol_name_bits;
+ int section_bits;
+ int section_name_bits;
+ int reloc_bits;
+
+ struct hlist_head *symbol_hash;
+ struct hlist_head *symbol_name_hash;
+ struct hlist_head *section_hash;
+ struct hlist_head *section_name_hash;
+ struct hlist_head *reloc_hash;
};
#define OFFSET_STRIDE_BITS 4
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index e4084afb2304..24fa83634de4 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -22,6 +22,9 @@ struct objtool_file {
struct list_head static_call_list;
struct list_head mcount_loc_list;
bool ignore_unreachables, c_file, hints, rodata;
+
+ unsigned long jl_short, jl_long;
+ unsigned long jl_nop_short, jl_nop_long;
};
struct objtool_file *objtool_open_read(const char *_objname);
diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h
index 8a09f4e9d480..dc4721e19002 100644
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -27,6 +27,7 @@ struct special_alt {
unsigned long new_off;
unsigned int orig_len, new_len; /* group only */
+ u8 key_addend;
};
int special_get_alts(struct elf *elf, struct list_head *alts);
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 07b21cfabf5c..bc925cf19e2d 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -23,6 +23,7 @@ struct special_entry {
unsigned char size, orig, new;
unsigned char orig_len, new_len; /* group only */
unsigned char feature; /* ALTERNATIVE macro CPU feature */
+ unsigned char key; /* jump_label key */
};
struct special_entry entries[] = {
@@ -42,6 +43,7 @@ struct special_entry entries[] = {
.size = JUMP_ENTRY_SIZE,
.orig = JUMP_ORIG_OFFSET,
.new = JUMP_NEW_OFFSET,
+ .key = JUMP_KEY_OFFSET,
},
{
.sec = "__ex_table",
@@ -122,6 +124,18 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
alt->new_off -= 0x7ffffff0;
}
+ if (entry->key) {
+ struct reloc *key_reloc;
+
+ key_reloc = find_reloc_by_dest(elf, sec, offset + entry->key);
+ if (!key_reloc) {
+ WARN_FUNC("can't find key reloc",
+ sec, offset + entry->key);
+ return -1;
+ }
+ alt->key_addend = key_reloc->addend;
+ }
+
return 0;
}
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index 0f1005209a2b..2d586fe5e4c5 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -20,6 +20,7 @@
L synthesize last branch entries on existing event records
s skip initial number of events
q quicker (less detailed) decoding
+ Z prefer to ignore timestamps (so-called "timeless" decoding)
The default is all events i.e. the same as --itrace=ibxwpe,
except for perf script where it is --itrace=ce
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 80c1be5d566c..33c2521cba4a 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -58,6 +58,13 @@ OPTIONS
--ignore-vmlinux::
Ignore vmlinux files.
+--itrace::
+ Options for decoding instruction tracing data. The options are:
+
+include::itrace.txt[]
+
+ To disable decoding entirely, use --no-itrace.
+
-m::
--modules::
Load module symbols. WARNING: use only with -k and LIVE kernel.
diff --git a/tools/perf/Documentation/perf-dlfilter.txt b/tools/perf/Documentation/perf-dlfilter.txt
new file mode 100644
index 000000000000..02842cb4cf90
--- /dev/null
+++ b/tools/perf/Documentation/perf-dlfilter.txt
@@ -0,0 +1,251 @@
+perf-dlfilter(1)
+================
+
+NAME
+----
+perf-dlfilter - Filter sample events using a dynamically loaded shared
+object file
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [--dlfilter file.so ] [ --dlarg arg ]...
+
+DESCRIPTION
+-----------
+
+This option is used to process data through a custom filter provided by a
+dynamically loaded shared object file. Arguments can be passed using --dlarg
+and retrieved using perf_dlfilter_fns.args().
+
+If 'file.so' does not contain "/", then it will be found either in the current
+directory, or perf tools exec path which is ~/libexec/perf-core/dlfilters for
+a local build and install (refer perf --exec-path), or the dynamic linker
+paths.
+
+API
+---
+
+The API for filtering consists of the following:
+
+[source,c]
+----
+#include <perf/perf_dlfilter.h>
+
+const struct perf_dlfilter_fns perf_dlfilter_fns;
+
+int start(void **data, void *ctx);
+int stop(void *data, void *ctx);
+int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx);
+int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx);
+const char *filter_description(const char **long_description);
+----
+
+If implemented, 'start' will be called at the beginning, before any
+calls to 'filter_event' or 'filter_event_early'. Return 0 to indicate success,
+or return a negative error code. '*data' can be assigned for use by other
+functions. 'ctx' is needed for calls to perf_dlfilter_fns, but most
+perf_dlfilter_fns are not valid when called from 'start'.
+
+If implemented, 'stop' will be called at the end, after any calls to
+'filter_event' or 'filter_event_early'. Return 0 to indicate success, or
+return a negative error code. 'data' is set by 'start'. 'ctx' is needed
+for calls to perf_dlfilter_fns, but most perf_dlfilter_fns are not valid
+when called from 'stop'.
+
+If implemented, 'filter_event' will be called for each sample event.
+Return 0 to keep the sample event, 1 to filter it out, or return a negative
+error code. 'data' is set by 'start'. 'ctx' is needed for calls to
+'perf_dlfilter_fns'.
+
+'filter_event_early' is the same as 'filter_event' except it is called before
+internal filtering.
+
+If implemented, 'filter_description' should return a one-line description
+of the filter, and optionally a longer description.
+
+The perf_dlfilter_sample structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+'filter_event' and 'filter_event_early' are passed a perf_dlfilter_sample
+structure, which contains the following fields:
+[source,c]
+----
+/*
+ * perf sample event information (as per perf script and <linux/perf_event.h>)
+ */
+struct perf_dlfilter_sample {
+ __u32 size; /* Size of this structure (for compatibility checking) */
+ __u16 ins_lat; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+ __u16 p_stage_cyc; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+ __u64 ip;
+ __s32 pid;
+ __s32 tid;
+ __u64 time;
+ __u64 addr;
+ __u64 id;
+ __u64 stream_id;
+ __u64 period;
+ __u64 weight; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+ __u64 transaction; /* Refer PERF_SAMPLE_TRANSACTION in <linux/perf_event.h> */
+ __u64 insn_cnt; /* For instructions-per-cycle (IPC) */
+ __u64 cyc_cnt; /* For instructions-per-cycle (IPC) */
+ __s32 cpu;
+ __u32 flags; /* Refer PERF_DLFILTER_FLAG_* above */
+ __u64 data_src; /* Refer PERF_SAMPLE_DATA_SRC in <linux/perf_event.h> */
+ __u64 phys_addr; /* Refer PERF_SAMPLE_PHYS_ADDR in <linux/perf_event.h> */
+ __u64 data_page_size; /* Refer PERF_SAMPLE_DATA_PAGE_SIZE in <linux/perf_event.h> */
+ __u64 code_page_size; /* Refer PERF_SAMPLE_CODE_PAGE_SIZE in <linux/perf_event.h> */
+ __u64 cgroup; /* Refer PERF_SAMPLE_CGROUP in <linux/perf_event.h> */
+ __u8 cpumode; /* Refer CPUMODE_MASK etc in <linux/perf_event.h> */
+ __u8 addr_correlates_sym; /* True => resolve_addr() can be called */
+ __u16 misc; /* Refer perf_event_header in <linux/perf_event.h> */
+ __u32 raw_size; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
+ const void *raw_data; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
+ __u64 brstack_nr; /* Number of brstack entries */
+ const struct perf_branch_entry *brstack; /* Refer <linux/perf_event.h> */
+ __u64 raw_callchain_nr; /* Number of raw_callchain entries */
+ const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */
+ const char *event;
+};
+----
+
+The perf_dlfilter_fns structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The 'perf_dlfilter_fns' structure is populated with function pointers when the
+file is loaded. The functions can be called by 'filter_event' or
+'filter_event_early'.
+
+[source,c]
+----
+struct perf_dlfilter_fns {
+ const struct perf_dlfilter_al *(*resolve_ip)(void *ctx);
+ const struct perf_dlfilter_al *(*resolve_addr)(void *ctx);
+ char **(*args)(void *ctx, int *dlargc);
+ __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al);
+ const __u8 *(*insn)(void *ctx, __u32 *length);
+ const char *(*srcline)(void *ctx, __u32 *line_number);
+ struct perf_event_attr *(*attr)(void *ctx);
+ __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
+ void *(*reserved[120])(void *);
+};
+----
+
+'resolve_ip' returns information about ip.
+
+'resolve_addr' returns information about addr (if addr_correlates_sym).
+
+'args' returns arguments from --dlarg options.
+
+'resolve_address' provides information about 'address'. al->size must be set
+before calling. Returns 0 on success, -1 otherwise.
+
+'insn' returns instruction bytes and length.
+
+'srcline' returns source file name and line number.
+
+'attr' returns perf_event_attr, refer <linux/perf_event.h>.
+
+'object_code' reads object code and returns the number of bytes read.
+
+The perf_dlfilter_al structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The 'perf_dlfilter_al' structure contains information about an address.
+
+[source,c]
+----
+/*
+ * Address location (as per perf script)
+ */
+struct perf_dlfilter_al {
+ __u32 size; /* Size of this structure (for compatibility checking) */
+ __u32 symoff;
+ const char *sym;
+ __u64 addr; /* Mapped address (from dso) */
+ __u64 sym_start;
+ __u64 sym_end;
+ const char *dso;
+ __u8 sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer <elf.h> */
+ __u8 is_64_bit; /* Only valid if dso is not NULL */
+ __u8 is_kernel_ip; /* True if in kernel space */
+ __u32 buildid_size;
+ __u8 *buildid;
+ /* Below members are only populated by resolve_ip() */
+ __u8 filtered; /* true if this sample event will be filtered out */
+ const char *comm;
+};
+----
+
+perf_dlfilter_sample flags
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The 'flags' member of 'perf_dlfilter_sample' corresponds with the flags field
+of perf script. The bits of the flags are as follows:
+
+[source,c]
+----
+/* Definitions for perf_dlfilter_sample flags */
+enum {
+ PERF_DLFILTER_FLAG_BRANCH = 1ULL << 0,
+ PERF_DLFILTER_FLAG_CALL = 1ULL << 1,
+ PERF_DLFILTER_FLAG_RETURN = 1ULL << 2,
+ PERF_DLFILTER_FLAG_CONDITIONAL = 1ULL << 3,
+ PERF_DLFILTER_FLAG_SYSCALLRET = 1ULL << 4,
+ PERF_DLFILTER_FLAG_ASYNC = 1ULL << 5,
+ PERF_DLFILTER_FLAG_INTERRUPT = 1ULL << 6,
+ PERF_DLFILTER_FLAG_TX_ABORT = 1ULL << 7,
+ PERF_DLFILTER_FLAG_TRACE_BEGIN = 1ULL << 8,
+ PERF_DLFILTER_FLAG_TRACE_END = 1ULL << 9,
+ PERF_DLFILTER_FLAG_IN_TX = 1ULL << 10,
+ PERF_DLFILTER_FLAG_VMENTRY = 1ULL << 11,
+ PERF_DLFILTER_FLAG_VMEXIT = 1ULL << 12,
+};
+----
+
+EXAMPLE
+-------
+
+Filter out everything except branches from "foo" to "bar":
+
+[source,c]
+----
+#include <perf/perf_dlfilter.h>
+#include <string.h>
+
+const struct perf_dlfilter_fns perf_dlfilter_fns;
+
+int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
+{
+ const struct perf_dlfilter_al *al;
+ const struct perf_dlfilter_al *addr_al;
+
+ if (!sample->ip || !sample->addr_correlates_sym)
+ return 1;
+
+ al = perf_dlfilter_fns.resolve_ip(ctx);
+ if (!al || !al->sym || strcmp(al->sym, "foo"))
+ return 1;
+
+ addr_al = perf_dlfilter_fns.resolve_addr(ctx);
+ if (!addr_al || !addr_al->sym || strcmp(addr_al->sym, "bar"))
+ return 1;
+
+ return 0;
+}
+----
+
+To build the shared object, assuming perf has been installed for the local user
+i.e. perf_dlfilter.h is in ~/include/perf :
+
+ gcc -c -I ~/include -fpic dlfilter-example.c
+ gcc -shared -o dlfilter-example.so dlfilter-example.o
+
+To use the filter with perf script:
+
+ perf script --dlfilter dlfilter-example.so
+
+SEE ALSO
+--------
+linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index a8eccff21281..91108fe3ad5f 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -68,6 +68,16 @@ include::itrace.txt[]
--force::
Don't complain, do it.
+--vm-time-correlation[=OPTIONS]::
+ Some architectures may capture AUX area data which contains timestamps
+ affected by virtualization. This option will update those timestamps
+ in place, to correlate with host timestamps. The in-place update means
+ that an output file is not specified, and instead the input file is
+ modified. The options are architecture specific, except that they may
+ start with "dry-run" which will cause the file to be processed but
+ without updating it. Currently this option is supported only by
+ Intel PT, refer to linkperf:perf-intel-pt[1]
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1],
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index 1dcec73c910c..184ba62420f0 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -108,9 +108,9 @@ displayed as follows:
perf script --itrace=ibxwpe -F+flags
-The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
-system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
-in transaction, respectively.
+The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional,
+system, asynchronous, interrupt, transaction abort, trace begin, trace end,
+in transaction, VM-entry, and VM-exit respectively.
perf script also supports higher level ways to dump instruction traces:
@@ -174,7 +174,11 @@ Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
and to script exported-sql-viewer.py for an example of using the database.
There is also script intel-pt-events.py which provides an example of how to
-unpack the raw data for power events and PTWRITE.
+unpack the raw data for power events and PTWRITE. The script also displays
+branches, and supports 2 additional modes selected by option:
+
+ --insn-trace - instruction trace
+ --src-trace - source trace
As mentioned above, it is easy to capture too much data. One way to limit the
data captured is to use 'snapshot' mode which is explained further below.
@@ -869,6 +873,7 @@ The letters are:
L synthesize last branch entries on existing event records
s skip initial number of events
q quicker (less detailed) decoding
+ Z prefer to ignore timestamps (so-called "timeless" decoding)
"Instructions" events look like they were recorded by "perf record -e
instructions".
@@ -1062,6 +1067,10 @@ What *will* be decoded with the qq option:
- instruction pointer associated with PSB packets
+The Z option is equivalent to having recorded a trace without TSC
+(i.e. config term tsc=0). It can be useful to avoid timestamp issues when
+decoding a trace of a virtual machine.
+
dump option
~~~~~~~~~~~
@@ -1150,8 +1159,9 @@ include::build-xed.txt[]
Tracing Virtual Machines
------------------------
-Currently, only kernel tracing is supported and only with "timeless" decoding
-i.e. no TSC timestamps
+Currently, only kernel tracing is supported and only with either "timeless" decoding
+(i.e. no TSC timestamps) or VM Time Correlation. VM Time Correlation is an extra step
+using 'perf inject' and requires unchanging VMX TSC Offset and no VMX TSC Scaling.
Other limitations and caveats
@@ -1162,7 +1172,7 @@ Other limitations and caveats
Guest VCPU is unknown but may be able to be inferred from the host thread
Callchains are not supported
-Example
+Example using "timeless" decoding
Start VM
@@ -1226,6 +1236,107 @@ perf script can be used to provide an instruction trace
:1440 1440 ffffffffbb74603c clockevents_program_event+0x4c ([guest.kernel.kallsyms]) popq %rbx
:1440 1440 ffffffffbb74603d clockevents_program_event+0x4d ([guest.kernel.kallsyms]) popq %r12
+Example using VM Time Correlation
+
+Start VM
+
+ $ sudo virsh start kubuntu20.04
+ Domain kubuntu20.04 started
+
+Mount the guest file system. Note sshfs needs -o direct_io to enable reading of proc files. root access is needed to read /proc/kcore.
+
+ $ mkdir -p vm0
+ $ sshfs -o direct_io root@vm0:/ vm0
+
+Copy the guest /proc/kallsyms, /proc/modules and /proc/kcore
+
+ $ perf buildid-cache -v --kcore vm0/proc/kcore
+ same kcore found in /home/user/.debug/[kernel.kcore]/cc9c55a98c5e4ec0aeda69302554aabed5cd6491/2021021312450777
+ $ KALLSYMS=/home/user/.debug/\[kernel.kcore\]/cc9c55a98c5e4ec0aeda69302554aabed5cd6491/2021021312450777/kallsyms
+
+Find the VM process
+
+ $ ps -eLl | grep 'KVM\|PID'
+ F S UID PID PPID LWP C PRI NI ADDR SZ WCHAN TTY TIME CMD
+ 3 S 64055 16998 1 17005 13 80 0 - 1818189 - ? 00:00:16 CPU 0/KVM
+ 3 S 64055 16998 1 17006 4 80 0 - 1818189 - ? 00:00:05 CPU 1/KVM
+ 3 S 64055 16998 1 17007 3 80 0 - 1818189 - ? 00:00:04 CPU 2/KVM
+ 3 S 64055 16998 1 17008 4 80 0 - 1818189 - ? 00:00:05 CPU 3/KVM
+
+Start an open-ended perf record, tracing the VM process, do something on the VM, and then ctrl-C to stop.
+IPC can be determined, hence cyc=1 can be added.
+Only kernel decoding is supported, so 'k' must be specified.
+Intel PT traces both the host and the guest so --guest and --host need to be specified.
+
+ $ sudo perf kvm --guest --host --guestkallsyms $KALLSYMS record --kcore -e intel_pt/cyc=1/k -p 16998
+ ^C[ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 9.041 MB perf.data.kvm ]
+
+Now 'perf inject' can be used to determine the VMX TSC Offset. Note, Intel PT TSC packets are
+only 7-bytes, so the TSC Offset might differ from the actual value in the 8th byte. That will
+have no effect i.e. the resulting timestamps will be correct anyway.
+
+ $ perf inject -i perf.data.kvm --vm-time-correlation=dry-run
+ ERROR: Unknown TSC Offset for VMCS 0x1bff6a
+ VMCS: 0x1bff6a TSC Offset 0xffffe42722c64c41
+ ERROR: Unknown TSC Offset for VMCS 0x1cbc08
+ VMCS: 0x1cbc08 TSC Offset 0xffffe42722c64c41
+ ERROR: Unknown TSC Offset for VMCS 0x1c3ce8
+ VMCS: 0x1c3ce8 TSC Offset 0xffffe42722c64c41
+ ERROR: Unknown TSC Offset for VMCS 0x1cbce9
+ VMCS: 0x1cbce9 TSC Offset 0xffffe42722c64c41
+
+Each virtual CPU has a different Virtual Machine Control Structure (VMCS)
+shown above with the calculated TSC Offset. For an unchanging TSC Offset
+they should all be the same for the same virtual machine.
+
+Now that the TSC Offset is known, it can be provided to 'perf inject'
+
+ $ perf inject -i perf.data.kvm --vm-time-correlation="dry-run 0xffffe42722c64c41"
+
+Note the options for 'perf inject' --vm-time-correlation are:
+
+ [ dry-run ] [ <TSC Offset> [ : <VMCS> [ , <VMCS> ]... ] ]...
+
+So it is possible to specify different TSC Offsets for different VMCS.
+The option "dry-run" will cause the file to be processed but without updating it.
+Note it is also possible to get an intel_pt.log file by adding option --itrace=d
+
+There were no errors, so do it for real
+
+ $ perf inject -i perf.data.kvm --vm-time-correlation=0xffffe42722c64c41 --force
+
+'perf script' can be used to see if there are any decoder errors
+
+ $ perf script -i perf.data.kvm --guestkallsyms $KALLSYMS --itrace=e-o
+
+There were none.
+
+'perf script' can be used to provide an instruction trace showing timestamps
+
+ $ perf script -i perf.data.kvm --guestkallsyms $KALLSYMS --insn-trace --xed -F+ipc | grep -C10 vmresume | head -21
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cdd __vmx_vcpu_run+0x3d ([kernel.kallsyms]) movq 0x48(%rax), %r9
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133ce1 __vmx_vcpu_run+0x41 ([kernel.kallsyms]) movq 0x50(%rax), %r10
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133ce5 __vmx_vcpu_run+0x45 ([kernel.kallsyms]) movq 0x58(%rax), %r11
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133ce9 __vmx_vcpu_run+0x49 ([kernel.kallsyms]) movq 0x60(%rax), %r12
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133ced __vmx_vcpu_run+0x4d ([kernel.kallsyms]) movq 0x68(%rax), %r13
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cf1 __vmx_vcpu_run+0x51 ([kernel.kallsyms]) movq 0x70(%rax), %r14
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cf5 __vmx_vcpu_run+0x55 ([kernel.kallsyms]) movq 0x78(%rax), %r15
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cf9 __vmx_vcpu_run+0x59 ([kernel.kallsyms]) movq (%rax), %rax
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cfc __vmx_vcpu_run+0x5c ([kernel.kallsyms]) callq 0xffffffff82133c40
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133c40 vmx_vmenter+0x0 ([kernel.kallsyms]) jz 0xffffffff82133c46
+ CPU 1/KVM 17006 [001] 11500.262866075: ffffffff82133c42 vmx_vmenter+0x2 ([kernel.kallsyms]) vmresume IPC: 0.05 (40/769)
+ :17006 17006 [001] 11500.262869216: ffffffff82200cb0 asm_sysvec_apic_timer_interrupt+0x0 ([guest.kernel.kallsyms]) clac
+ :17006 17006 [001] 11500.262869216: ffffffff82200cb3 asm_sysvec_apic_timer_interrupt+0x3 ([guest.kernel.kallsyms]) pushq $0xffffffffffffffff
+ :17006 17006 [001] 11500.262869216: ffffffff82200cb5 asm_sysvec_apic_timer_interrupt+0x5 ([guest.kernel.kallsyms]) callq 0xffffffff82201160
+ :17006 17006 [001] 11500.262869216: ffffffff82201160 error_entry+0x0 ([guest.kernel.kallsyms]) cld
+ :17006 17006 [001] 11500.262869216: ffffffff82201161 error_entry+0x1 ([guest.kernel.kallsyms]) pushq %rsi
+ :17006 17006 [001] 11500.262869216: ffffffff82201162 error_entry+0x2 ([guest.kernel.kallsyms]) movq 0x8(%rsp), %rsi
+ :17006 17006 [001] 11500.262869216: ffffffff82201167 error_entry+0x7 ([guest.kernel.kallsyms]) movq %rdi, 0x8(%rsp)
+ :17006 17006 [001] 11500.262869216: ffffffff8220116c error_entry+0xc ([guest.kernel.kallsyms]) pushq %rdx
+ :17006 17006 [001] 11500.262869216: ffffffff8220116d error_entry+0xd ([guest.kernel.kallsyms]) pushq %rcx
+ :17006 17006 [001] 11500.262869216: ffffffff8220116e error_entry+0xe ([guest.kernel.kallsyms]) pushq %rax
+
SEE ALSO
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index ed3ecfa422e1..080981d38d7b 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -226,7 +226,7 @@ So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And
LAZY MATCHING
-------------
- The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]).
+The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]).
e.g.
'a=*' can matches 'a=b', 'a = b', 'a == b' and so on.
@@ -235,8 +235,8 @@ This provides some sort of flexibility and robustness to probe point definitions
FILTER PATTERN
--------------
- The filter pattern is a glob matching pattern(s) to filter variables.
- In addition, you can use "!" for specifying filter-out rule. You also can give several rules combined with "&" or "|", and fold those rules as one rule by using "(" ")".
+The filter pattern is a glob matching pattern(s) to filter variables.
+In addition, you can use "!" for specifying filter-out rule. You also can give several rules combined with "&" or "|", and fold those rules as one rule by using "(" ")".
e.g.
With --filter "foo* | bar*", perf probe -V shows variables which start with "foo" or "bar".
@@ -295,6 +295,19 @@ Add a probe in a source file using special characters by backslash escape
./perf probe -x /opt/test/a.out 'foo\+bar.c:4'
+PERMISSIONS AND SYSCTL
+----------------------
+Since perf probe depends on ftrace (tracefs) and kallsyms (/proc/kallsyms), you have to care about the permission and some sysctl knobs.
+
+ - Since tracefs and kallsyms require root or a privileged user to access them, the following perf probe commands also require it: --add, --del, --list (except for --cache option)
+
+ - The system admin can remount the tracefs with 755 (`sudo mount -o remount,mode=755 /sys/kernel/tracing/`) to allow unprivileged user to run the perf probe --list command.
+
+ - /proc/sys/kernel/kptr_restrict = 2 (restrict all users) also prevents perf probe from retrieving the important information from kallsyms. You also need to set it to 1 (restrict non CAP_SYSLOG users) for the above commands. Since the user-space probe doesn't need to access kallsyms, this is only for probing the kernel function (kprobes).
+
+ - Since the perf probe commands read the vmlinux (for kernel) and/or the debuginfo file (including user-space application), you need to ensure that you can read those files.
+
+
SEE ALSO
--------
linkperf:perf-trace[1], linkperf:perf-record[1], linkperf:perf-buildid-cache[1]
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
index 0fb9eda3cbca..5e43cfa5ea1e 100644
--- a/tools/perf/Documentation/perf-script-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -550,6 +550,27 @@ def trace_unhandled(event_name, context, event_fields_dict):
pass
----
+*process_event*, if defined, is called for any non-tracepoint event
+
+----
+def process_event(param_dict):
+ pass
+----
+
+*context_switch*, if defined, is called for any context switch
+
+----
+def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x):
+ pass
+----
+
+*auxtrace_error*, if defined, is called for any AUX area tracing error
+
+----
+def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x):
+ pass
+----
+
The remaining sections provide descriptions of each of the available
built-in perf script Python modules and their associated functions.
@@ -592,12 +613,18 @@ common, but need to be made accessible to user scripts nonetheless.
perf_trace_context defines a set of functions that can be used to
access this data in the context of the current event. Each of these
functions expects a context variable, which is the same as the
-context variable passed into every event handler as the second
-argument.
+context variable passed into every tracepoint event handler as the second
+argument. For non-tracepoint events, the context variable is also present
+as perf_trace_context.perf_script_context .
common_pc(context) - returns common_preempt count for the current event
common_flags(context) - returns common_flags for the current event
common_lock_depth(context) - returns common_lock_depth for the current event
+ perf_sample_insn(context) - returns the machine code instruction
+ perf_set_itrace_options(context, itrace_options) - set --itrace options if they have not been set already
+ perf_sample_srcline(context) - returns source_file_name, line_number
+ perf_sample_srccode(context) - returns source_file_name, line_number, source_line
+
Util.py Module
~~~~~~~~~~~~~~
@@ -616,9 +643,20 @@ SUPPORTED FIELDS
Currently supported fields:
ev_name, comm, pid, tid, cpu, ip, time, period, phys_addr, addr,
-symbol, dso, time_enabled, time_running, values, callchain,
+symbol, symoff, dso, time_enabled, time_running, values, callchain,
brstack, brstacksym, datasrc, datasrc_decode, iregs, uregs,
-weight, transaction, raw_buf, attr.
+weight, transaction, raw_buf, attr, cpumode.
+
+Fields that may also be present:
+
+ flags - sample flags
+ flags_disp - sample flags display
+ insn_cnt - instruction count for determining instructions-per-cycle (IPC)
+ cyc_cnt - cycle count for determining IPC
+ addr_correlates_sym - addr can correlate to a symbol
+ addr_dso - addr dso
+ addr_symbol - addr symbol
+ addr_symoff - addr symbol offset
Some fields have sub items:
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 5b8b61075039..aa3a0b2c29a2 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -98,6 +98,18 @@ OPTIONS
Generate perf-script.[ext] starter script for given language,
using current perf.data.
+--dlfilter=<file>::
+ Filter sample events using the given shared object file.
+ Refer to linkperf:perf-dlfilter[1]
+
+--dlarg=<arg>::
+ Pass 'arg' as an argument to the dlfilter. --dlarg may be repeated
+ to add more arguments.
+
+--list-dlfilters::
+ Display a list of available dlfilters. Use with option -v (must come
+ before option --list-dlfilters) to show long descriptions.
+
-a::
Force system-wide collection. Scripts run without a <command>
normally use -a by default, while scripts run with a <command>
@@ -183,14 +195,15 @@ OPTIONS
At this point usage is displayed, and perf-script exits.
The flags field is synthesized and may have a value when Instruction
- Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
+ Trace decoding. The flags are "bcrosyiABExgh" which stand for branch,
call, return, conditional, system, asynchronous, interrupt,
- transaction abort, trace begin, trace end, and in transaction,
+ transaction abort, trace begin, trace end, in transaction, VM-Entry, and VM-Exit
respectively. Known combinations of flags are printed more nicely e.g.
"call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b",
"int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs",
"async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB",
- "tr end" for "bE". However the "x" flag will be display separately in those
+ "tr end" for "bE", "vmentry" for "bcg", "vmexit" for "bch".
+ However the "x" flag will be displayed separately in those
cases e.g. "jcc (x)" for a condition branch within a transaction.
The callindent field is synthesized and may have a value when
@@ -482,4 +495,5 @@ include::itrace.txt[]
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
-linkperf:perf-script-python[1], linkperf:perf-intel-pt[1]
+linkperf:perf-script-python[1], linkperf:perf-intel-pt[1],
+linkperf:perf-dlfilter[1]
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index bba5ffb05463..9898a32b8d9c 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -277,6 +277,18 @@ Default is to monitor all CPUS.
Record events of type PERF_RECORD_NAMESPACES and display it with the
'cgroup_id' sort key.
+-G name::
+--cgroup name::
+monitor only in the container (cgroup) called "name". This option is available only
+in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
+container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
+can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
+to first event, second cgroup to second event and so on. It is possible to provide
+an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
+corresponding events, i.e., they always refer to events defined earlier on the command
+line. If the user wants to track multiple events for a specific cgroup, the user can
+use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'.
+
--all-cgroups::
Record events of type PERF_RECORD_CGROUP and display it with the
'cgroup' sort key.
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 9ee96640744e..e6ff8c898ada 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -402,6 +402,39 @@ struct {
u64 clockid_time_ns;
};
+ HEADER_HYBRID_TOPOLOGY = 30,
+
+Indicate the hybrid CPUs. The format of data is as below.
+
+struct {
+ u32 nr;
+ struct {
+ char pmu_name[];
+ char cpus[];
+ } [nr]; /* Variable length records */
+};
+
+Example:
+ hybrid cpu system:
+ cpu_core cpu list : 0-15
+ cpu_atom cpu list : 16-23
+
+ HEADER_HYBRID_CPU_PMU_CAPS = 31,
+
+ A list of hybrid CPU PMU capabilities.
+
+struct {
+ u32 nr_pmu;
+ struct {
+ u32 nr_cpu_pmu_caps;
+ {
+ char name[];
+ char value[];
+ } [nr_cpu_pmu_caps];
+ char pmu_name[];
+ } [nr_pmu];
+};
+
other bits are reserved and should ignored for now
HEADER_FEAT_BITS = 256,
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 406a9519145e..eb8e487ef90b 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -90,7 +90,6 @@ endif
ifeq ($(ARCH),mips)
NO_PERF_REGS := 0
CFLAGS += -I$(OUTPUT)arch/mips/include/generated
- CFLAGS += -I../../arch/mips/include/uapi -I../../arch/mips/include/generated/uapi
LIBUNWIND_LIBS = -lunwind -lunwind-mips
endif
@@ -202,6 +201,12 @@ ifeq ($(call get-executable,$(BISON)),)
dummy := $(error Error: $(BISON) is missing on this system, please install it)
endif
+ifneq ($(OUTPUT),)
+ ifeq ($(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \>\= 371), 1)
+ BISON_FILE_PREFIX_MAP := --file-prefix-map=$(OUTPUT)=
+ endif
+endif
+
# Treat warnings as errors unless directed not to
ifneq ($(WERROR),0)
CORE_CFLAGS += -Werror
@@ -635,7 +640,7 @@ endif
ifdef BUILD_BPF_SKEL
$(call feature_check,clang-bpf-co-re)
ifeq ($(feature-clang-bpf-co-re), 0)
- dummy := $(error Error: clang too old. Please install recent clang)
+ dummy := $(error Error: clang too old/not installed. Please install recent clang to build with BUILD_BPF_SKEL)
endif
$(call detected,CONFIG_PERF_BPF_SKEL)
CFLAGS += -DHAVE_BPF_SKEL
@@ -1112,6 +1117,8 @@ prefix ?= $(HOME)
endif
bindir_relative = bin
bindir = $(abspath $(prefix)/$(bindir_relative))
+includedir_relative = include
+includedir = $(abspath $(prefix)/$(includedir_relative))
mandir = share/man
infodir = share/info
perfexecdir = libexec/perf-core
@@ -1144,6 +1151,7 @@ ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
STRACE_GROUPS_DIR_SQ = $(subst ','\'',$(STRACE_GROUPS_DIR))
DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
bindir_SQ = $(subst ','\'',$(bindir))
+includedir_SQ = $(subst ','\'',$(includedir))
mandir_SQ = $(subst ','\'',$(mandir))
infodir_SQ = $(subst ','\'',$(infodir))
perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
@@ -1228,6 +1236,9 @@ $(call detected_var,LIBDIR)
$(call detected_var,GTK_CFLAGS)
$(call detected_var,PERL_EMBED_CCOPTS)
$(call detected_var,PYTHON_EMBED_CCOPTS)
+ifneq ($(BISON_FILE_PREFIX_MAP),)
+$(call detected_var,BISON_FILE_PREFIX_MAP)
+endif
# re-generate FEATURE-DUMP as we may have called feature_check, found out
# extra libraries to add to LDFLAGS of some other test and then redo those
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index e47f04e5b51e..c9e0de5b00c1 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -923,7 +923,9 @@ install-tools: all install-gtk
$(call QUIET_INSTALL, binaries) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
- $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'
+ $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(dir_SQ)/trace'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(includedir_SQ)/perf'; \
+ $(INSTALL) util/perf_dlfilter.h -t '$(DESTDIR_SQ)$(includedir_SQ)/perf'
ifndef NO_PERF_READ_VDSO32
$(call QUIET_INSTALL, perf-read-vdso32) \
$(INSTALL) $(OUTPUT)perf-read-vdso32 '$(DESTDIR_SQ)$(bindir_SQ)';
diff --git a/tools/perf/arch/arm/include/arch-tests.h b/tools/perf/arch/arm/include/arch-tests.h
index 90ec4c8cb880..c62538052404 100644
--- a/tools/perf/arch/arm/include/arch-tests.h
+++ b/tools/perf/arch/arm/include/arch-tests.h
@@ -2,11 +2,6 @@
#ifndef ARCH_TESTS_H
#define ARCH_TESTS_H
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
-struct thread;
-struct perf_sample;
-#endif
-
extern struct test arch_tests[];
#endif
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index d942f118d32c..85168d87b2d7 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -38,8 +38,6 @@ struct cs_etm_recording {
struct auxtrace_record itr;
struct perf_pmu *cs_etm_pmu;
struct evlist *evlist;
- int wrapped_cnt;
- bool *wrapped;
bool snapshot_mode;
size_t snapshot_size;
};
@@ -734,135 +732,6 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
return 0;
}
-static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx)
-{
- bool *wrapped;
- int cnt = ptr->wrapped_cnt;
-
- /* Make @ptr->wrapped as big as @idx */
- while (cnt <= idx)
- cnt++;
-
- /*
- * Free'ed in cs_etm_recording_free(). Using realloc() to avoid
- * cross compilation problems where the host's system supports
- * reallocarray() but not the target.
- */
- wrapped = realloc(ptr->wrapped, cnt * sizeof(bool));
- if (!wrapped)
- return -ENOMEM;
-
- wrapped[cnt - 1] = false;
- ptr->wrapped_cnt = cnt;
- ptr->wrapped = wrapped;
-
- return 0;
-}
-
-static bool cs_etm_buffer_has_wrapped(unsigned char *buffer,
- size_t buffer_size, u64 head)
-{
- u64 i, watermark;
- u64 *buf = (u64 *)buffer;
- size_t buf_size = buffer_size;
-
- /*
- * We want to look the very last 512 byte (chosen arbitrarily) in
- * the ring buffer.
- */
- watermark = buf_size - 512;
-
- /*
- * @head is continuously increasing - if its value is equal or greater
- * than the size of the ring buffer, it has wrapped around.
- */
- if (head >= buffer_size)
- return true;
-
- /*
- * The value of @head is somewhere within the size of the ring buffer.
- * This can be that there hasn't been enough data to fill the ring
- * buffer yet or the trace time was so long that @head has numerically
- * wrapped around. To find we need to check if we have data at the very
- * end of the ring buffer. We can reliably do this because mmap'ed
- * pages are zeroed out and there is a fresh mapping with every new
- * session.
- */
-
- /* @head is less than 512 byte from the end of the ring buffer */
- if (head > watermark)
- watermark = head;
-
- /*
- * Speed things up by using 64 bit transactions (see "u64 *buf" above)
- */
- watermark >>= 3;
- buf_size >>= 3;
-
- /*
- * If we find trace data at the end of the ring buffer, @head has
- * been there and has numerically wrapped around at least once.
- */
- for (i = watermark; i < buf_size; i++)
- if (buf[i])
- return true;
-
- return false;
-}
-
-static int cs_etm_find_snapshot(struct auxtrace_record *itr,
- int idx, struct auxtrace_mmap *mm,
- unsigned char *data,
- u64 *head, u64 *old)
-{
- int err;
- bool wrapped;
- struct cs_etm_recording *ptr =
- container_of(itr, struct cs_etm_recording, itr);
-
- /*
- * Allocate memory to keep track of wrapping if this is the first
- * time we deal with this *mm.
- */
- if (idx >= ptr->wrapped_cnt) {
- err = cs_etm_alloc_wrapped_array(ptr, idx);
- if (err)
- return err;
- }
-
- /*
- * Check to see if *head has wrapped around. If it hasn't only the
- * amount of data between *head and *old is snapshot'ed to avoid
- * bloating the perf.data file with zeros. But as soon as *head has
- * wrapped around the entire size of the AUX ring buffer it taken.
- */
- wrapped = ptr->wrapped[idx];
- if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) {
- wrapped = true;
- ptr->wrapped[idx] = true;
- }
-
- pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
- __func__, idx, (size_t)*old, (size_t)*head, mm->len);
-
- /* No wrap has occurred, we can just use *head and *old. */
- if (!wrapped)
- return 0;
-
- /*
- * *head has wrapped around - adjust *head and *old to pickup the
- * entire content of the AUX buffer.
- */
- if (*head >= mm->len) {
- *old = *head - mm->len;
- } else {
- *head += mm->len;
- *old = *head - mm->len;
- }
-
- return 0;
-}
-
static int cs_etm_snapshot_start(struct auxtrace_record *itr)
{
struct cs_etm_recording *ptr =
@@ -900,7 +769,6 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
- zfree(&ptr->wrapped);
free(ptr);
}
@@ -928,7 +796,6 @@ struct auxtrace_record *cs_etm_record_init(int *err)
ptr->itr.recording_options = cs_etm_recording_options;
ptr->itr.info_priv_size = cs_etm_info_priv_size;
ptr->itr.info_fill = cs_etm_info_fill;
- ptr->itr.find_snapshot = cs_etm_find_snapshot;
ptr->itr.snapshot_start = cs_etm_snapshot_start;
ptr->itr.snapshot_finish = cs_etm_snapshot_finish;
ptr->itr.reference = cs_etm_reference;
diff --git a/tools/perf/arch/arm64/include/arch-tests.h b/tools/perf/arch/arm64/include/arch-tests.h
index 90ec4c8cb880..c62538052404 100644
--- a/tools/perf/arch/arm64/include/arch-tests.h
+++ b/tools/perf/arch/arm64/include/arch-tests.h
@@ -2,11 +2,6 @@
#ifndef ARCH_TESTS_H
#define ARCH_TESTS_H
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
-struct thread;
-struct perf_sample;
-#endif
-
extern struct test arch_tests[];
#endif
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 414c8a5584b1..a4420d4df503 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -14,6 +14,7 @@
#include "../../../util/cpumap.h"
#include "../../../util/event.h"
#include "../../../util/evsel.h"
+#include "../../../util/evsel_config.h"
#include "../../../util/evlist.h"
#include "../../../util/session.h"
#include <internal/lib.h> // page_size
@@ -32,6 +33,29 @@ struct arm_spe_recording {
struct evlist *evlist;
};
+static void arm_spe_set_timestamp(struct auxtrace_record *itr,
+ struct evsel *evsel)
+{
+ struct arm_spe_recording *ptr;
+ struct perf_pmu *arm_spe_pmu;
+ struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG);
+ u64 user_bits = 0, bit;
+
+ ptr = container_of(itr, struct arm_spe_recording, itr);
+ arm_spe_pmu = ptr->arm_spe_pmu;
+
+ if (term)
+ user_bits = term->val.cfg_chg;
+
+ bit = perf_pmu__format_bits(&arm_spe_pmu->format, "ts_enable");
+
+ /* Skip if user has set it */
+ if (bit & user_bits)
+ return;
+
+ evsel->core.attr.config |= bit;
+}
+
static size_t
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
struct evlist *evlist __maybe_unused)
@@ -68,6 +92,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
container_of(itr, struct arm_spe_recording, itr);
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
struct evsel *evsel, *arm_spe_evsel = NULL;
+ struct perf_cpu_map *cpus = evlist->core.cpus;
bool privileged = perf_event_paranoid_check(-1);
struct evsel *tracking_evsel;
int err;
@@ -91,7 +116,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
return 0;
/* We are in full trace mode but '-m,xyz' wasn't specified */
- if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+ if (!opts->auxtrace_mmap_pages) {
if (privileged) {
opts->auxtrace_mmap_pages = MiB(4) / page_size;
} else {
@@ -120,9 +145,14 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
*/
evlist__to_front(evlist, arm_spe_evsel);
- evsel__set_sample_bit(arm_spe_evsel, CPU);
- evsel__set_sample_bit(arm_spe_evsel, TIME);
- evsel__set_sample_bit(arm_spe_evsel, TID);
+ /*
+ * In the case of per-cpu mmaps, sample CPU for AUX event;
+ * also enable the timestamp tracing for samples correlation.
+ */
+ if (!perf_cpu_map__empty(cpus)) {
+ evsel__set_sample_bit(arm_spe_evsel, CPU);
+ arm_spe_set_timestamp(itr, arm_spe_evsel);
+ }
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
@@ -134,9 +164,10 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
tracking_evsel->core.attr.freq = 0;
tracking_evsel->core.attr.sample_period = 1;
- evsel__set_sample_bit(tracking_evsel, TIME);
- evsel__set_sample_bit(tracking_evsel, CPU);
- evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+
+ /* In per-cpu case, always need the time of mmap events etc */
+ if (!perf_cpu_map__empty(cpus))
+ evsel__set_sample_bit(tracking_evsel, TIME);
return 0;
}
diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c
index 2a2497372671..be41721b9aa1 100644
--- a/tools/perf/arch/arm64/util/mem-events.c
+++ b/tools/perf/arch/arm64/util/mem-events.c
@@ -20,7 +20,7 @@ struct perf_mem_event *perf_mem_events__ptr(int i)
return &perf_mem_events[i];
}
-char *perf_mem_events__name(int i)
+char *perf_mem_events__name(int i, char *pmu_name __maybe_unused)
{
struct perf_mem_event *e = perf_mem_events__ptr(i);
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 9974f5f8e49b..9cd1c34f31b5 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -357,7 +357,7 @@
440 n64 process_madvise sys_process_madvise
441 n64 epoll_pwait2 sys_epoll_pwait2
442 n64 mount_setattr sys_mount_setattr
-443 n64 quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 n64 landlock_create_ruleset sys_landlock_create_ruleset
445 n64 landlock_add_rule sys_landlock_add_rule
446 n64 landlock_restrict_self sys_landlock_restrict_self
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 2e68fbb57cc6..8f052ff4058c 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -522,7 +522,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/tools/perf/arch/powerpc/include/arch-tests.h b/tools/perf/arch/powerpc/include/arch-tests.h
index 1c7be75cbc78..c62538052404 100644
--- a/tools/perf/arch/powerpc/include/arch-tests.h
+++ b/tools/perf/arch/powerpc/include/arch-tests.h
@@ -2,13 +2,6 @@
#ifndef ARCH_TESTS_H
#define ARCH_TESTS_H
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
-struct thread;
-struct perf_sample;
-int test__arch_unwind_sample(struct perf_sample *sample,
- struct thread *thread);
-#endif
-
extern struct test arch_tests[];
#endif
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
index 8efd9ed9e9db..c9cb4b059392 100644
--- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c
+++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
@@ -7,7 +7,6 @@
#include "event.h"
#include "debug.h"
#include "tests/tests.h"
-#include "arch-tests.h"
#define STACK_SIZE 8192
diff --git a/tools/perf/arch/powerpc/util/mem-events.c b/tools/perf/arch/powerpc/util/mem-events.c
index 07fb5e049488..4120fafe0be4 100644
--- a/tools/perf/arch/powerpc/util/mem-events.c
+++ b/tools/perf/arch/powerpc/util/mem-events.c
@@ -3,7 +3,7 @@
#include "mem-events.h"
/* PowerPC does not support 'ldlat' parameter. */
-char *perf_mem_events__name(int i)
+char *perf_mem_events__name(int i, char *pmu_name __maybe_unused)
{
if (i == PERF_MEM_EVENTS__LOAD)
return (char *) "cpu/mem-loads/";
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 7e4a2aba366d..0690263df1dd 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -445,7 +445,7 @@
440 common process_madvise sys_process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index ecd551b08d05..ce18119ea0d0 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -364,7 +364,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 0e20f3dc69f3..9599e7a3f1af 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -2,23 +2,15 @@
#ifndef ARCH_TESTS_H
#define ARCH_TESTS_H
-#include <linux/compiler.h>
struct test;
/* Tests */
-int test__rdpmc(struct test *test __maybe_unused, int subtest);
-int test__insn_x86(struct test *test __maybe_unused, int subtest);
+int test__rdpmc(struct test *test, int subtest);
+int test__insn_x86(struct test *test, int subtest);
int test__intel_pt_pkt_decoder(struct test *test, int subtest);
int test__bp_modify(struct test *test, int subtest);
int test__x86_sample_parsing(struct test *test, int subtest);
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
-struct thread;
-struct perf_sample;
-int test__arch_unwind_sample(struct perf_sample *sample,
- struct thread *thread);
-#endif
-
extern struct test arch_tests[];
#endif
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
index 478078fb0f22..a54dea7c112f 100644
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -7,7 +7,6 @@
#include "event.h"
#include "debug.h"
#include "tests/tests.h"
-#include "arch-tests.h"
#define STACK_SIZE 8192
diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c
index 072920475b65..c5dd54f6ef5e 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -133,11 +133,56 @@ static struct kvm_events_ops ioport_events = {
.name = "IO Port Access"
};
+ /* The time of emulation msr is from kvm_msr to kvm_entry. */
+static void msr_event_get_key(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ key->key = evsel__intval(evsel, sample, "ecx");
+ key->info = evsel__intval(evsel, sample, "write");
+}
+
+static bool msr_event_begin(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ if (!strcmp(evsel->name, "kvm:kvm_msr")) {
+ msr_event_get_key(evsel, sample, key);
+ return true;
+ }
+
+ return false;
+}
+
+static bool msr_event_end(struct evsel *evsel,
+ struct perf_sample *sample __maybe_unused,
+ struct event_key *key __maybe_unused)
+{
+ return kvm_entry_event(evsel);
+}
+
+static void msr_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+ struct event_key *key,
+ char *decode)
+{
+ scnprintf(decode, decode_str_len, "%#llx:%s",
+ (unsigned long long)key->key,
+ key->info ? "W" : "R");
+}
+
+static struct kvm_events_ops msr_events = {
+ .is_begin_event = msr_event_begin,
+ .is_end_event = msr_event_end,
+ .decode_key = msr_event_decode_key,
+ .name = "MSR Access"
+};
+
const char *kvm_events_tp[] = {
"kvm:kvm_entry",
"kvm:kvm_exit",
"kvm:kvm_mmio",
"kvm:kvm_pio",
+ "kvm:kvm_msr",
NULL,
};
@@ -145,6 +190,7 @@ struct kvm_reg_events_ops kvm_reg_events_ops[] = {
{ .name = "vmexit", .ops = &exit_events },
{ .name = "mmio", .ops = &mmio_events },
{ .name = "ioport", .ops = &ioport_events },
+ { .name = "msr", .ops = &msr_events },
{ NULL, NULL },
};
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
index 588110fd8904..5214370ca4e4 100644
--- a/tools/perf/arch/x86/util/mem-events.c
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -5,19 +5,41 @@
static char mem_loads_name[100];
static bool mem_loads_name__init;
+static char mem_stores_name[100];
#define MEM_LOADS_AUX 0x8203
-#define MEM_LOADS_AUX_NAME "{cpu/mem-loads-aux/,cpu/mem-loads,ldlat=%u/pp}:S"
+#define MEM_LOADS_AUX_NAME "{%s/mem-loads-aux/,%s/mem-loads,ldlat=%u/}:P"
+
+#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
+static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+ E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "%s/events/mem-loads"),
+ E("ldlat-stores", "%s/mem-stores/P", "%s/events/mem-stores"),
+ E(NULL, NULL, NULL),
+};
+
+struct perf_mem_event *perf_mem_events__ptr(int i)
+{
+ if (i >= PERF_MEM_EVENTS__MAX)
+ return NULL;
+
+ return &perf_mem_events[i];
+}
bool is_mem_loads_aux_event(struct evsel *leader)
{
- if (!pmu_have_event("cpu", "mem-loads-aux"))
- return false;
+ if (perf_pmu__find("cpu")) {
+ if (!pmu_have_event("cpu", "mem-loads-aux"))
+ return false;
+ } else if (perf_pmu__find("cpu_core")) {
+ if (!pmu_have_event("cpu_core", "mem-loads-aux"))
+ return false;
+ }
return leader->core.attr.config == MEM_LOADS_AUX;
}
-char *perf_mem_events__name(int i)
+char *perf_mem_events__name(int i, char *pmu_name)
{
struct perf_mem_event *e = perf_mem_events__ptr(i);
@@ -25,20 +47,34 @@ char *perf_mem_events__name(int i)
return NULL;
if (i == PERF_MEM_EVENTS__LOAD) {
- if (mem_loads_name__init)
+ if (mem_loads_name__init && !pmu_name)
return mem_loads_name;
- mem_loads_name__init = true;
+ if (!pmu_name) {
+ mem_loads_name__init = true;
+ pmu_name = (char *)"cpu";
+ }
- if (pmu_have_event("cpu", "mem-loads-aux")) {
+ if (pmu_have_event(pmu_name, "mem-loads-aux")) {
scnprintf(mem_loads_name, sizeof(mem_loads_name),
- MEM_LOADS_AUX_NAME, perf_mem_events__loads_ldlat);
+ MEM_LOADS_AUX_NAME, pmu_name, pmu_name,
+ perf_mem_events__loads_ldlat);
} else {
scnprintf(mem_loads_name, sizeof(mem_loads_name),
- e->name, perf_mem_events__loads_ldlat);
+ e->name, pmu_name,
+ perf_mem_events__loads_ldlat);
}
return mem_loads_name;
}
+ if (i == PERF_MEM_EVENTS__STORE) {
+ if (!pmu_name)
+ pmu_name = (char *)"cpu";
+
+ scnprintf(mem_stores_name, sizeof(mem_stores_name),
+ e->name, pmu_name);
+ return mem_stores_name;
+ }
+
return (char *)e->name;
}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 49627a7bed7c..cebb861be3e3 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -474,6 +474,9 @@ int cmd_annotate(int argc, const char **argv)
.attr = perf_event__process_attr,
.build_id = perf_event__process_build_id,
.tracing_data = perf_event__process_tracing_data,
+ .id_index = perf_event__process_id_index,
+ .auxtrace_info = perf_event__process_auxtrace_info,
+ .auxtrace = perf_event__process_auxtrace,
.feature = process_feature_event,
.ordered_events = true,
.ordering_requires_timestamps = true,
@@ -483,6 +486,9 @@ int cmd_annotate(int argc, const char **argv)
struct perf_data data = {
.mode = PERF_DATA_MODE_READ,
};
+ struct itrace_synth_opts itrace_synth_opts = {
+ .set = 0,
+ };
struct option options[] = {
OPT_STRING('i', "input", &input_name, "file",
"input file name"),
@@ -547,6 +553,9 @@ int cmd_annotate(int argc, const char **argv)
OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period",
"Set percent type local/global-period/hits",
annotate_parse_percent_type),
+ OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
+ "Instruction Tracing options\n" ITRACE_HELP,
+ itrace_parse_synth_opts),
OPT_END()
};
@@ -591,6 +600,8 @@ int cmd_annotate(int argc, const char **argv)
if (IS_ERR(annotate.session))
return PTR_ERR(annotate.session);
+ annotate.session->itrace_synth_opts = &itrace_synth_opts;
+
annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
HEADER_BRANCH_STACK);
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 87f5b1a4a7fa..833405c27dae 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -80,6 +80,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID))
with_hits = true;
+ if (zstd_init(&(session->zstd_data), 0) < 0)
+ pr_warning("Decompression initialization failed. Reported data may be incomplete.\n");
+
/*
* in pipe-mode, the only way to get the buildids is to parse
* the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index e3b9d63077ef..6dea37f141b2 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -42,6 +42,8 @@
#include "ui/ui.h"
#include "ui/progress.h"
#include "../perf.h"
+#include "pmu.h"
+#include "pmu-hybrid.h"
struct c2c_hists {
struct hists hists;
@@ -2907,8 +2909,9 @@ static const char * const *record_mem_usage = __usage_record;
static int perf_c2c__record(int argc, const char **argv)
{
- int rec_argc, i = 0, j;
+ int rec_argc, i = 0, j, rec_tmp_nr = 0;
const char **rec_argv;
+ char **rec_tmp;
int ret;
bool all_user = false, all_kernel = false;
bool event_set = false;
@@ -2932,11 +2935,21 @@ static int perf_c2c__record(int argc, const char **argv)
argc = parse_options(argc, argv, options, record_mem_usage,
PARSE_OPT_KEEP_UNKNOWN);
- rec_argc = argc + 11; /* max number of arguments */
+ if (!perf_pmu__has_hybrid())
+ rec_argc = argc + 11; /* max number of arguments */
+ else
+ rec_argc = argc + 11 * perf_pmu__hybrid_pmu_num();
+
rec_argv = calloc(rec_argc + 1, sizeof(char *));
if (!rec_argv)
return -1;
+ rec_tmp = calloc(rec_argc + 1, sizeof(char *));
+ if (!rec_tmp) {
+ free(rec_argv);
+ return -1;
+ }
+
rec_argv[i++] = "record";
if (!event_set) {
@@ -2964,21 +2977,9 @@ static int perf_c2c__record(int argc, const char **argv)
rec_argv[i++] = "--phys-data";
rec_argv[i++] = "--sample-cpu";
- for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
- e = perf_mem_events__ptr(j);
- if (!e->record)
- continue;
-
- if (!e->supported) {
- pr_err("failed: event '%s' not supported\n",
- perf_mem_events__name(j));
- free(rec_argv);
- return -1;
- }
-
- rec_argv[i++] = "-e";
- rec_argv[i++] = perf_mem_events__name(j);
- }
+ ret = perf_mem_events__record_args(rec_argv, &i, rec_tmp, &rec_tmp_nr);
+ if (ret)
+ goto out;
if (all_user)
rec_argv[i++] = "--all-user";
@@ -3002,6 +3003,11 @@ static int perf_c2c__record(int argc, const char **argv)
}
ret = cmd_record(i, rec_argv);
+out:
+ for (i = 0; i < rec_tmp_nr; i++)
+ free(rec_tmp[i]);
+
+ free(rec_tmp);
free(rec_argv);
return ret;
}
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index ddccc0eb7390..5d6f583e2cd3 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -31,6 +31,7 @@
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include <linux/list.h>
+#include <linux/string.h>
#include <errno.h>
#include <signal.h>
@@ -43,6 +44,8 @@ struct perf_inject {
bool have_auxtrace;
bool strip;
bool jit_mode;
+ bool in_place_update;
+ bool in_place_update_dry_run;
const char *input_name;
struct perf_data output;
u64 bytes_written;
@@ -380,8 +383,8 @@ static int perf_event__repipe_buildid_mmap(struct perf_tool *tool,
if (dso && !dso->hit) {
dso->hit = 1;
dso__inject_build_id(dso, tool, machine, sample->cpumode, 0);
- dso__put(dso);
}
+ dso__put(dso);
return perf_event__repipe(tool, event, sample, machine);
}
@@ -396,6 +399,18 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool,
err = perf_event__process_mmap2(tool, event, sample, machine);
perf_event__repipe(tool, event, sample, machine);
+ if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
+ struct dso *dso;
+
+ dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
+ event->mmap2.filename, NULL, machine);
+ if (dso) {
+ /* mark it not to inject build-id */
+ dso->hit = 1;
+ }
+ dso__put(dso);
+ }
+
return err;
}
@@ -437,6 +452,18 @@ static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
};
struct dso *dso;
+ if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
+ /* cannot use dso_id since it'd have invalid info */
+ dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
+ event->mmap2.filename, NULL, machine);
+ if (dso) {
+ /* mark it not to inject build-id */
+ dso->hit = 1;
+ }
+ dso__put(dso);
+ return 0;
+ }
+
dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
event->mmap2.filename, &dso_id, machine);
@@ -444,8 +471,8 @@ static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
dso->hit = 1;
dso__inject_build_id(dso, tool, machine, sample->cpumode,
event->mmap2.flags);
- dso__put(dso);
}
+ dso__put(dso);
perf_event__repipe(tool, event, sample, machine);
@@ -696,12 +723,42 @@ static void strip_init(struct perf_inject *inject)
evsel->handler = drop_sample;
}
+static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
+{
+ struct perf_inject *inject = opt->value;
+ const char *args;
+ char *dry_run;
+
+ if (unset)
+ return 0;
+
+ inject->itrace_synth_opts.set = true;
+ inject->itrace_synth_opts.vm_time_correlation = true;
+ inject->in_place_update = true;
+
+ if (!str)
+ return 0;
+
+ dry_run = skip_spaces(str);
+ if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
+ inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
+ inject->in_place_update_dry_run = true;
+ args = dry_run + strlen("dry-run");
+ } else {
+ args = str;
+ }
+
+ inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);
+
+ return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
+}
+
static int __cmd_inject(struct perf_inject *inject)
{
int ret = -EINVAL;
struct perf_session *session = inject->session;
struct perf_data *data_out = &inject->output;
- int fd = perf_data__fd(data_out);
+ int fd = inject->in_place_update ? -1 : perf_data__fd(data_out);
u64 output_data_offset;
signal(SIGINT, sig_handler);
@@ -737,6 +794,15 @@ static int __cmd_inject(struct perf_inject *inject)
else if (!strncmp(name, "sched:sched_stat_", 17))
evsel->handler = perf_inject__sched_stat;
}
+ } else if (inject->itrace_synth_opts.vm_time_correlation) {
+ session->itrace_synth_opts = &inject->itrace_synth_opts;
+ memset(&inject->tool, 0, sizeof(inject->tool));
+ inject->tool.id_index = perf_event__process_id_index;
+ inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
+ inject->tool.auxtrace = perf_event__process_auxtrace;
+ inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
+ inject->tool.ordered_events = true;
+ inject->tool.ordering_requires_timestamps = true;
} else if (inject->itrace_synth_opts.set) {
session->itrace_synth_opts = &inject->itrace_synth_opts;
inject->itrace_synth_opts.inject = true;
@@ -759,14 +825,14 @@ static int __cmd_inject(struct perf_inject *inject)
if (!inject->itrace_synth_opts.set)
auxtrace_index__free(&session->auxtrace_index);
- if (!data_out->is_pipe)
+ if (!data_out->is_pipe && !inject->in_place_update)
lseek(fd, output_data_offset, SEEK_SET);
ret = perf_session__process_events(session);
if (ret)
return ret;
- if (!data_out->is_pipe) {
+ if (!data_out->is_pipe && !inject->in_place_update) {
if (inject->build_ids)
perf_header__set_feat(&session->header,
HEADER_BUILD_ID);
@@ -878,6 +944,9 @@ int cmd_inject(int argc, const char **argv)
itrace_parse_synth_opts),
OPT_BOOLEAN(0, "strip", &inject.strip,
"strip non-synthesized events (use with --itrace)"),
+ OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
+ "correlate time between VM guests and the host",
+ parse_vm_time_correlation),
OPT_END()
};
const char * const inject_usage[] = {
@@ -900,7 +969,23 @@ int cmd_inject(int argc, const char **argv)
return -1;
}
- if (perf_data__open(&inject.output)) {
+ if (inject.in_place_update) {
+ if (!strcmp(inject.input_name, "-")) {
+ pr_err("Input file name required for in-place updating\n");
+ return -1;
+ }
+ if (strcmp(inject.output.path, "-")) {
+ pr_err("Output file name must not be specified for in-place updating\n");
+ return -1;
+ }
+ if (!data.force && !inject.in_place_update_dry_run) {
+ pr_err("The input file would be updated in place, "
+ "the --force option is required.\n");
+ return -1;
+ }
+ if (!inject.in_place_update_dry_run)
+ data.in_place_update = true;
+ } else if (perf_data__open(&inject.output)) {
perror("failed to create output file");
return -1;
}
@@ -950,5 +1035,6 @@ int cmd_inject(int argc, const char **argv)
out_delete:
zstd_fini(&(inject.session->zstd_data));
perf_session__delete(inject.session);
+ free(inject.itrace_synth_opts.vm_tm_corr_args);
return ret;
}
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index cdd2b9f643f6..0fd2a74dbaca 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -18,6 +18,8 @@
#include "util/dso.h"
#include "util/map.h"
#include "util/symbol.h"
+#include "util/pmu.h"
+#include "util/pmu-hybrid.h"
#include <linux/err.h>
#define MEM_OPERATION_LOAD 0x1
@@ -62,8 +64,10 @@ static const char * const *record_mem_usage = __usage;
static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
{
- int rec_argc, i = 0, j;
+ int rec_argc, i = 0, j, tmp_nr = 0;
+ int start, end;
const char **rec_argv;
+ char **rec_tmp;
int ret;
bool all_user = false, all_kernel = false;
struct perf_mem_event *e;
@@ -87,11 +91,24 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
argc = parse_options(argc, argv, options, record_mem_usage,
PARSE_OPT_KEEP_UNKNOWN);
- rec_argc = argc + 9; /* max number of arguments */
+ if (!perf_pmu__has_hybrid())
+ rec_argc = argc + 9; /* max number of arguments */
+ else
+ rec_argc = argc + 9 * perf_pmu__hybrid_pmu_num();
+
rec_argv = calloc(rec_argc + 1, sizeof(char *));
if (!rec_argv)
return -1;
+ /*
+ * Save the allocated event name strings.
+ */
+ rec_tmp = calloc(rec_argc + 1, sizeof(char *));
+ if (!rec_tmp) {
+ free(rec_argv);
+ return -1;
+ }
+
rec_argv[i++] = "record";
e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE);
@@ -128,21 +145,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
if (mem->data_page_size)
rec_argv[i++] = "--data-page-size";
- for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
- e = perf_mem_events__ptr(j);
- if (!e->record)
- continue;
-
- if (!e->supported) {
- pr_err("failed: event '%s' not supported\n",
- perf_mem_events__name(j));
- free(rec_argv);
- return -1;
- }
-
- rec_argv[i++] = "-e";
- rec_argv[i++] = perf_mem_events__name(j);
- }
+ start = i;
+ ret = perf_mem_events__record_args(rec_argv, &i, rec_tmp, &tmp_nr);
+ if (ret)
+ goto out;
+ end = i;
if (all_user)
rec_argv[i++] = "--all-user";
@@ -156,14 +163,18 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
if (verbose > 0) {
pr_debug("calling: record ");
- while (rec_argv[j]) {
+ for (j = start; j < end; j++)
pr_debug("%s ", rec_argv[j]);
- j++;
- }
+
pr_debug("\n");
}
ret = cmd_record(i, rec_argv);
+out:
+ for (i = 0; i < tmp_nr; i++)
+ free(rec_tmp[i]);
+
+ free(rec_tmp);
free(rec_argv);
return ret;
}
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 6b1507566770..2bfd41df621c 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -347,7 +347,10 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)
goto out_cleanup;
if (params.command == 'D') { /* it shows definition */
- ret = show_probe_trace_events(pevs, npevs);
+ if (probe_conf.bootconfig)
+ ret = show_bootconfig_events(pevs, npevs);
+ else
+ ret = show_probe_trace_events(pevs, npevs);
goto out_cleanup;
}
@@ -581,6 +584,8 @@ __cmd_probe(int argc, const char **argv)
"Look for files with symbols relative to this directory"),
OPT_CALLBACK(0, "target-ns", NULL, "pid",
"target pid for namespace contexts", opt_set_target_ns),
+ OPT_BOOLEAN(0, "bootconfig", &probe_conf.bootconfig,
+ "Output probe definition with bootconfig format"),
OPT_END()
};
int ret;
@@ -692,6 +697,11 @@ __cmd_probe(int argc, const char **argv)
}
break;
case 'D':
+ if (probe_conf.bootconfig && params.uprobes) {
+ pr_err(" Error: --bootconfig doesn't support uprobes.\n");
+ return -EINVAL;
+ }
+ __fallthrough;
case 'a':
/* Ensure the last given target is used */
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 3337b5f93336..71efe6573ee7 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -969,6 +969,15 @@ out:
return rc;
}
+static void set_timestamp_boundary(struct record *rec, u64 sample_time)
+{
+ if (rec->evlist->first_sample_time == 0)
+ rec->evlist->first_sample_time = sample_time;
+
+ if (sample_time)
+ rec->evlist->last_sample_time = sample_time;
+}
+
static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -977,10 +986,7 @@ static int process_sample_event(struct perf_tool *tool,
{
struct record *rec = container_of(tool, struct record, tool);
- if (rec->evlist->first_sample_time == 0)
- rec->evlist->first_sample_time = sample->time;
-
- rec->evlist->last_sample_time = sample->time;
+ set_timestamp_boundary(rec, sample->time);
if (rec->buildid_all)
return 0;
@@ -2402,6 +2408,17 @@ static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *eve
return perf_event__process_mmap2(tool, event, sample, machine);
}
+static int process_timestamp_boundary(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct machine *machine __maybe_unused)
+{
+ struct record *rec = container_of(tool, struct record, tool);
+
+ set_timestamp_boundary(rec, sample->time);
+ return 0;
+}
+
/*
* XXX Ideally would be local to cmd_record() and passed to a record__new
* because we need to have access to it in record__exit, that is called
@@ -2436,6 +2453,8 @@ static struct record record = {
.namespaces = perf_event__process_namespaces,
.mmap = build_id__process_mmap,
.mmap2 = build_id__process_mmap2,
+ .itrace_start = process_timestamp_boundary,
+ .aux = process_timestamp_boundary,
.ordered_events = true,
},
};
@@ -2714,6 +2733,12 @@ int cmd_record(int argc, const char **argv)
rec->no_buildid = true;
}
+ if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
+ pr_err("Kernel has no cgroup sampling support.\n");
+ err = -EINVAL;
+ goto out_opts;
+ }
+
if (rec->opts.kcore)
rec->data.is_dir = true;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 36f9ccfeb38a..bc5c393021dc 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -934,6 +934,8 @@ static int __cmd_report(struct report *rep)
return ret;
}
+ evlist__check_mem_load_aux(session->evlist);
+
if (rep->stats_mode)
return stats_print(rep);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 1280cbfad4db..2030936cc891 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -55,6 +55,7 @@
#include <subcmd/pager.h>
#include <perf/evlist.h>
#include <linux/err.h>
+#include "util/dlfilter.h"
#include "util/record.h"
#include "util/util.h"
#include "perf.h"
@@ -79,6 +80,9 @@ static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
static struct perf_stat_config stat_config;
static int max_blocks;
static bool native_arch;
+static struct dlfilter *dlfilter;
+static int dlargc;
+static char **dlargv;
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
@@ -1337,17 +1341,18 @@ static const char *resolve_branch_sym(struct perf_sample *sample,
struct evsel *evsel,
struct thread *thread,
struct addr_location *al,
+ struct addr_location *addr_al,
u64 *ip)
{
- struct addr_location addr_al;
struct perf_event_attr *attr = &evsel->core.attr;
const char *name = NULL;
if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
if (sample_addr_correlates_sym(attr)) {
- thread__resolve(thread, &addr_al, sample);
- if (addr_al.sym)
- name = addr_al.sym->name;
+ if (!addr_al->thread)
+ thread__resolve(thread, addr_al, sample);
+ if (addr_al->sym)
+ name = addr_al->sym->name;
else
*ip = sample->addr;
} else {
@@ -1365,7 +1370,9 @@ static const char *resolve_branch_sym(struct perf_sample *sample,
static int perf_sample__fprintf_callindent(struct perf_sample *sample,
struct evsel *evsel,
struct thread *thread,
- struct addr_location *al, FILE *fp)
+ struct addr_location *al,
+ struct addr_location *addr_al,
+ FILE *fp)
{
struct perf_event_attr *attr = &evsel->core.attr;
size_t depth = thread_stack__depth(thread, sample->cpu);
@@ -1382,7 +1389,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
depth += 1;
- name = resolve_branch_sym(sample, evsel, thread, al, &ip);
+ name = resolve_branch_sym(sample, evsel, thread, al, addr_al, &ip);
if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) {
dlen += fprintf(fp, "(");
@@ -1417,6 +1424,13 @@ __weak void arch_fetch_insn(struct perf_sample *sample __maybe_unused,
{
}
+void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
+ struct machine *machine)
+{
+ if (sample->insn_len == 0 && native_arch)
+ arch_fetch_insn(sample, thread, machine);
+}
+
static int perf_sample__fprintf_insn(struct perf_sample *sample,
struct perf_event_attr *attr,
struct thread *thread,
@@ -1424,8 +1438,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
{
int printed = 0;
- if (sample->insn_len == 0 && native_arch)
- arch_fetch_insn(sample, thread, machine);
+ script_fetch_insn(sample, thread, machine);
if (PRINT_FIELD(INSNLEN))
printed += fprintf(fp, " ilen: %d", sample->insn_len);
@@ -1460,6 +1473,7 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
struct evsel *evsel,
struct thread *thread,
struct addr_location *al,
+ struct addr_location *addr_al,
struct machine *machine, FILE *fp)
{
struct perf_event_attr *attr = &evsel->core.attr;
@@ -1468,7 +1482,7 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
int printed = 0;
if (PRINT_FIELD(CALLINDENT))
- printed += perf_sample__fprintf_callindent(sample, evsel, thread, al, fp);
+ printed += perf_sample__fprintf_callindent(sample, evsel, thread, al, addr_al, fp);
/* print branch_from information */
if (PRINT_FIELD(IP)) {
@@ -1553,41 +1567,49 @@ static const char *sample_flags_to_name(u32 flags)
return NULL;
}
-static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
+int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz)
{
const char *chars = PERF_IP_FLAG_CHARS;
- const int n = strlen(PERF_IP_FLAG_CHARS);
+ const size_t n = strlen(PERF_IP_FLAG_CHARS);
bool in_tx = flags & PERF_IP_FLAG_IN_TX;
const char *name = NULL;
- char str[33];
- int i, pos = 0;
+ size_t i, pos = 0;
name = sample_flags_to_name(flags & ~PERF_IP_FLAG_IN_TX);
if (name)
- return fprintf(fp, " %-15s%4s ", name, in_tx ? "(x)" : "");
+ return snprintf(str, sz, "%-15s%4s", name, in_tx ? "(x)" : "");
if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
name = sample_flags_to_name(flags & ~(PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_TRACE_BEGIN));
if (name)
- return fprintf(fp, " tr strt %-7s%4s ", name, in_tx ? "(x)" : "");
+ return snprintf(str, sz, "tr strt %-7s%4s", name, in_tx ? "(x)" : "");
}
if (flags & PERF_IP_FLAG_TRACE_END) {
name = sample_flags_to_name(flags & ~(PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_TRACE_END));
if (name)
- return fprintf(fp, " tr end %-7s%4s ", name, in_tx ? "(x)" : "");
+ return snprintf(str, sz, "tr end %-7s%4s", name, in_tx ? "(x)" : "");
}
for (i = 0; i < n; i++, flags >>= 1) {
- if (flags & 1)
+ if ((flags & 1) && pos < sz)
str[pos++] = chars[i];
}
for (; i < 32; i++, flags >>= 1) {
- if (flags & 1)
+ if ((flags & 1) && pos < sz)
str[pos++] = '?';
}
- str[pos] = 0;
+ if (pos < sz)
+ str[pos] = 0;
+
+ return pos;
+}
+static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
+{
+ char str[SAMPLE_FLAGS_BUF_SIZE];
+
+ perf_sample__sprintf_flags(flags, str, sizeof(str));
return fprintf(fp, " %-19s ", str);
}
@@ -1917,7 +1939,8 @@ static void perf_sample__fprint_metric(struct perf_script *script,
static bool show_event(struct perf_sample *sample,
struct evsel *evsel,
struct thread *thread,
- struct addr_location *al)
+ struct addr_location *al,
+ struct addr_location *addr_al)
{
int depth = thread_stack__depth(thread, sample->cpu);
@@ -1933,7 +1956,7 @@ static bool show_event(struct perf_sample *sample,
} else {
const char *s = symbol_conf.graph_function;
u64 ip;
- const char *name = resolve_branch_sym(sample, evsel, thread, al,
+ const char *name = resolve_branch_sym(sample, evsel, thread, al, addr_al,
&ip);
unsigned nlen;
@@ -1958,6 +1981,7 @@ static bool show_event(struct perf_sample *sample,
static void process_event(struct perf_script *script,
struct perf_sample *sample, struct evsel *evsel,
struct addr_location *al,
+ struct addr_location *addr_al,
struct machine *machine)
{
struct thread *thread = al->thread;
@@ -1970,12 +1994,6 @@ static void process_event(struct perf_script *script,
if (output[type].fields == 0)
return;
- if (!show_event(sample, evsel, thread, al))
- return;
-
- if (evswitch__discard(&script->evswitch, evsel))
- return;
-
++es->samples;
perf_sample__fprintf_start(script, sample, thread, evsel,
@@ -1997,7 +2015,7 @@ static void process_event(struct perf_script *script,
perf_sample__fprintf_flags(sample->flags, fp);
if (is_bts_event(attr)) {
- perf_sample__fprintf_bts(sample, evsel, thread, al, machine, fp);
+ perf_sample__fprintf_bts(sample, evsel, thread, al, addr_al, machine, fp);
return;
}
@@ -2160,10 +2178,23 @@ static int process_sample_event(struct perf_tool *tool,
{
struct perf_script *scr = container_of(tool, struct perf_script, tool);
struct addr_location al;
+ struct addr_location addr_al;
+ int ret = 0;
+
+ /* Set thread to NULL to indicate addr_al and al are not initialized */
+ addr_al.thread = NULL;
+ al.thread = NULL;
+
+ ret = dlfilter__filter_event_early(dlfilter, event, sample, evsel, machine, &al, &addr_al);
+ if (ret) {
+ if (ret > 0)
+ ret = 0;
+ goto out_put;
+ }
if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num,
sample->time)) {
- return 0;
+ goto out_put;
}
if (debug_mode) {
@@ -2174,29 +2205,53 @@ static int process_sample_event(struct perf_tool *tool,
nr_unordered++;
}
last_timestamp = sample->time;
- return 0;
+ goto out_put;
}
+ if (filter_cpu(sample))
+ goto out_put;
+
if (machine__resolve(machine, &al, sample) < 0) {
pr_err("problem processing %d event, skipping it.\n",
event->header.type);
- return -1;
+ ret = -1;
+ goto out_put;
}
if (al.filtered)
goto out_put;
- if (filter_cpu(sample))
+ if (!show_event(sample, evsel, al.thread, &al, &addr_al))
goto out_put;
- if (scripting_ops)
- scripting_ops->process_event(event, sample, evsel, &al);
- else
- process_event(scr, sample, evsel, &al, machine);
+ if (evswitch__discard(&scr->evswitch, evsel))
+ goto out_put;
+
+ ret = dlfilter__filter_event(dlfilter, event, sample, evsel, machine, &al, &addr_al);
+ if (ret) {
+ if (ret > 0)
+ ret = 0;
+ goto out_put;
+ }
+
+ if (scripting_ops) {
+ struct addr_location *addr_al_ptr = NULL;
+
+ if ((evsel->core.attr.sample_type & PERF_SAMPLE_ADDR) &&
+ sample_addr_correlates_sym(&evsel->core.attr)) {
+ if (!addr_al.thread)
+ thread__resolve(al.thread, &addr_al, sample);
+ addr_al_ptr = &addr_al;
+ }
+ scripting_ops->process_event(event, sample, evsel, &al, addr_al_ptr);
+ } else {
+ process_event(scr, sample, evsel, &al, &addr_al, machine);
+ }
out_put:
- addr_location__put(&al);
- return 0;
+ if (al.thread)
+ addr_location__put(&al);
+ return ret;
}
static int process_attr(struct perf_tool *tool, union perf_event *event,
@@ -2415,6 +2470,17 @@ static int process_switch_event(struct perf_tool *tool,
sample->tid);
}
+static int process_auxtrace_error(struct perf_session *session,
+ union perf_event *event)
+{
+ if (scripting_ops && scripting_ops->process_auxtrace_error) {
+ scripting_ops->process_auxtrace_error(session, event);
+ return 0;
+ }
+
+ return perf_event__process_auxtrace_error(session, event);
+}
+
static int
process_lost_event(struct perf_tool *tool,
union perf_event *event,
@@ -2554,6 +2620,8 @@ static int __cmd_script(struct perf_script *script)
}
if (script->show_switch_events || (scripting_ops && scripting_ops->process_switch))
script->tool.context_switch = process_switch_event;
+ if (scripting_ops && scripting_ops->process_auxtrace_error)
+ script->tool.auxtrace_error = process_auxtrace_error;
if (script->show_namespace_events)
script->tool.namespaces = process_namespaces_event;
if (script->show_cgroup_events)
@@ -2665,6 +2733,37 @@ static void list_available_languages(void)
fprintf(stderr, "\n");
}
+/* Find script file relative to current directory or exec path */
+static char *find_script(const char *script)
+{
+ char path[PATH_MAX];
+
+ if (!scripting_ops) {
+ const char *ext = strrchr(script, '.');
+
+ if (!ext)
+ return NULL;
+
+ scripting_ops = script_spec__lookup(++ext);
+ if (!scripting_ops)
+ return NULL;
+ }
+
+ if (access(script, R_OK)) {
+ char *exec_path = get_argv_exec_path();
+
+ if (!exec_path)
+ return NULL;
+ snprintf(path, sizeof(path), "%s/scripts/%s/%s",
+ exec_path, scripting_ops->dirname, script);
+ free(exec_path);
+ script = path;
+ if (access(script, R_OK))
+ return NULL;
+ }
+ return strdup(script);
+}
+
static int parse_scriptname(const struct option *opt __maybe_unused,
const char *str, int unset __maybe_unused)
{
@@ -2706,7 +2805,9 @@ static int parse_scriptname(const struct option *opt __maybe_unused,
}
}
- script_name = strdup(script);
+ script_name = find_script(script);
+ if (!script_name)
+ script_name = strdup(script);
return 0;
}
@@ -3076,6 +3177,34 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
exit(0);
}
+static int add_dlarg(const struct option *opt __maybe_unused,
+ const char *s, int unset __maybe_unused)
+{
+ char *arg = strdup(s);
+ void *a;
+
+ if (!arg)
+ return -1;
+
+ a = realloc(dlargv, sizeof(dlargv[0]) * (dlargc + 1));
+ if (!a) {
+ free(arg);
+ return -1;
+ }
+
+ dlargv = a;
+ dlargv[dlargc++] = arg;
+
+ return 0;
+}
+
+static void free_dlarg(void)
+{
+ while (dlargc--)
+ free(dlargv[dlargc]);
+ free(dlargv);
+}
+
/*
* Some scripts specify the required events in their "xxx-record" file,
* this function will check if the events in perf.data match those
@@ -3489,6 +3618,7 @@ int cmd_script(int argc, const char **argv)
};
struct utsname uts;
char *script_path = NULL;
+ const char *dlfilter_file = NULL;
const char **__argv;
int i, j, err = 0;
struct perf_script script = {
@@ -3531,11 +3661,16 @@ int cmd_script(int argc, const char **argv)
"show latency attributes (irqs/preemption disabled, etc)"),
OPT_CALLBACK_NOOPT('l', "list", NULL, NULL, "list available scripts",
list_available_scripts),
+ OPT_CALLBACK_NOOPT(0, "list-dlfilters", NULL, NULL, "list available dlfilters",
+ list_available_dlfilters),
OPT_CALLBACK('s', "script", NULL, "name",
"script file name (lang:script name, script name, or *)",
parse_scriptname),
OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
"generate perf-script.xx script in specified language"),
+ OPT_STRING(0, "dlfilter", &dlfilter_file, "file", "filter .so file name"),
+ OPT_CALLBACK(0, "dlarg", NULL, "argument", "filter argument",
+ add_dlarg),
OPT_STRING('i', "input", &input_name, "file", "input file name"),
OPT_BOOLEAN('d', "debug-mode", &debug_mode,
"do various checks like samples ordering and lost events"),
@@ -3718,6 +3853,12 @@ int cmd_script(int argc, const char **argv)
rep_script_path = get_script_path(argv[0], REPORT_SUFFIX);
if (!rec_script_path && !rep_script_path) {
+ script_name = find_script(argv[0]);
+ if (script_name) {
+ argc -= 1;
+ argv += 1;
+ goto script_found;
+ }
usage_with_options_msg(script_usage, options,
"Couldn't find script `%s'\n\n See perf"
" script -l for available scripts.\n", argv[0]);
@@ -3810,7 +3951,7 @@ int cmd_script(int argc, const char **argv)
free(__argv);
exit(-1);
}
-
+script_found:
if (rec_script_path)
script_path = rec_script_path;
if (rep_script_path)
@@ -3848,6 +3989,12 @@ int cmd_script(int argc, const char **argv)
exit(-1);
}
+ if (dlfilter_file) {
+ dlfilter = dlfilter__new(dlfilter_file, dlargc, dlargv);
+ if (!dlfilter)
+ return -1;
+ }
+
if (!script_name) {
setup_pager();
use_browser = 0;
@@ -3947,8 +4094,12 @@ int cmd_script(int argc, const char **argv)
goto out_delete;
}
+ err = dlfilter__start(dlfilter, session);
+ if (err)
+ goto out_delete;
+
if (script_name) {
- err = scripting_ops->start_script(script_name, argc, argv);
+ err = scripting_ops->start_script(script_name, argc, argv, session);
if (err)
goto out_delete;
pr_debug("perf script started with script %s\n\n", script_name);
@@ -3996,6 +4147,8 @@ out_delete:
if (script_started)
cleanup_scripting();
+ dlfilter__cleanup(dlfilter);
+ free_dlarg();
out:
return err;
}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5a830ae09418..f9f74a514315 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -572,7 +572,8 @@ static int enable_counters(void)
* - we have initial delay configured
*/
if (!target__none(&target) || stat_config.initial_delay) {
- evlist__enable(evsel_list);
+ if (!all_counters_use_bpf)
+ evlist__enable(evsel_list);
if (stat_config.initial_delay > 0)
pr_info(EVLIST_ENABLED_MSG);
}
@@ -581,13 +582,19 @@ static int enable_counters(void)
static void disable_counters(void)
{
+ struct evsel *counter;
+
/*
* If we don't have tracee (attaching to task or cpu), counters may
* still be running. To get accurate group ratios, we must stop groups
* from counting before reading their constituent counters.
*/
- if (!target__none(&target))
- evlist__disable(evsel_list);
+ if (!target__none(&target)) {
+ evlist__for_each_entry(evsel_list, counter)
+ bpf_counter__disable(counter);
+ if (!all_counters_use_bpf)
+ evlist__disable(evsel_list);
+ }
}
static volatile int workload_exec_errno;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 69cb3635f5ef..2d570bfe7a56 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -22,6 +22,7 @@
#include "util/annotate.h"
#include "util/bpf-event.h"
+#include "util/cgroup.h"
#include "util/config.h"
#include "util/color.h"
#include "util/dso.h"
@@ -1558,6 +1559,8 @@ int cmd_top(int argc, const char **argv)
OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
"number of thread to run event synthesize"),
+ OPT_CALLBACK('G', "cgroup", &top.evlist, "name",
+ "monitor event in cgroup name only", parse_cgroups),
OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces,
"Record namespaces events"),
OPT_BOOLEAN(0, "all-cgroups", &opts->record_cgroup,
@@ -1646,6 +1649,11 @@ int cmd_top(int argc, const char **argv)
goto out_delete_evlist;
}
+ if (nr_cgroups > 0 && opts->record_cgroup) {
+ pr_err("--cgroup and --all-cgroups cannot be used together\n");
+ goto out_delete_evlist;
+ }
+
if (opts->branch_stack && callchain_param.enabled)
symbol_conf.show_branchflag_count = true;
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index dd8ff287e930..c783558332b8 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -39,6 +39,7 @@ arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
arch/arm/include/uapi/asm/perf_regs.h
arch/arm64/include/uapi/asm/perf_regs.h
+arch/mips/include/uapi/asm/perf_regs.h
arch/powerpc/include/uapi/asm/perf_regs.h
arch/s390/include/uapi/asm/perf_regs.h
arch/x86/include/uapi/asm/perf_regs.h
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 20cb91ef06ff..2f6b67189b42 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -443,6 +443,8 @@ int main(int argc, const char **argv)
const char *cmd;
char sbuf[STRERR_BUFSIZE];
+ perf_debug_setup();
+
/* libsubcmd init */
exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
pager_init(PERF_PAGER_ENVIRONMENT);
@@ -531,8 +533,6 @@ int main(int argc, const char **argv)
*/
pthread__block_sigwinch();
- perf_debug_setup();
-
while (1) {
static int done_help;
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/cache.json b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
index 616f29098c71..605be14f441c 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/cache.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
@@ -1,46 +1,56 @@
[
{
- "EventCode": "1003C",
+ "EventCode": "0x1003C",
"EventName": "PM_EXEC_STALL_DMISS_L2L3",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3."
},
{
- "EventCode": "34056",
+ "EventCode": "0x1E054",
+ "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
+ },
+ {
+ "EventCode": "0x34054",
+ "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
+ },
+ {
+ "EventCode": "0x34056",
"EventName": "PM_EXEC_STALL_LOAD_FINISH",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ."
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
},
{
- "EventCode": "3006C",
+ "EventCode": "0x3006C",
"EventName": "PM_RUN_CYC_SMT2_MODE",
"BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode."
},
{
- "EventCode": "300F4",
+ "EventCode": "0x300F4",
"EventName": "PM_RUN_INST_CMPL_CONC",
"BriefDescription": "PowerPC instructions completed by this thread when all threads in the core had the run-latch set."
},
{
- "EventCode": "4C016",
+ "EventCode": "0x4C016",
"EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict."
},
{
- "EventCode": "4D014",
+ "EventCode": "0x4D014",
"EventName": "PM_EXEC_STALL_LOAD",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit."
},
{
- "EventCode": "4D016",
+ "EventCode": "0x4D016",
"EventName": "PM_EXEC_STALL_PTESYNC",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit."
},
{
- "EventCode": "401EA",
+ "EventCode": "0x401EA",
"EventName": "PM_THRESH_EXC_128",
"BriefDescription": "Threshold counter exceeded a value of 128."
},
{
- "EventCode": "400F6",
+ "EventCode": "0x400F6",
"EventName": "PM_BR_MPRED_CMPL",
"BriefDescription": "A mispredicted branch completed. Includes direction and target."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
index 703cd431ae5b..54acb55e2c8c 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
@@ -1,6 +1,6 @@
[
{
- "EventCode": "4016E",
+ "EventCode": "0x4016E",
"EventName": "PM_THRESH_NOT_MET",
"BriefDescription": "Threshold counter did not meet threshold."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
index eac8609dcc90..558f9530f54e 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
@@ -1,216 +1,246 @@
[
{
- "EventCode": "10004",
+ "EventCode": "0x10004",
"EventName": "PM_EXEC_STALL_TRANSLATION",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve."
},
{
- "EventCode": "10010",
+ "EventCode": "0x10006",
+ "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
+ "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason."
+ },
+ {
+ "EventCode": "0x10010",
"EventName": "PM_PMC4_OVERFLOW",
"BriefDescription": "The event selected for PMC4 caused the event counter to overflow."
},
{
- "EventCode": "10020",
+ "EventCode": "0x10020",
"EventName": "PM_PMC4_REWIND",
"BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged."
},
{
- "EventCode": "10038",
+ "EventCode": "0x10038",
"EventName": "PM_DISP_STALL_TRANSLATION",
"BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss."
},
{
- "EventCode": "1003A",
+ "EventCode": "0x1003A",
"EventName": "PM_DISP_STALL_BR_MPRED_IC_L2",
"BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict."
},
{
- "EventCode": "1E050",
+ "EventCode": "0x1D05E",
+ "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
+ "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management."
+ },
+ {
+ "EventCode": "0x1E050",
"EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
"BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
},
{
- "EventCode": "1F054",
+ "EventCode": "0x1F054",
"EventName": "PM_DTLB_HIT",
"BriefDescription": "The PTE required by the instruction was resident in the TLB (data TLB access). When MMCR1[16]=0 this event counts only demand hits. When MMCR1[16]=1 this event includes demand and prefetch. Applies to both HPT and RPT."
},
{
- "EventCode": "101E8",
+ "EventCode": "0x10064",
+ "EventName": "PM_DISP_STALL_IC_L2",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
+ },
+ {
+ "EventCode": "0x101E8",
"EventName": "PM_THRESH_EXC_256",
"BriefDescription": "Threshold counter exceeded a count of 256."
},
{
- "EventCode": "101EC",
+ "EventCode": "0x101EC",
"EventName": "PM_THRESH_MET",
"BriefDescription": "Threshold exceeded."
},
{
- "EventCode": "100F2",
+ "EventCode": "0x100F2",
"EventName": "PM_1PLUS_PPC_CMPL",
"BriefDescription": "Cycles in which at least one instruction is completed by this thread."
},
{
- "EventCode": "100F6",
+ "EventCode": "0x100F6",
"EventName": "PM_IERAT_MISS",
"BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event."
},
{
- "EventCode": "100F8",
+ "EventCode": "0x100F8",
"EventName": "PM_DISP_STALL_CYC",
"BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)."
},
{
- "EventCode": "20114",
+ "EventCode": "0x20006",
+ "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
+ "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
+ },
+ {
+ "EventCode": "0x20114",
"EventName": "PM_MRK_L2_RC_DISP",
"BriefDescription": "Marked instruction RC dispatched in L2."
},
{
- "EventCode": "2C010",
+ "EventCode": "0x2C010",
"EventName": "PM_EXEC_STALL_LSU",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions."
},
{
- "EventCode": "2C016",
+ "EventCode": "0x2C016",
"EventName": "PM_DISP_STALL_IERAT_ONLY_MISS",
"BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss."
},
{
- "EventCode": "2C01E",
+ "EventCode": "0x2C01E",
"EventName": "PM_DISP_STALL_BR_MPRED_IC_L3",
"BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict."
},
{
- "EventCode": "2D01A",
+ "EventCode": "0x2D01A",
"EventName": "PM_DISP_STALL_IC_MISS",
"BriefDescription": "Cycles when dispatch was stalled for this thread due to an Icache Miss."
},
{
- "EventCode": "2D01C",
- "EventName": "PM_CMPL_STALL_STCX",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing."
- },
- {
- "EventCode": "2E018",
+ "EventCode": "0x2E018",
"EventName": "PM_DISP_STALL_FETCH",
"BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held."
},
{
- "EventCode": "2E01A",
+ "EventCode": "0x2E01A",
"EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
"BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the XVFC mapper/SRB was full."
},
{
- "EventCode": "2C142",
+ "EventCode": "0x2C142",
"EventName": "PM_MRK_XFER_FROM_SRC_PMC2",
"BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "24050",
+ "EventCode": "0x24050",
"EventName": "PM_IOPS_DISP",
"BriefDescription": "Internal Operations dispatched. PM_IOPS_DISP / PM_INST_DISP will show the average number of internal operations per PowerPC instruction."
},
{
- "EventCode": "2405E",
+ "EventCode": "0x2405E",
"EventName": "PM_ISSUE_CANCEL",
"BriefDescription": "An instruction issued and the issue was later cancelled. Only one cancel per PowerPC instruction."
},
{
- "EventCode": "200FA",
+ "EventCode": "0x200FA",
"EventName": "PM_BR_TAKEN_CMPL",
"BriefDescription": "Branch Taken instruction completed."
},
{
- "EventCode": "30012",
+ "EventCode": "0x30004",
+ "EventName": "PM_DISP_STALL_FLUSH",
+ "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
+ },
+ {
+ "EventCode": "0x3000A",
+ "EventName": "PM_DISP_STALL_ITLB_MISS",
+ "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss."
+ },
+ {
+ "EventCode": "0x30012",
"EventName": "PM_FLUSH_COMPLETION",
"BriefDescription": "The instruction that was next to complete (oldest in the pipeline) did not complete because it suffered a flush."
},
{
- "EventCode": "30014",
+ "EventCode": "0x30014",
"EventName": "PM_EXEC_STALL_STORE",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit."
},
{
- "EventCode": "30018",
+ "EventCode": "0x30018",
"EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
"BriefDescription": "Cycles in which the NTC instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
},
{
- "EventCode": "30026",
+ "EventCode": "0x30026",
"EventName": "PM_EXEC_STALL_STORE_MISS",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1."
},
{
- "EventCode": "3012A",
+ "EventCode": "0x3012A",
"EventName": "PM_MRK_L2_RC_DONE",
"BriefDescription": "L2 RC machine completed the transaction for the marked instruction."
},
{
- "EventCode": "3F046",
+ "EventCode": "0x3F046",
"EventName": "PM_ITLB_HIT_1G",
"BriefDescription": "Instruction TLB hit (IERAT reload) page size 1G, which implies Radix Page Table translation is in use. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "34058",
+ "EventCode": "0x34058",
"EventName": "PM_DISP_STALL_BR_MPRED_ICMISS",
"BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss."
},
{
- "EventCode": "3D05C",
+ "EventCode": "0x3D05C",
"EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
"BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
},
{
- "EventCode": "3E052",
+ "EventCode": "0x3E052",
"EventName": "PM_DISP_STALL_IC_L3",
"BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3."
},
{
- "EventCode": "3E054",
+ "EventCode": "0x3E054",
"EventName": "PM_LD_MISS_L1",
"BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
},
{
- "EventCode": "301EA",
+ "EventCode": "0x301EA",
"EventName": "PM_THRESH_EXC_1024",
"BriefDescription": "Threshold counter exceeded a value of 1024."
},
{
- "EventCode": "300FA",
+ "EventCode": "0x300FA",
"EventName": "PM_INST_FROM_L3MISS",
"BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
},
{
- "EventCode": "40006",
+ "EventCode": "0x40006",
"EventName": "PM_ISSUE_KILL",
"BriefDescription": "Cycles in which an instruction or group of instructions were cancelled after being issued. This event increments once per occurrence, regardless of how many instructions are included in the issue group."
},
{
- "EventCode": "40116",
+ "EventCode": "0x40116",
"EventName": "PM_MRK_LARX_FIN",
"BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "4C010",
+ "EventCode": "0x4C010",
"EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS",
"BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch."
},
{
- "EventCode": "4D01E",
+ "EventCode": "0x4D01E",
"EventName": "PM_DISP_STALL_BR_MPRED",
"BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch."
},
{
- "EventCode": "4E010",
+ "EventCode": "0x4E010",
"EventName": "PM_DISP_STALL_IC_L3MISS",
"BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3."
},
{
- "EventCode": "4E01A",
+ "EventCode": "0x4E01A",
"EventName": "PM_DISP_STALL_HELD_CYC",
"BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason."
},
{
- "EventCode": "44056",
+ "EventCode": "0x4003C",
+ "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
+ "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
+ },
+ {
+ "EventCode": "0x44056",
"EventName": "PM_VECTOR_ST_CMPL",
"BriefDescription": "Vector store instructions completed."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/locks.json b/tools/perf/pmu-events/arch/powerpc/power10/locks.json
index 016d8de0e14a..b5a0d6521963 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/locks.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/locks.json
@@ -1,11 +1,11 @@
[
{
- "EventCode": "1E058",
+ "EventCode": "0x1E058",
"EventName": "PM_STCX_FAIL_FIN",
"BriefDescription": "Conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "4E050",
+ "EventCode": "0x4E050",
"EventName": "PM_STCX_PASS_FIN",
"BriefDescription": "Conditional store instruction (STCX) passed. LARX and STCX are instructions used to acquire a lock."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/marked.json b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
index 93a5a5910648..58b5dfe3a273 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
@@ -1,146 +1,141 @@
[
{
- "EventCode": "1002C",
+ "EventCode": "0x1002C",
"EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
"BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
},
{
- "EventCode": "10132",
+ "EventCode": "0x10132",
"EventName": "PM_MRK_INST_ISSUED",
"BriefDescription": "Marked instruction issued. Note that stores always get issued twice, the address gets issued to the LSU and the data gets issued to the VSU. Also, issues can sometimes get killed/cancelled and cause multiple sequential issues for the same instruction."
},
{
- "EventCode": "101E0",
+ "EventCode": "0x101E0",
"EventName": "PM_MRK_INST_DISP",
"BriefDescription": "The thread has dispatched a randomly sampled marked instruction."
},
{
- "EventCode": "101E2",
+ "EventCode": "0x101E2",
"EventName": "PM_MRK_BR_TAKEN_CMPL",
"BriefDescription": "Marked Branch Taken instruction completed."
},
{
- "EventCode": "20112",
+ "EventCode": "0x20112",
"EventName": "PM_MRK_NTF_FIN",
"BriefDescription": "The marked instruction became the oldest in the pipeline before it finished. It excludes instructions that finish at dispatch."
},
{
- "EventCode": "2C01C",
+ "EventCode": "0x2C01C",
"EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip."
},
{
- "EventCode": "20138",
+ "EventCode": "0x20138",
"EventName": "PM_MRK_ST_NEST",
"BriefDescription": "A store has been sampled/marked and is at the point of execution where it has completed in the core and can no longer be flushed. At this point the store is sent to the L2."
},
{
- "EventCode": "2013A",
+ "EventCode": "0x2013A",
"EventName": "PM_MRK_BRU_FIN",
"BriefDescription": "Marked Branch instruction finished."
},
{
- "EventCode": "2C144",
+ "EventCode": "0x2C144",
"EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC2",
"BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[15:27]."
},
{
- "EventCode": "24156",
+ "EventCode": "0x24156",
"EventName": "PM_MRK_STCX_FIN",
"BriefDescription": "Marked conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "24158",
+ "EventCode": "0x24158",
"EventName": "PM_MRK_INST",
"BriefDescription": "An instruction was marked. Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens."
},
{
- "EventCode": "2415C",
+ "EventCode": "0x2415C",
"EventName": "PM_MRK_BR_CMPL",
"BriefDescription": "A marked branch completed. All branches are included."
},
{
- "EventCode": "200FD",
+ "EventCode": "0x200FD",
"EventName": "PM_L1_ICACHE_MISS",
"BriefDescription": "Demand iCache Miss."
},
{
- "EventCode": "30130",
+ "EventCode": "0x30130",
"EventName": "PM_MRK_INST_FIN",
"BriefDescription": "marked instruction finished. Excludes instructions that finish at dispatch. Note that stores always finish twice since the address gets issued to the LSU and the data gets issued to the VSU."
},
{
- "EventCode": "34146",
+ "EventCode": "0x34146",
"EventName": "PM_MRK_LD_CMPL",
"BriefDescription": "Marked loads completed."
},
{
- "EventCode": "3E158",
+ "EventCode": "0x3E158",
"EventName": "PM_MRK_STCX_FAIL",
"BriefDescription": "Marked conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "3E15A",
+ "EventCode": "0x3E15A",
"EventName": "PM_MRK_ST_FIN",
"BriefDescription": "The marked instruction was a store of any kind."
},
{
- "EventCode": "30068",
+ "EventCode": "0x30068",
"EventName": "PM_L1_ICACHE_RELOADED_PREF",
"BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)."
},
{
- "EventCode": "301E4",
+ "EventCode": "0x301E4",
"EventName": "PM_MRK_BR_MPRED_CMPL",
"BriefDescription": "Marked Branch Mispredicted. Includes direction and target."
},
{
- "EventCode": "300F6",
+ "EventCode": "0x300F6",
"EventName": "PM_LD_DEMAND_MISS_L1",
"BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
},
{
- "EventCode": "300FE",
+ "EventCode": "0x300FE",
"EventName": "PM_DATA_FROM_L3MISS",
"BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
},
{
- "EventCode": "40012",
+ "EventCode": "0x40012",
"EventName": "PM_L1_ICACHE_RELOADED_ALL",
"BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
},
{
- "EventCode": "40134",
+ "EventCode": "0x40134",
"EventName": "PM_MRK_INST_TIMEO",
"BriefDescription": "Marked instruction finish timeout (instruction was lost)."
},
{
- "EventCode": "4003C",
- "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
- },
- {
- "EventCode": "4505A",
+ "EventCode": "0x4505A",
"EventName": "PM_SP_FLOP_CMPL",
"BriefDescription": "Single Precision floating point instructions completed."
},
{
- "EventCode": "4D058",
+ "EventCode": "0x4D058",
"EventName": "PM_VECTOR_FLOP_CMPL",
"BriefDescription": "Vector floating point instructions completed."
},
{
- "EventCode": "4D05A",
+ "EventCode": "0x4D05A",
"EventName": "PM_NON_MATH_FLOP_CMPL",
"BriefDescription": "Non Math instructions completed."
},
{
- "EventCode": "401E0",
+ "EventCode": "0x401E0",
"EventName": "PM_MRK_INST_CMPL",
"BriefDescription": "marked instruction completed."
},
{
- "EventCode": "400FE",
+ "EventCode": "0x400FE",
"EventName": "PM_DATA_FROM_MEMORY",
"BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/memory.json b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
index b01141eeebee..843b51f531e9 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/memory.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
@@ -1,191 +1,186 @@
[
{
- "EventCode": "1000A",
+ "EventCode": "0x1000A",
"EventName": "PM_PMC3_REWIND",
"BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged."
},
{
- "EventCode": "1C040",
+ "EventCode": "0x1C040",
"EventName": "PM_XFER_FROM_SRC_PMC1",
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "1C142",
+ "EventCode": "0x1C142",
"EventName": "PM_MRK_XFER_FROM_SRC_PMC1",
"BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "1C144",
+ "EventCode": "0x1C144",
"EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1",
"BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]."
},
{
- "EventCode": "1C056",
+ "EventCode": "0x1C056",
"EventName": "PM_DERAT_MISS_4K",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "1C058",
+ "EventCode": "0x1C058",
"EventName": "PM_DTLB_MISS_16G",
"BriefDescription": "Data TLB reload (after a miss) page size 16G. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "1C05C",
+ "EventCode": "0x1C05C",
"EventName": "PM_DTLB_MISS_2M",
"BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "1E056",
+ "EventCode": "0x1E056",
"EventName": "PM_EXEC_STALL_STORE_PIPE",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions."
},
{
- "EventCode": "1F150",
+ "EventCode": "0x1F150",
"EventName": "PM_MRK_ST_L2_CYC",
"BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion."
},
{
- "EventCode": "10062",
+ "EventCode": "0x10062",
"EventName": "PM_LD_L3MISS_PEND_CYC",
"BriefDescription": "Cycles L3 miss was pending for this thread."
},
{
- "EventCode": "20010",
+ "EventCode": "0x20010",
"EventName": "PM_PMC1_OVERFLOW",
"BriefDescription": "The event selected for PMC1 caused the event counter to overflow."
},
{
- "EventCode": "2001A",
+ "EventCode": "0x2001A",
"EventName": "PM_ITLB_HIT",
"BriefDescription": "The PTE required to translate the instruction address was resident in the TLB (instruction TLB access/IERAT reload). Applies to both HPT and RPT. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "2003E",
+ "EventCode": "0x2003E",
"EventName": "PM_PTESYNC_FIN",
"BriefDescription": "Ptesync instruction finished in the store unit. Only one ptesync can finish at a time."
},
{
- "EventCode": "2C040",
+ "EventCode": "0x2C040",
"EventName": "PM_XFER_FROM_SRC_PMC2",
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "2C054",
+ "EventCode": "0x2C054",
"EventName": "PM_DERAT_MISS_64K",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "2C056",
+ "EventCode": "0x2C056",
"EventName": "PM_DTLB_MISS_4K",
"BriefDescription": "Data TLB reload (after a miss) page size 4K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "2D154",
+ "EventCode": "0x2D154",
"EventName": "PM_MRK_DERAT_MISS_64K",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "200F6",
+ "EventCode": "0x200F6",
"EventName": "PM_DERAT_MISS",
"BriefDescription": "DERAT Reloaded to satisfy a DERAT miss. All page sizes are counted by this event. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "3000A",
- "EventName": "PM_DISP_STALL_ITLB_MISS",
- "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss."
- },
- {
- "EventCode": "30016",
+ "EventCode": "0x30016",
"EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it resolve."
},
{
- "EventCode": "3C040",
+ "EventCode": "0x3C040",
"EventName": "PM_XFER_FROM_SRC_PMC3",
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "3C142",
+ "EventCode": "0x3C142",
"EventName": "PM_MRK_XFER_FROM_SRC_PMC3",
"BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "3C144",
+ "EventCode": "0x3C144",
"EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3",
"BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]."
},
{
- "EventCode": "3C054",
+ "EventCode": "0x3C054",
"EventName": "PM_DERAT_MISS_16M",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "3C056",
+ "EventCode": "0x3C056",
"EventName": "PM_DTLB_MISS_64K",
"BriefDescription": "Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "3C058",
+ "EventCode": "0x3C058",
"EventName": "PM_LARX_FIN",
"BriefDescription": "Load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "301E2",
+ "EventCode": "0x301E2",
"EventName": "PM_MRK_ST_CMPL",
"BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores."
},
{
- "EventCode": "300FC",
+ "EventCode": "0x300FC",
"EventName": "PM_DTLB_MISS",
"BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. It includes pages of all sizes for demand and prefetch activity."
},
{
- "EventCode": "4D02C",
+ "EventCode": "0x4D02C",
"EventName": "PM_PMC1_REWIND",
"BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged."
},
{
- "EventCode": "4003E",
+ "EventCode": "0x4003E",
"EventName": "PM_LD_CMPL",
"BriefDescription": "Loads completed."
},
{
- "EventCode": "4C040",
+ "EventCode": "0x4C040",
"EventName": "PM_XFER_FROM_SRC_PMC4",
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "4C142",
+ "EventCode": "0x4C142",
"EventName": "PM_MRK_XFER_FROM_SRC_PMC4",
"BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "4C144",
+ "EventCode": "0x4C144",
"EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4",
"BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]."
},
{
- "EventCode": "4C056",
+ "EventCode": "0x4C056",
"EventName": "PM_DTLB_MISS_16M",
"BriefDescription": "Data TLB reload (after a miss) page size 16M. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "4C05A",
+ "EventCode": "0x4C05A",
"EventName": "PM_DTLB_MISS_1G",
"BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "4C15E",
+ "EventCode": "0x4C15E",
"EventName": "PM_MRK_DTLB_MISS_64K",
"BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "4D056",
+ "EventCode": "0x4D056",
"EventName": "PM_NON_FMA_FLOP_CMPL",
"BriefDescription": "Non FMA instruction completed."
},
{
- "EventCode": "40164",
+ "EventCode": "0x40164",
"EventName": "PM_MRK_DERAT_MISS_2M",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json
index a119e56cbf1c..7d0de1a2860b 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/others.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json
@@ -1,296 +1,271 @@
[
{
- "EventCode": "10016",
+ "EventCode": "0x10016",
"EventName": "PM_VSU0_ISSUE",
"BriefDescription": "VSU instructions issued to VSU pipe 0."
},
{
- "EventCode": "1001C",
+ "EventCode": "0x1001C",
"EventName": "PM_ULTRAVISOR_INST_CMPL",
"BriefDescription": "PowerPC instructions that completed while the thread was in ultravisor state."
},
{
- "EventCode": "100F0",
+ "EventCode": "0x100F0",
"EventName": "PM_CYC",
"BriefDescription": "Processor cycles."
},
{
- "EventCode": "10134",
+ "EventCode": "0x10134",
"EventName": "PM_MRK_ST_DONE_L2",
"BriefDescription": "Marked stores completed in L2 (RC machine done)."
},
{
- "EventCode": "1505E",
+ "EventCode": "0x1505E",
"EventName": "PM_LD_HIT_L1",
"BriefDescription": "Loads that finished without experiencing an L1 miss."
},
{
- "EventCode": "1D05E",
- "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management."
- },
- {
- "EventCode": "1E054",
- "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
- },
- {
- "EventCode": "1E05A",
- "EventName": "PM_CMPL_STALL_LWSYNC",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete."
- },
- {
- "EventCode": "1F056",
+ "EventCode": "0x1F056",
"EventName": "PM_DISP_SS0_2_INSTR_CYC",
"BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions."
},
{
- "EventCode": "1F15C",
+ "EventCode": "0x1F15C",
"EventName": "PM_MRK_STCX_L2_CYC",
"BriefDescription": "Cycles spent in the nest portion of a marked Stcx instruction. It starts counting when the operation starts to drain to the L2 and it stops counting when the instruction retires from the Instruction Completion Table (ICT) in the Instruction Sequencing Unit (ISU)."
},
{
- "EventCode": "10066",
+ "EventCode": "0x10066",
"EventName": "PM_ADJUNCT_CYC",
"BriefDescription": "Cycles in which the thread is in Adjunct state. MSR[S HV PR] bits = 011."
},
{
- "EventCode": "101E4",
+ "EventCode": "0x101E4",
"EventName": "PM_MRK_L1_ICACHE_MISS",
"BriefDescription": "Marked Instruction suffered an icache Miss."
},
{
- "EventCode": "101EA",
+ "EventCode": "0x101EA",
"EventName": "PM_MRK_L1_RELOAD_VALID",
"BriefDescription": "Marked demand reload."
},
{
- "EventCode": "100F4",
+ "EventCode": "0x100F4",
"EventName": "PM_FLOP_CMPL",
"BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops."
},
{
- "EventCode": "100FA",
+ "EventCode": "0x100FA",
"EventName": "PM_RUN_LATCH_ANY_THREAD_CYC",
"BriefDescription": "Cycles when at least one thread has the run latch set."
},
{
- "EventCode": "100FC",
+ "EventCode": "0x100FC",
"EventName": "PM_LD_REF_L1",
"BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included."
},
{
- "EventCode": "20006",
- "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
- },
- {
- "EventCode": "2000C",
+ "EventCode": "0x2000C",
"EventName": "PM_RUN_LATCH_ALL_THREADS_CYC",
"BriefDescription": "Cycles when the run latch is set for all threads."
},
{
- "EventCode": "2E010",
+ "EventCode": "0x2E010",
"EventName": "PM_ADJUNCT_INST_CMPL",
"BriefDescription": "PowerPC instructions that completed while the thread is in Adjunct state."
},
{
- "EventCode": "2E014",
+ "EventCode": "0x2E014",
"EventName": "PM_STCX_FIN",
"BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "20130",
+ "EventCode": "0x20130",
"EventName": "PM_MRK_INST_DECODED",
"BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only."
},
{
- "EventCode": "20132",
+ "EventCode": "0x20132",
"EventName": "PM_MRK_DFU_ISSUE",
"BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time."
},
{
- "EventCode": "20134",
+ "EventCode": "0x20134",
"EventName": "PM_MRK_FXU_ISSUE",
"BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time."
},
{
- "EventCode": "2505C",
+ "EventCode": "0x2505C",
"EventName": "PM_VSU_ISSUE",
"BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations."
},
{
- "EventCode": "2F054",
+ "EventCode": "0x2F054",
"EventName": "PM_DISP_SS1_2_INSTR_CYC",
"BriefDescription": "Cycles in which Superslice 1 dispatches either 1 or 2 instructions."
},
{
- "EventCode": "2F056",
+ "EventCode": "0x2F056",
"EventName": "PM_DISP_SS1_4_INSTR_CYC",
"BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions."
},
{
- "EventCode": "2006C",
+ "EventCode": "0x2006C",
"EventName": "PM_RUN_CYC_SMT4_MODE",
"BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode."
},
{
- "EventCode": "201E0",
+ "EventCode": "0x201E0",
"EventName": "PM_MRK_DATA_FROM_MEMORY",
"BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load."
},
{
- "EventCode": "201E4",
+ "EventCode": "0x201E4",
"EventName": "PM_MRK_DATA_FROM_L3MISS",
"BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load."
},
{
- "EventCode": "201E8",
+ "EventCode": "0x201E8",
"EventName": "PM_THRESH_EXC_512",
"BriefDescription": "Threshold counter exceeded a value of 512."
},
{
- "EventCode": "200F2",
+ "EventCode": "0x200F2",
"EventName": "PM_INST_DISP",
"BriefDescription": "PowerPC instructions dispatched."
},
{
- "EventCode": "30132",
+ "EventCode": "0x30132",
"EventName": "PM_MRK_VSU_FIN",
"BriefDescription": "VSU marked instructions finished. Excludes simple FX instructions issued to the Store Unit."
},
{
- "EventCode": "30038",
+ "EventCode": "0x30038",
"EventName": "PM_EXEC_STALL_DMISS_LMEM",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCapp cache, or local OpenCapp memory."
},
{
- "EventCode": "3F04A",
+ "EventCode": "0x3F04A",
"EventName": "PM_LSU_ST5_FIN",
"BriefDescription": "LSU Finished an internal operation in ST2 port."
},
{
- "EventCode": "34054",
- "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
- },
- {
- "EventCode": "3405A",
+ "EventCode": "0x3405A",
"EventName": "PM_PRIVILEGED_INST_CMPL",
"BriefDescription": "PowerPC Instructions that completed while the thread is in Privileged state."
},
{
- "EventCode": "3F150",
+ "EventCode": "0x3F150",
"EventName": "PM_MRK_ST_DRAIN_CYC",
"BriefDescription": "cycles to drain st from core to L2."
},
{
- "EventCode": "3F054",
+ "EventCode": "0x3F054",
"EventName": "PM_DISP_SS0_4_INSTR_CYC",
"BriefDescription": "Cycles in which Superslice 0 dispatches either 3 or 4 instructions."
},
{
- "EventCode": "3F056",
+ "EventCode": "0x3F056",
"EventName": "PM_DISP_SS0_8_INSTR_CYC",
"BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions."
},
{
- "EventCode": "30162",
+ "EventCode": "0x30162",
"EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD",
"BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill."
},
{
- "EventCode": "40114",
+ "EventCode": "0x40114",
"EventName": "PM_MRK_START_PROBE_NOP_DISP",
"BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0."
},
{
- "EventCode": "4001C",
+ "EventCode": "0x4001C",
"EventName": "PM_VSU_FIN",
"BriefDescription": "VSU instructions finished."
},
{
- "EventCode": "4C01A",
+ "EventCode": "0x4C01A",
"EventName": "PM_EXEC_STALL_DMISS_OFF_NODE",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip."
},
{
- "EventCode": "4D012",
+ "EventCode": "0x4D012",
"EventName": "PM_PMC3_SAVED",
"BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged."
},
{
- "EventCode": "4D022",
+ "EventCode": "0x4D022",
"EventName": "PM_HYPERVISOR_INST_CMPL",
"BriefDescription": "PowerPC instructions that completed while the thread is in hypervisor state."
},
{
- "EventCode": "4D026",
+ "EventCode": "0x4D026",
"EventName": "PM_ULTRAVISOR_CYC",
"BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110."
},
{
- "EventCode": "4D028",
+ "EventCode": "0x4D028",
"EventName": "PM_PRIVILEGED_CYC",
"BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00."
},
{
- "EventCode": "40030",
+ "EventCode": "0x40030",
"EventName": "PM_INST_FIN",
"BriefDescription": "Instructions finished."
},
{
- "EventCode": "44146",
+ "EventCode": "0x44146",
"EventName": "PM_MRK_STCX_CORE_CYC",
"BriefDescription": "Cycles spent in the core portion of a marked Stcx instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
},
{
- "EventCode": "44054",
+ "EventCode": "0x44054",
"EventName": "PM_VECTOR_LD_CMPL",
"BriefDescription": "Vector load instructions completed."
},
{
- "EventCode": "45054",
+ "EventCode": "0x45054",
"EventName": "PM_FMA_CMPL",
"BriefDescription": "Two floating point instructions completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
},
{
- "EventCode": "45056",
+ "EventCode": "0x45056",
"EventName": "PM_SCALAR_FLOP_CMPL",
"BriefDescription": "Scalar floating point instructions completed."
},
{
- "EventCode": "4505C",
+ "EventCode": "0x4505C",
"EventName": "PM_MATH_FLOP_CMPL",
"BriefDescription": "Math floating point instructions completed."
},
{
- "EventCode": "4D05E",
+ "EventCode": "0x4D05E",
"EventName": "PM_BR_CMPL",
"BriefDescription": "A branch completed. All branches are included."
},
{
- "EventCode": "4E15E",
+ "EventCode": "0x4E15E",
"EventName": "PM_MRK_INST_FLUSHED",
"BriefDescription": "The marked instruction was flushed."
},
{
- "EventCode": "401E6",
+ "EventCode": "0x401E6",
"EventName": "PM_MRK_INST_FROM_L3MISS",
"BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked instruction."
},
{
- "EventCode": "401E8",
+ "EventCode": "0x401E8",
"EventName": "PM_MRK_DATA_FROM_L2MISS",
"BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss for a marked load."
},
{
- "EventCode": "400F0",
+ "EventCode": "0x400F0",
"EventName": "PM_LD_DEMAND_MISS_L1_FIN",
"BriefDescription": "Load Missed L1, counted at finish time."
},
{
- "EventCode": "400FA",
+ "EventCode": "0x400FA",
"EventName": "PM_RUN_INST_CMPL",
"BriefDescription": "Completed PowerPC instructions gated by the run latch."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
index b61b5cc157ee..b8aded6045fa 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
@@ -1,296 +1,291 @@
[
{
- "EventCode": "100FE",
+ "EventCode": "0x100FE",
"EventName": "PM_INST_CMPL",
"BriefDescription": "PowerPC instructions completed."
},
{
- "EventCode": "10006",
- "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason."
- },
- {
- "EventCode": "1000C",
+ "EventCode": "0x1000C",
"EventName": "PM_LSU_LD0_FIN",
"BriefDescription": "LSU Finished an internal operation in LD0 port."
},
{
- "EventCode": "1000E",
+ "EventCode": "0x1000E",
"EventName": "PM_MMA_ISSUED",
"BriefDescription": "MMA instructions issued."
},
{
- "EventCode": "10012",
+ "EventCode": "0x10012",
"EventName": "PM_LSU_ST0_FIN",
"BriefDescription": "LSU Finished an internal operation in ST0 port."
},
{
- "EventCode": "10014",
+ "EventCode": "0x10014",
"EventName": "PM_LSU_ST4_FIN",
"BriefDescription": "LSU Finished an internal operation in ST4 port."
},
{
- "EventCode": "10018",
+ "EventCode": "0x10018",
"EventName": "PM_IC_DEMAND_CYC",
"BriefDescription": "Cycles in which an instruction reload is pending to satisfy a demand miss."
},
{
- "EventCode": "10022",
+ "EventCode": "0x10022",
"EventName": "PM_PMC2_SAVED",
"BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged."
},
{
- "EventCode": "10024",
+ "EventCode": "0x10024",
"EventName": "PM_PMC5_OVERFLOW",
"BriefDescription": "The event selected for PMC5 caused the event counter to overflow."
},
{
- "EventCode": "10058",
+ "EventCode": "0x10058",
"EventName": "PM_EXEC_STALL_FIN_AT_DISP",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline finished at dispatch and did not require execution in the LSU, BRU or VSU."
},
{
- "EventCode": "1005A",
+ "EventCode": "0x1005A",
"EventName": "PM_FLUSH_MPRED",
"BriefDescription": "A flush occurred due to a mispredicted branch. Includes target and direction."
},
{
- "EventCode": "1C05A",
+ "EventCode": "0x1C05A",
"EventName": "PM_DERAT_MISS_2M",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "10064",
- "EventName": "PM_DISP_STALL_IC_L2",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
+ "EventCode": "0x1E05A",
+ "EventName": "PM_CMPL_STALL_LWSYNC",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete."
},
{
- "EventCode": "10068",
+ "EventCode": "0x10068",
"EventName": "PM_BR_FIN",
"BriefDescription": "A branch instruction finished. Includes predicted/mispredicted/unconditional."
},
{
- "EventCode": "1006A",
+ "EventCode": "0x1006A",
"EventName": "PM_FX_LSU_FIN",
"BriefDescription": "Simple fixed point instruction issued to the store unit. Measured at finish time."
},
{
- "EventCode": "1006C",
+ "EventCode": "0x1006C",
"EventName": "PM_RUN_CYC_ST_MODE",
"BriefDescription": "Cycles when the run latch is set and the core is in ST mode."
},
{
- "EventCode": "20004",
+ "EventCode": "0x20004",
"EventName": "PM_ISSUE_STALL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was dispatched but not issued yet."
},
{
- "EventCode": "2000A",
+ "EventCode": "0x2000A",
"EventName": "PM_HYPERVISOR_CYC",
"BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010."
},
{
- "EventCode": "2000E",
+ "EventCode": "0x2000E",
"EventName": "PM_LSU_LD1_FIN",
"BriefDescription": "LSU Finished an internal operation in LD1 port."
},
{
- "EventCode": "2C014",
+ "EventCode": "0x2C014",
"EventName": "PM_CMPL_STALL_SPECIAL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline required special handling before completing."
},
{
- "EventCode": "2C018",
+ "EventCode": "0x2C018",
"EventName": "PM_EXEC_STALL_DMISS_L3MISS",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a source beyond the local L2 or local L3."
},
{
- "EventCode": "2D010",
+ "EventCode": "0x2D010",
"EventName": "PM_LSU_ST1_FIN",
"BriefDescription": "LSU Finished an internal operation in ST1 port."
},
{
- "EventCode": "2D012",
+ "EventCode": "0x2D012",
"EventName": "PM_VSU1_ISSUE",
"BriefDescription": "VSU instructions issued to VSU pipe 1."
},
{
- "EventCode": "2D018",
+ "EventCode": "0x2D018",
"EventName": "PM_EXEC_STALL_VSU",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU)."
},
{
- "EventCode": "2E01E",
+ "EventCode": "0x2D01C",
+ "EventName": "PM_CMPL_STALL_STCX",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing."
+ },
+ {
+ "EventCode": "0x2E01E",
"EventName": "PM_EXEC_STALL_NTC_FLUSH",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children."
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous NTF instruction is still completing and the new NTF instruction is stalled at dispatch."
},
{
- "EventCode": "2013C",
+ "EventCode": "0x2013C",
"EventName": "PM_MRK_FX_LSU_FIN",
"BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time."
},
{
- "EventCode": "2405A",
+ "EventCode": "0x2405A",
"EventName": "PM_NTC_FIN",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. Note that instructions can finish out of order, therefore not all the instructions that finish have a Next-to-complete status."
},
{
- "EventCode": "201E2",
+ "EventCode": "0x201E2",
"EventName": "PM_MRK_LD_MISS_L1",
"BriefDescription": "Marked DL1 Demand Miss counted at finish time."
},
{
- "EventCode": "200F4",
+ "EventCode": "0x200F4",
"EventName": "PM_RUN_CYC",
"BriefDescription": "Processor cycles gated by the run latch."
},
{
- "EventCode": "30004",
- "EventName": "PM_DISP_STALL_FLUSH",
- "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
- },
- {
- "EventCode": "30008",
+ "EventCode": "0x30008",
"EventName": "PM_EXEC_STALL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting to finish in one of the execution units (BRU, LSU, VSU). Only cycles between issue and finish are counted in this category."
},
{
- "EventCode": "3001A",
+ "EventCode": "0x3001A",
"EventName": "PM_LSU_ST2_FIN",
"BriefDescription": "LSU Finished an internal operation in ST2 port."
},
{
- "EventCode": "30020",
+ "EventCode": "0x30020",
"EventName": "PM_PMC2_REWIND",
"BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged."
},
{
- "EventCode": "30022",
+ "EventCode": "0x30022",
"EventName": "PM_PMC4_SAVED",
"BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged."
},
{
- "EventCode": "30024",
+ "EventCode": "0x30024",
"EventName": "PM_PMC6_OVERFLOW",
"BriefDescription": "The event selected for PMC6 caused the event counter to overflow."
},
{
- "EventCode": "30028",
+ "EventCode": "0x30028",
"EventName": "PM_CMPL_STALL_MEM_ECC",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC."
},
{
- "EventCode": "30036",
+ "EventCode": "0x30036",
"EventName": "PM_EXEC_STALL_SIMPLE_FX",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a simple fixed point instruction executing in the Load Store Unit."
},
{
- "EventCode": "3003A",
+ "EventCode": "0x3003A",
"EventName": "PM_CMPL_STALL_EXCEPTION",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete."
},
{
- "EventCode": "3F044",
+ "EventCode": "0x3F044",
"EventName": "PM_VSU2_ISSUE",
"BriefDescription": "VSU instructions issued to VSU pipe 2."
},
{
- "EventCode": "30058",
+ "EventCode": "0x30058",
"EventName": "PM_TLBIE_FIN",
"BriefDescription": "TLBIE instructions finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
},
{
- "EventCode": "3D058",
+ "EventCode": "0x3D058",
"EventName": "PM_SCALAR_FSQRT_FDIV_ISSUE",
"BriefDescription": "Scalar versions of four floating point operations: fdiv,fsqrt (xvdivdp, xvdivsp, xvsqrtdp, xvsqrtsp)."
},
{
- "EventCode": "30066",
+ "EventCode": "0x30066",
"EventName": "PM_LSU_FIN",
"BriefDescription": "LSU Finished an internal operation (up to 4 per cycle)."
},
{
- "EventCode": "40004",
+ "EventCode": "0x40004",
"EventName": "PM_FXU_ISSUE",
"BriefDescription": "A fixed point instruction was issued to the VSU."
},
{
- "EventCode": "40008",
+ "EventCode": "0x40008",
"EventName": "PM_NTC_ALL_FIN",
"BriefDescription": "Cycles in which both instructions in the ICT entry pair show as finished. These are the cycles between finish and completion for the oldest pair of instructions in the pipeline."
},
{
- "EventCode": "40010",
+ "EventCode": "0x40010",
"EventName": "PM_PMC3_OVERFLOW",
"BriefDescription": "The event selected for PMC3 caused the event counter to overflow."
},
{
- "EventCode": "4C012",
+ "EventCode": "0x4C012",
"EventName": "PM_EXEC_STALL_DERAT_ONLY_MISS",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered an ERAT miss and waited for it resolve."
},
{
- "EventCode": "4C018",
+ "EventCode": "0x4C018",
"EventName": "PM_CMPL_STALL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline cannot complete because the thread was blocked for any reason."
},
{
- "EventCode": "4C01E",
+ "EventCode": "0x4C01E",
"EventName": "PM_LSU_ST3_FIN",
"BriefDescription": "LSU Finished an internal operation in ST3 port."
},
{
- "EventCode": "4D018",
+ "EventCode": "0x4D018",
"EventName": "PM_EXEC_STALL_BRU",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Branch unit."
},
{
- "EventCode": "4D01A",
+ "EventCode": "0x4D01A",
"EventName": "PM_CMPL_STALL_HWSYNC",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a hwsync waiting for response from L2 before completing."
},
{
- "EventCode": "4D01C",
+ "EventCode": "0x4D01C",
"EventName": "PM_EXEC_STALL_TLBIEL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIEL instruction executing in the Load Store Unit. TLBIEL instructions have lower overhead than TLBIE instructions because they don't get set to the nest."
},
{
- "EventCode": "4E012",
+ "EventCode": "0x4E012",
"EventName": "PM_EXEC_STALL_UNKNOWN",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the NTF finishes and completions came too close together."
},
{
- "EventCode": "4D020",
+ "EventCode": "0x4D020",
"EventName": "PM_VSU3_ISSUE",
"BriefDescription": "VSU instruction was issued to VSU pipe 3."
},
{
- "EventCode": "40132",
+ "EventCode": "0x40132",
"EventName": "PM_MRK_LSU_FIN",
"BriefDescription": "LSU marked instruction finish."
},
{
- "EventCode": "45058",
+ "EventCode": "0x45058",
"EventName": "PM_IC_MISS_CMPL",
"BriefDescription": "Non-speculative icache miss, counted at completion."
},
{
- "EventCode": "4D050",
+ "EventCode": "0x4D050",
"EventName": "PM_VSU_NON_FLOP_CMPL",
"BriefDescription": "Non-floating point VSU instructions completed."
},
{
- "EventCode": "4D052",
+ "EventCode": "0x4D052",
"EventName": "PM_2FLOP_CMPL",
"BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed."
},
{
- "EventCode": "400F2",
+ "EventCode": "0x400F2",
"EventName": "PM_1PLUS_PPC_DISP",
"BriefDescription": "Cycles at least one Instr Dispatched."
},
{
- "EventCode": "400F8",
+ "EventCode": "0x400F8",
"EventName": "PM_FLUSH",
"BriefDescription": "Flush (any type)."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
index ea122a91ceb0..b5d1bd39cfb2 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
@@ -1,21 +1,21 @@
[
{
- "EventCode": "301E8",
+ "EventCode": "0x301E8",
"EventName": "PM_THRESH_EXC_64",
"BriefDescription": "Threshold counter exceeded a value of 64."
},
{
- "EventCode": "45050",
+ "EventCode": "0x45050",
"EventName": "PM_1FLOP_CMPL",
"BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
},
{
- "EventCode": "45052",
+ "EventCode": "0x45052",
"EventName": "PM_4FLOP_CMPL",
"BriefDescription": "Four floating point instructions completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
},
{
- "EventCode": "4D054",
+ "EventCode": "0x4D054",
"EventName": "PM_8FLOP_CMPL",
"BriefDescription": "Four Double Precision vector instructions completed."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/translation.json b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
index 5a714e3dd71a..db3766dca07c 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
@@ -1,56 +1,56 @@
[
{
- "EventCode": "1F15E",
+ "EventCode": "0x1F15E",
"EventName": "PM_MRK_START_PROBE_NOP_CMPL",
"BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
},
{
- "EventCode": "20016",
+ "EventCode": "0x20016",
"EventName": "PM_ST_FIN",
"BriefDescription": "Store finish count. Includes speculative activity."
},
{
- "EventCode": "20018",
+ "EventCode": "0x20018",
"EventName": "PM_ST_FWD",
"BriefDescription": "Store forwards that finished."
},
{
- "EventCode": "2011C",
+ "EventCode": "0x2011C",
"EventName": "PM_MRK_NTF_CYC",
"BriefDescription": "Cycles during which the marked instruction is the oldest in the pipeline (NTF or NTC)."
},
{
- "EventCode": "2E01C",
+ "EventCode": "0x2E01C",
"EventName": "PM_EXEC_STALL_TLBIE",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit."
},
{
- "EventCode": "201E6",
+ "EventCode": "0x201E6",
"EventName": "PM_THRESH_EXC_32",
"BriefDescription": "Threshold counter exceeded a value of 32."
},
{
- "EventCode": "200F0",
+ "EventCode": "0x200F0",
"EventName": "PM_ST_CMPL",
"BriefDescription": "Stores completed from S2Q (2nd-level store queue). This event includes regular stores, stcx and cache inhibited stores. The following operations are excluded (pteupdate, snoop tlbie complete, store atomics, miso, load atomic payloads, tlbie, tlbsync, slbieg, isync, msgsnd, slbiag, cpabort, copy, tcheck, tend, stsync, dcbst, icbi, dcbf, hwsync, lwsync, ptesync, eieio, msgsync)."
},
{
- "EventCode": "200FE",
+ "EventCode": "0x200FE",
"EventName": "PM_DATA_FROM_L2MISS",
"BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss."
},
{
- "EventCode": "30010",
+ "EventCode": "0x30010",
"EventName": "PM_PMC2_OVERFLOW",
"BriefDescription": "The event selected for PMC2 caused the event counter to overflow."
},
{
- "EventCode": "4D010",
+ "EventCode": "0x4D010",
"EventName": "PM_PMC1_SAVED",
"BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged."
},
{
- "EventCode": "4D05C",
+ "EventCode": "0x4D05C",
"EventName": "PM_DPP_FLOP_CMPL",
"BriefDescription": "Double-Precision or Quad-Precision instructions completed."
}
diff --git a/tools/perf/pmu-events/arch/x86/icelake/cache.json b/tools/perf/pmu-events/arch/x86/icelake/cache.json
index 3529fc338c17..49fe78fb6538 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/cache.json
@@ -1,552 +1,664 @@
[
{
+ "BriefDescription": "L2 code requests",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0x21",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "PublicDescription": "Counts the total number of L2 code requests.",
"SampleAfterValue": "200003",
- "BriefDescription": "Demand Data Read miss L2, no rejects"
+ "Speculative": "1",
+ "UMask": "0xe4"
},
{
+ "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0x22",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.RFO_MISS",
- "SampleAfterValue": "200003",
- "BriefDescription": "RFO requests that miss L2 cache"
+ "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Demand requests that miss L2 cache",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts L2 cache misses when fetching instructions.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0x24",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
- "BriefDescription": "L2 cache misses when fetching instructions"
+ "Speculative": "1",
+ "UMask": "0x27"
},
{
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts demand requests that miss L2 cache.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0x27",
+ "EventCode": "0xb0",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
- "SampleAfterValue": "200003",
- "BriefDescription": "Demand requests that miss L2 cache"
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "RFO requests that hit L2 cache",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. This event accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0x28",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_HIT",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.SWPF_MISS",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
"SampleAfterValue": "200003",
- "BriefDescription": "SW prefetch requests that miss L2 cache."
+ "Speculative": "1",
+ "UMask": "0xc2"
},
{
+ "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0xc1",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "SampleAfterValue": "200003",
- "BriefDescription": "Demand Data Read requests that hit L2 cache"
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in L1 but hit the FB (Fill Buffers) due to a preceding miss to the same cache line with data not ready.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40"
},
{
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0xc2",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.RFO_HIT",
- "SampleAfterValue": "200003",
- "BriefDescription": "RFO requests that hit L2 cache"
+ "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x8"
},
{
+ "BriefDescription": "L2 cache lines filling L2",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0xc4",
+ "EventCode": "0xF1",
+ "EventName": "L2_LINES_IN.ALL",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.CODE_RD_HIT",
- "SampleAfterValue": "200003",
- "BriefDescription": "L2 cache hits when fetching instructions, code reads."
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1f"
},
{
+ "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. This event accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0xc8",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.SWPF_HIT",
- "SampleAfterValue": "200003",
- "BriefDescription": "SW prefetch requests that hit L2 cache."
+ "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x41"
},
{
+ "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0xe1",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "SampleAfterValue": "200003",
- "BriefDescription": "Demand Data Read requests"
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
+ "SampleAfterValue": "100021",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Demand Data Read miss L2, no rejects",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0xe2",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_RFO",
+ "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
"SampleAfterValue": "200003",
- "BriefDescription": "RFO requests to L2 cache"
+ "Speculative": "1",
+ "UMask": "0x21"
},
{
+ "BriefDescription": "L2 cache misses when fetching instructions",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the total number of L2 code requests.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0xe4",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
"SampleAfterValue": "200003",
- "BriefDescription": "L2 code requests"
+ "Speculative": "1",
+ "UMask": "0x24"
},
{
+ "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts demand requests to L2 cache.",
- "EventCode": "0x24",
"Counter": "0,1,2,3",
- "UMask": "0xe7",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
- "SampleAfterValue": "200003",
- "BriefDescription": "Demand requests to L2 cache"
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Counts the number of cache lines replaced in L1 data cache.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
- "EventCode": "0x48",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "EventCode": "0x51",
+ "EventName": "L1D.REPLACEMENT",
"PEBScounters": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.PENDING",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of L1D misses that are outstanding"
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "All retired load instructions.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
- "EventCode": "0x48",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles with L1D load Misses outstanding.",
- "CounterMask": "1"
+ "PublicDescription": "Counts all retired load instructions. This event accounts for SW prefetch instructions for loads.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x81"
},
{
+ "BriefDescription": "L2 writebacks that access L2 cache",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailablability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
- "EventCode": "0x48",
"Counter": "0,1,2,3",
- "UMask": "0x2",
+ "EventCode": "0xF0",
+ "EventName": "L2_TRANS.L2_WB",
"PEBScounters": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.FB_FULL",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailablability."
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x40"
},
{
+ "BriefDescription": "Demand Data Read requests",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailablability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
- "EventCode": "0x48",
"Counter": "0,1,2,3",
- "UMask": "0x2",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
"PEBScounters": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailablability.",
- "CounterMask": "1",
- "EdgeDetect": "1"
+ "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xe1"
},
{
+ "BriefDescription": "Demand Data Read transactions pending for off-core. Highly correlated.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
- "EventCode": "0x48",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
"PEBScounters": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.L2_STALL",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources."
+ "PublicDescription": "Counts the number of off-core outstanding Demand Data Read transactions every cycle. A transaction is considered to be in the Off-core outstanding state between L2 cache miss and data-return to the core.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
- "EventCode": "0x51",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
"PEBScounters": "0,1,2,3",
- "EventName": "L1D.REPLACEMENT",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Counts the number of cache lines replaced in L1 data cache."
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xc1"
},
{
+ "BriefDescription": "Cycles the superQ cannot take any more entries.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
- "EventCode": "0x60",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "EventCode": "0xf4",
+ "EventName": "SQ_MISC.SQ_FULL",
"PEBScounters": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
- "CounterMask": "1"
+ "PublicDescription": "Counts the cycles for which the thread is active and the superQ cannot take any more entries.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
- "EventCode": "0x60",
"Counter": "0,1,2,3",
- "UMask": "0x8",
+ "CounterMask": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
"PEBScounters": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore"
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Demand Data Read requests sent to uncore",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
- "EventCode": "0x60",
"Counter": "0,1,2,3",
- "UMask": "0x8",
+ "EventCode": "0xb0",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
"PEBScounters": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
- "CounterMask": "1"
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
- "EventCode": "0xB0",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "SampleAfterValue": "100003",
- "BriefDescription": "Demand Data Read requests sent to uncore"
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
- "EventCode": "0xB0",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
"PEBScounters": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
- "SampleAfterValue": "100003",
- "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM"
+ "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x8"
},
{
+ "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
- "EventCode": "0xB0",
"Counter": "0,1,2,3",
- "UMask": "0x8",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
"PEBScounters": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
- "SampleAfterValue": "100003",
- "BriefDescription": "Demand and prefetch data reads"
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
- "EventCode": "0xB0",
"Counter": "0,1,2,3",
- "UMask": "0x80",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.L2_STALL",
"PEBScounters": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "SampleAfterValue": "100003",
- "BriefDescription": "Any memory transaction that reached the SQ."
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions that true miss the STLB.",
- "EventCode": "0xD0",
"Counter": "0,1,2,3",
- "UMask": "0x11",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
- "SampleAfterValue": "100003",
- "BriefDescription": "Retired load instructions that miss the STLB.",
- "Data_LA": "1"
+ "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retired load instructions with locked access.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired store instructions that true miss the STLB.",
- "EventCode": "0xD0",
"Counter": "0,1,2,3",
- "UMask": "0x12",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
- "SampleAfterValue": "100003",
- "BriefDescription": "Retired store instructions that miss the STLB.",
+ "PublicDescription": "Counts retired load instructions with locked access.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x21"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L3 cache as data sources",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
"Data_LA": "1",
- "L1_Hit_Indication": "1"
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x20"
},
{
+ "BriefDescription": "All retired store instructions.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.ALL_STORES",
+ "L1_Hit_Indication": "1",
"PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts all retired store instructions. This event accounts for SW prefetch instructions and PREFETCHW instruction for stores.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x82"
+ },
+ {
+ "BriefDescription": "Demand requests to L2 cache",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions with locked access.",
- "EventCode": "0xD0",
"Counter": "0,1,2,3",
- "UMask": "0x21",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
- "SampleAfterValue": "100007",
- "BriefDescription": "Retired load instructions with locked access.",
- "Data_LA": "1"
+ "PublicDescription": "Counts demand requests to L2 cache.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xe7"
},
{
- "PEBS": "1",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
- "EventCode": "0xD0",
"Counter": "0,1,2,3",
- "UMask": "0x41",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
- "SampleAfterValue": "100003",
- "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
- "Data_LA": "1"
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xc4"
},
{
- "PEBS": "1",
+ "BriefDescription": "Demand and prefetch data reads",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
- "EventCode": "0xD0",
"Counter": "0,1,2,3",
- "UMask": "0x42",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
"SampleAfterValue": "100003",
- "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
- "Data_LA": "1",
- "L1_Hit_Indication": "1"
+ "Speculative": "1",
+ "UMask": "0x8"
},
{
- "PEBS": "1",
+ "BriefDescription": "Core-originated cacheable demand requests missed L3",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "SW prefetch requests that miss L2 cache.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts all retired load instructions. This event accounts for SW prefetch instructions for loads.",
- "EventCode": "0xD0",
"Counter": "0,1,2,3",
- "UMask": "0x81",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.SWPF_MISS",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.ALL_LOADS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "All retired load instructions.",
- "Data_LA": "1"
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. This event accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x28"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retired load instructions missed L1 cache as data sources",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts all retired store instructions. This event account for SW prefetch instructions and PREFETCHW instruction for stores.",
- "EventCode": "0xD0",
"Counter": "0,1,2,3",
- "UMask": "0x82",
- "PEBScounters": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.ALL_STORES",
- "SampleAfterValue": "2000003",
- "BriefDescription": "All retired store instructions.",
"Data_LA": "1",
- "L1_Hit_Indication": "1"
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x8"
},
{
- "PEBS": "1",
+ "BriefDescription": "Number of L1D misses that are outstanding",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
- "EventCode": "0xD1",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L1_HIT",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
- "Data_LA": "1"
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources.",
- "EventCode": "0xD1",
"Counter": "0,1,2,3",
- "UMask": "0x2",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L2_HIT",
- "SampleAfterValue": "100003",
- "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
- "Data_LA": "1"
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
- "EventCode": "0xD1",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L3_HIT",
- "SampleAfterValue": "50021",
- "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
- "Data_LA": "1"
+ "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
"PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
- "EventCode": "0xD1",
"Counter": "0,1,2,3",
- "UMask": "0x8",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+ "PublicDescription": "Counts retired load instructions whose data sources were hits in L3 without snoops required.",
"SampleAfterValue": "100003",
- "BriefDescription": "Retired load instructions missed L1 cache as data sources",
- "Data_LA": "1"
+ "UMask": "0x8"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retired store instructions that miss the STLB.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions missed L2 cache as data sources.",
- "EventCode": "0xD1",
"Counter": "0,1,2,3",
- "UMask": "0x10",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L2_MISS",
- "SampleAfterValue": "50021",
- "BriefDescription": "Retired load instructions missed L2 cache as data sources",
- "Data_LA": "1"
+ "PublicDescription": "Counts retired store instructions that true miss the STLB.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x12"
},
{
- "PEBS": "1",
+ "BriefDescription": "RFO requests to L2 cache",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
- "EventCode": "0xD1",
"Counter": "0,1,2,3",
- "UMask": "0x20",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_RFO",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L3_MISS",
- "SampleAfterValue": "100007",
- "BriefDescription": "Retired load instructions missed L3 cache as data sources",
- "Data_LA": "1"
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xe2"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retired load instructions missed L2 cache as data sources",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
"EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions missed L2 cache as data sources.",
+ "SampleAfterValue": "100021",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Store Read transactions pending for off-core. Highly correlated.",
+ "CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
- "UMask": "0x40",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.FB_HIT",
- "SampleAfterValue": "100007",
- "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.",
- "Data_LA": "1"
+ "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
- "PEBS": "1",
+ "BriefDescription": "Non-modified cache lines that are silently dropped by L2 cache when triggered by an L2 cache fill.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
- "EventCode": "0xd2",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "EventCode": "0xF2",
+ "EventName": "L2_LINES_OUT.SILENT",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
- "SampleAfterValue": "20011",
- "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
- "Data_LA": "1"
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+ "L1_Hit_Indication": "1",
"PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x42"
+ },
+ {
+ "BriefDescription": "SW prefetch requests that hit L2 cache.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.",
- "EventCode": "0xd2",
"Counter": "0,1,2,3",
- "UMask": "0x2",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.SWPF_HIT",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
- "SampleAfterValue": "20011",
- "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache",
- "Data_LA": "1"
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. This event accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xc8"
},
{
+ "BriefDescription": "Retired load instructions that miss the STLB.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
"PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions that true miss the STLB.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x11"
+ },
+ {
+ "BriefDescription": "RFO requests that miss L2 cache",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.",
- "EventCode": "0xd2",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_MISS",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
- "SampleAfterValue": "20011",
- "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3",
- "Data_LA": "1"
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x22"
},
{
- "PEBS": "1",
+ "BriefDescription": "Modified cache lines that are evicted by L2 cache when triggered by an L2 cache fill.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired load instructions whose data sources were hits in L3 without snoops required.",
- "EventCode": "0xd2",
"Counter": "0,1,2,3",
- "UMask": "0x8",
+ "EventCode": "0xF2",
+ "EventName": "L2_LINES_OUT.NON_SILENT",
"PEBScounters": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
- "SampleAfterValue": "100003",
- "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required",
- "Data_LA": "1"
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Any memory transaction that reached the SQ.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
- "EventCode": "0xF1",
"Counter": "0,1,2,3",
- "UMask": "0x1f",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
"PEBScounters": "0,1,2,3",
- "EventName": "L2_LINES_IN.ALL",
+ "PublicDescription": "Counts memory transactions that reached the super queue, including requests initiated by the core, all L3 prefetches, page walks, etc.",
"SampleAfterValue": "100003",
- "BriefDescription": "L2 cache lines filling L2"
+ "Speculative": "1",
+ "UMask": "0x80"
},
{
+ "BriefDescription": "Cache lines that have been L2 hardware prefetched but not used by demand accesses",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the cycles for which the thread is active and the superQ cannot take any more entries.",
- "EventCode": "0xF4",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "EventCode": "0xf2",
+ "EventName": "L2_LINES_OUT.USELESS_HWPF",
"PEBScounters": "0,1,2,3",
- "EventName": "SQ_MISC.SQ_FULL",
- "SampleAfterValue": "100003",
- "BriefDescription": "Cycles the thread is active and superQ cannot take any more entries."
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x4"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelake/floating-point.json b/tools/perf/pmu-events/arch/x86/icelake/floating-point.json
index 594c5551f610..5391c4f6eca3 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/floating-point.json
@@ -1,102 +1,95 @@
[
{
+ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts all microcode Floating Point assists.",
- "EventCode": "0xC1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "ASSISTS.FP",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all microcode FP assists.",
- "CounterMask": "1"
+ "UMask": "0x40"
},
{
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "EventCode": "0xc7",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 RANGE SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element."
+ "PublicDescription": "Counts number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
},
{
+ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "EventCode": "0xc7",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 RANGE SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element."
+ "SampleAfterValue": "100003",
+ "UMask": "0x80"
},
{
+ "BriefDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "EventCode": "0xc7",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x4",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 RANGE DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element."
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "EventCode": "0xc7",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x8",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element."
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "EventCode": "0xc7",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x10",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 RANGE SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element."
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
},
{
+ "BriefDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "EventCode": "0xc7",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x20",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 RANGE SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element."
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 RANGE FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "EventCode": "0xc7",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x40",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 RANGE FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element."
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Counts all microcode FP assists.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 RANGE FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "EventCode": "0xc7",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x80",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.FP",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 RANGE FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element."
+ "PublicDescription": "Counts all microcode Floating Point assists.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelake/frontend.json b/tools/perf/pmu-events/arch/x86/icelake/frontend.json
index 9c3cfbfcec0f..4fa2a4186ee3 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/frontend.json
@@ -1,424 +1,482 @@
[
{
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
- "EventCode": "0x79",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.ANY",
"PEBScounters": "0,1,2,3",
- "EventName": "IDQ.MITE_UOPS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path"
+ "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Retired Instructions who experienced DSB miss.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
- "EventCode": "0x79",
- "Counter": "0,1,2,3",
- "UMask": "0x4",
- "PEBScounters": "0,1,2,3",
- "EventName": "IDQ.MITE_CYCLES_OK",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles MITE is delivering optimal number of Uops",
- "CounterMask": "5"
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x11",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Cycles MITE is delivering optimal number of Uops",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
- "EventCode": "0x79",
"Counter": "0,1,2,3",
- "UMask": "0x4",
- "PEBScounters": "0,1,2,3",
- "EventName": "IDQ.MITE_CYCLES_ANY",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles MITE is delivering any Uop",
- "CounterMask": "1"
- },
- {
- "CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "CounterMask": "5",
"EventCode": "0x79",
- "Counter": "0,1,2,3",
- "UMask": "0x8",
+ "EventName": "IDQ.MITE_CYCLES_OK",
"PEBScounters": "0,1,2,3",
- "EventName": "IDQ.DSB_UOPS",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
- "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path"
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
- "EventCode": "0x79",
- "Counter": "0,1,2,3",
- "UMask": "0x8",
- "PEBScounters": "0,1,2,3",
- "EventName": "IDQ.DSB_CYCLES_OK",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles DSB is delivering optimal number of Uops",
- "CounterMask": "5"
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x14",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Cycles when no uops are not delivered by the IDQ when backend of the machine is not stalled",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
- "EventCode": "0x79",
- "Counter": "0,1,2,3",
- "UMask": "0x8",
- "PEBScounters": "0,1,2,3",
- "EventName": "IDQ.DSB_CYCLES_ANY",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
- "CounterMask": "1"
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "5",
+ "EventCode": "0x9c",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
- "EventCode": "0x79",
"Counter": "0,1,2,3",
- "UMask": "0x30",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_16B.IFDATA_STALL",
"PEBScounters": "0,1,2,3",
- "EventName": "IDQ.MS_SWITCHES",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of switches from DSB or MITE to the MS",
- "CounterMask": "1",
- "EdgeDetect": "1"
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
+ "SampleAfterValue": "500009",
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
- "EventCode": "0x79",
- "Counter": "0,1,2,3",
- "UMask": "0x30",
- "PEBScounters": "0,1,2,3",
- "EventName": "IDQ.MS_UOPS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Uops delivered to IDQ while MS is busy"
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x510006",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
- "EventCode": "0x79",
"Counter": "0,1,2,3",
- "UMask": "0x30",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_ANY",
"PEBScounters": "0,1,2,3",
- "EventName": "IDQ.MS_CYCLES_ANY",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Cycles when uops are being delivered to IDQ while MS is busy",
- "CounterMask": "1"
+ "Speculative": "1",
+ "UMask": "0x8"
},
{
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
- "EventCode": "0x80",
- "Counter": "0,1,2,3",
- "UMask": "0x4",
- "PEBScounters": "0,1,2,3",
- "EventName": "ICACHE_16B.IFDATA_STALL",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss."
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x100206",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "DSB-to-MITE switch true penalty cycles.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.",
- "EventCode": "0x83",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "EventCode": "0xab",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
"PEBScounters": "0,1,2,3",
- "EventName": "ICACHE_64B.IFTAG_HIT",
- "SampleAfterValue": "200003",
- "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity."
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.",
- "EventCode": "0x83",
- "Counter": "0,1,2,3",
- "UMask": "0x2",
- "PEBScounters": "0,1,2,3",
- "EventName": "ICACHE_64B.IFTAG_MISS",
- "SampleAfterValue": "200003",
- "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity."
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.STLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x15",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
- "EventCode": "0x83",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_UOPS",
"PEBScounters": "0,1,2,3",
- "EventName": "ICACHE_64B.IFTAG_STALL",
- "SampleAfterValue": "200003",
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss."
- },
- {
- "CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
- "EventCode": "0x9C",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
- "BriefDescription": "Uops not delivered by IDQ when backend of the machine is not stalled"
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
- "EventCode": "0x9c",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x504006",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles when no uops are not delivered by the IDQ when backend of the machine is not stalled",
- "CounterMask": "5"
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
- "EventCode": "0x9C",
- "Invert": "1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x502006",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled",
- "CounterMask": "1"
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Cycles MITE is delivering any Uop",
"CollectPEBSRecord": "2",
- "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
- "EventCode": "0xAB",
"Counter": "0,1,2,3",
- "UMask": "0x2",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_CYCLES_ANY",
"PEBScounters": "0,1,2,3",
- "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
- "BriefDescription": "DSB-to-MITE switch true penalty cycles."
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retired instructions after front-end starvation of at least 2 cycles",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
- "EventCode": "0xC6",
- "MSRValue": "0x11",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.DSB_MISS",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x500206",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "BriefDescription": "Retired Instructions who experienced DSB miss.",
- "TakenAlone": "1"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE transitions count.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss.",
- "EventCode": "0xC6",
- "MSRValue": "0x12",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.L1I_MISS",
- "MSRIndex": "0x3F7",
- "SampleAfterValue": "100007",
- "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
- "TakenAlone": "1"
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xab",
+ "EventName": "DSB2MITE_SWITCHES.COUNT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of Decode Stream Buffer (DSB a.k.a. Uop Cache)-to-MITE speculative transitions.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
- "PEBS": "1",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_UOPS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.",
- "EventCode": "0xC6",
- "MSRValue": "0x13",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.L2_MISS",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x13",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.",
"SampleAfterValue": "100007",
- "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
- "TakenAlone": "1"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_HIT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
- "EventCode": "0xC6",
- "MSRValue": "0x14",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x520006",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
- "TakenAlone": "1"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
- "EventCode": "0xC6",
- "MSRValue": "0x15",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "CounterMask": "1",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "Invert": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.STLB_MISS",
- "MSRIndex": "0x3F7",
- "SampleAfterValue": "100007",
- "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
- "TakenAlone": "1"
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.",
- "EventCode": "0xC6",
- "MSRValue": "0x500206",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x501006",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
"SampleAfterValue": "100007",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.",
- "TakenAlone": "1"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
- "EventCode": "0xC6",
- "MSRValue": "0x500406",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x508006",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
- "TakenAlone": "1"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
- "EventCode": "0xC6",
- "MSRValue": "0x500806",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x500806",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
"SampleAfterValue": "100007",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
- "TakenAlone": "1"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
- "EventCode": "0xC6",
- "MSRValue": "0x501006",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x500106",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
- "TakenAlone": "1"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
- "EventCode": "0xC6",
- "MSRValue": "0x502006",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x500406",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
- "TakenAlone": "1"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Number of switches from DSB or MITE to the MS",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
- "EventCode": "0xC6",
- "MSRValue": "0x504006",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
- "MSRIndex": "0x3F7",
- "SampleAfterValue": "100007",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
- "TakenAlone": "1"
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_SWITCHES",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x30"
},
{
- "PEBS": "1",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
- "EventCode": "0xC6",
- "MSRValue": "0x508006",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
- "MSRIndex": "0x3F7",
- "SampleAfterValue": "100007",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
- "TakenAlone": "1"
+ "Counter": "0,1,2,3",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_STALL",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
- "PEBS": "1",
+ "BriefDescription": "Uops delivered to IDQ while MS is busy",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
- "EventCode": "0xC6",
- "MSRValue": "0x510006",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
- "MSRIndex": "0x3F7",
- "SampleAfterValue": "100007",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
- "TakenAlone": "1"
+ "Counter": "0,1,2,3",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_UOPS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x30"
},
{
- "PEBS": "1",
+ "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_MISS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles when uops are being delivered to IDQ while MS is busy",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_CYCLES_ANY",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x30"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
- "EventCode": "0xC6",
- "MSRValue": "0x520006",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.L1I_MISS",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x12",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss.",
"SampleAfterValue": "100007",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
- "TakenAlone": "1"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Cycles DSB is delivering optimal number of Uops",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "5",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_OK",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Uops not delivered by IDQ when backend of the machine is not stalled",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
- "EventCode": "0xC6",
- "MSRValue": "0x100206",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0x9c",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
- "MSRIndex": "0x3F7",
- "SampleAfterValue": "100007",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
- "TakenAlone": "1"
+ "PublicDescription": "Counts the number of uops not delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there were no back-end stalls. This event counts for one SMT thread in a given cycle.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
new file mode 100644
index 000000000000..432e45ac6814
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
@@ -0,0 +1,273 @@
+[
+ {
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricGroup": "Summary",
+ "MetricName": "IPC"
+ },
+ {
+ "MetricExpr": "UOPS_RETIRED.SLOTS / INST_RETIRED.ANY",
+ "BriefDescription": "Uops Per Instruction",
+ "MetricGroup": "Pipeline;Retire",
+ "MetricName": "UPI"
+ },
+ {
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+ "BriefDescription": "Instruction per taken branch",
+ "MetricGroup": "Branches;FetchBW;PGO",
+ "MetricName": "IpTB"
+ },
+ {
+ "MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)",
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricGroup": "Pipeline",
+ "MetricName": "CPI"
+ },
+ {
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricGroup": "Pipeline",
+ "MetricName": "CLKS"
+ },
+ {
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.DISTRIBUTED",
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricGroup": "SMT;TmaL1",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.DISTRIBUTED",
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricGroup": "Flops",
+ "MetricName": "FLOPc"
+ },
+ {
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )",
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricGroup": "Pipeline;PortsUtil",
+ "MetricName": "ILP"
+ },
+ {
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
+ "MetricGroup": "BrMispredicts",
+ "MetricName": "IpMispredict"
+ },
+ {
+ "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
+ "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
+ "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
+ "MetricGroup": "InsType",
+ "MetricName": "IpLoad"
+ },
+ {
+ "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
+ "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
+ "MetricGroup": "InsType",
+ "MetricName": "IpStore"
+ },
+ {
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+ "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+ "MetricGroup": "Branches;InsType",
+ "MetricName": "IpBranch"
+ },
+ {
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
+ "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
+ "MetricGroup": "Branches",
+ "MetricName": "IpCall"
+ },
+ {
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "BriefDescription": "Branch instructions per taken branch. ",
+ "MetricGroup": "Branches;PGO",
+ "MetricName": "BpTkBranch"
+ },
+ {
+ "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
+ "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
+ "MetricGroup": "Flops;FpArith;InsType",
+ "MetricName": "IpFLOP"
+ },
+ {
+ "MetricExpr": "INST_RETIRED.ANY",
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricGroup": "Summary;TmaL1",
+ "MetricName": "Instructions"
+ },
+ {
+ "MetricExpr": "LSD.UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+ "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
+ "MetricGroup": "LSD",
+ "MetricName": "LSD_Coverage"
+ },
+ {
+ "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+ "MetricGroup": "DSB;FetchBW",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
+ "MetricGroup": "MemoryBound;MemoryLat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
+ "MetricGroup": "MemoryBound;MemoryBW",
+ "MetricName": "MLP"
+ },
+ {
+ "MetricConstraint": "NO_NMI_WATCHDOG",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING ) / ( 2 * CPU_CLK_UNHALTED.DISTRIBUTED )",
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricGroup": "MemoryTLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
+ "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricGroup": "MemoryBW",
+ "MetricName": "L1D_Cache_Fill_BW"
+ },
+ {
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
+ "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricGroup": "MemoryBW",
+ "MetricName": "L2_Cache_Fill_BW"
+ },
+ {
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricGroup": "MemoryBW",
+ "MetricName": "L3_Cache_Fill_BW"
+ },
+ {
+ "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
+ "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+ "MetricGroup": "MemoryBW;Offcore",
+ "MetricName": "L3_Cache_Access_BW"
+ },
+ {
+ "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
+ "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+ "MetricGroup": "CacheMisses",
+ "MetricName": "L1MPKI"
+ },
+ {
+ "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
+ "MetricGroup": "CacheMisses",
+ "MetricName": "L2MPKI"
+ },
+ {
+ "MetricExpr": "1000 * ( ( OFFCORE_REQUESTS.ALL_DATA_RD - OFFCORE_REQUESTS.DEMAND_DATA_RD ) + L2_RQSTS.ALL_DEMAND_MISS + L2_RQSTS.SWPF_MISS ) / INST_RETIRED.ANY",
+ "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
+ "MetricGroup": "CacheMisses;Offcore",
+ "MetricName": "L2MPKI_All"
+ },
+ {
+ "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricGroup": "CacheMisses",
+ "MetricName": "L3MPKI"
+ },
+ {
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "BriefDescription": "Average CPU Utilization",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time",
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricGroup": "Summary;Power",
+ "MetricName": "Average_Frequency"
+ },
+ {
+ "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricGroup": "Flops;HPC",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED",
+ "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
+ "MetricGroup": "SMT",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricGroup": "OS",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000",
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricGroup": "HPC;MemoryBW;SoC",
+ "MetricName": "DRAM_BW_Use"
+ },
+ {
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "IpFarBranch"
+ },
+ {
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "BriefDescription": "C3 residency percent per core",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "BriefDescription": "C6 residency percent per core",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "BriefDescription": "C7 residency percent per core",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "BriefDescription": "C2 residency percent per package",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "BriefDescription": "C3 residency percent per package",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "BriefDescription": "C6 residency percent per package",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "BriefDescription": "C7 residency percent per package",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/icelake/memory.json b/tools/perf/pmu-events/arch/x86/icelake/memory.json
index f158366b9dd6..3701bd93a462 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/memory.json
@@ -1,410 +1,574 @@
[
{
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times a TSX line had a cache conflict.",
- "EventCode": "0x54",
"Counter": "0,1,2,3",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3",
+ "EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CONFLICT",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address"
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times a TSX line had a cache conflict.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
"CollectPEBSRecord": "2",
- "PublicDescription": "Speculatively counts the number Transactional Synchronization Extensions (TSX) Aborts due to a data capacity limitation for transactional writes.",
- "EventCode": "0x54",
- "Counter": "0,1,2,3",
- "UMask": "0x2",
- "PEBScounters": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Speculatively counts the number TSX Aborts due to a data capacity limitation for transactional writes."
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc8",
+ "EventName": "HLE_RETIRED.ABORTED",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of times HLE abort was triggered.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
- "EventCode": "0x54",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00001",
+ "Offcore": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer"
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
- "EventCode": "0x54",
- "Counter": "0,1,2,3",
- "UMask": "0x8",
- "PEBScounters": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero."
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x10",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "20011",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that were not supplied by the L3 cache.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
- "EventCode": "0x54",
"Counter": "0,1,2,3",
- "UMask": "0x10",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00010",
+ "Offcore": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer"
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc8",
+ "EventName": "HLE_RETIRED.ABORTED_MEM",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
- "EventCode": "0x54",
"Counter": "0,1,2,3",
- "UMask": "0x20",
- "PEBScounters": "0,1,2,3",
+ "EventCode": "0x54",
"EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer."
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x20"
},
{
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times we could not allocate Lock Buffer.",
- "EventCode": "0x54",
"Counter": "0,1,2,3",
- "UMask": "0x40",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
"PEBScounters": "0,1,2,3",
- "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero."
+ "PublicDescription": "Counts the number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x8"
},
{
+ "BriefDescription": "Number of times an instruction execution caused the transactional nest count supported to be exceeded",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts Unfriendly TSX abort triggered by a vzeroupper instruction.",
- "EventCode": "0x5d",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC3",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "TX_EXEC.MISC2",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed inside a transactional region"
+ "PublicDescription": "Counts Unfriendly TSX abort triggered by a nest count that is too deep.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed inside a transactional region",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts Unfriendly TSX abort triggered by a nest count that is too deep.",
- "EventCode": "0x5d",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x4",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC2",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "TX_EXEC.MISC3",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an instruction execution caused the transactional nest count supported to be exceeded"
+ "PublicDescription": "Counts Unfriendly TSX abort triggered by a vzeroupper instruction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Cycles where data return is pending for a Demand Data Read request who miss L3 cache.",
"CollectPEBSRecord": "2",
- "EventCode": "0xA3",
"Counter": "0,1,2,3",
- "UMask": "0x2",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
"PEBScounters": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
- "CounterMask": "2"
+ "PublicDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
"CollectPEBSRecord": "2",
- "EventCode": "0xA3",
"Counter": "0,1,2,3",
- "UMask": "0x6",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00002",
+ "Offcore": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
- "CounterMask": "6"
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x200",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "101",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution successfully committed",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.COMMIT",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of times RTM commit succeeded.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Speculatively counts the number of TSX aborts due to a data capacity limitation for transactional writes.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Demand Data Read requests who miss L3 cache.",
- "EventCode": "0xB0",
"Counter": "0,1,2,3",
- "UMask": "0x10",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
"PEBScounters": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes.",
"SampleAfterValue": "100003",
- "BriefDescription": "Demand Data Read requests who miss L3 cache"
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
- "EventCode": "0xc3",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0xc8",
+ "EventName": "HLE_RETIRED.ABORTED_EVENTS",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "PublicDescription": "Counts the number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
"SampleAfterValue": "100003",
- "BriefDescription": "Number of machine clears due to memory ordering conflicts."
+ "UMask": "0x80"
},
{
+ "BriefDescription": "Number of times an HLE execution successfully committed",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times we entered an HLE region. Does not count nested transactions.",
- "EventCode": "0xC8",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xc8",
+ "EventName": "HLE_RETIRED.COMMIT",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "HLE_RETIRED.START",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an HLE execution started."
+ "PublicDescription": "Counts the number of times HLE commit succeeded.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times HLE commit succeeded.",
- "EventCode": "0xC8",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "HLE_RETIRED.COMMIT",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an HLE execution successfully committed",
- "Data_LA": "1"
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x40"
},
{
+ "BriefDescription": "Number of machine clears due to memory ordering conflicts.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times HLE abort was triggered.",
- "EventCode": "0xc8",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x4",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "HLE_RETIRED.ABORTED",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one)."
+ "PublicDescription": "Counts the number of Machine Clears detected due to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
- "EventCode": "0xC8",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x8",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "HLE_RETIRED.ABORTED_MEM",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts)."
+ "Counter": "0,1,2,3",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Counts streaming stores that were not supplied by the L3 cache.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
- "EventCode": "0xC8",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x20",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.)."
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.STREAMING_WR.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00800",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Speculatively counts the number of TSX aborts due to a data capacity limitation for transactional reads",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
- "EventCode": "0xC8",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x80",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "HLE_RETIRED.ABORTED_EVENTS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts)."
+ "Counter": "0,1,2,3",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_CAPACITY_READ",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x80"
},
{
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that were not supplied by the L3 cache.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions.",
- "EventCode": "0xC9",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "RTM_RETIRED.START",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an RTM execution started."
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC08000",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that were not supplied by the L3 cache.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times RTM commit succeeded.",
- "EventCode": "0xC9",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "RTM_RETIRED.COMMIT",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an RTM execution successfully committed"
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00020",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Demand Data Read requests who miss L3 cache",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times RTM abort was triggered.",
- "EventCode": "0xc9",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x4",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "RTM_RETIRED.ABORTED",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an RTM execution aborted.",
- "Data_LA": "1"
+ "Counter": "0,1,2,3",
+ "EventCode": "0xb0",
+ "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Demand Data Read requests who miss L3 cache.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
- "EventCode": "0xC9",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x8",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "RTM_RETIRED.ABORTED_MEM",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)"
+ "Counter": "0,1,2,3",
+ "CounterMask": "2",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions.",
- "EventCode": "0xC9",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x20",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions"
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
},
{
+ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type.",
- "EventCode": "0xC9",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x40",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED_EVENTS",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type"
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80"
},
{
+ "BriefDescription": "Number of times an HLE execution started.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
- "EventCode": "0xC9",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x80",
+ "EventCode": "0xc8",
+ "EventName": "HLE_RETIRED.START",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "RTM_RETIRED.ABORTED_EVENTS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)"
+ "PublicDescription": "Counts the number of times we entered an HLE region. Does not count nested transactions.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "PEBS": "2",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
- "EventCode": "0xcd",
- "MSRValue": "0x4",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
"MSRIndex": "0x3F6",
+ "MSRValue": "0x4",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
- "TakenAlone": "1"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "2",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
- "EventCode": "0xcd",
- "MSRValue": "0x8",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x80",
+ "PEBS": "2",
"PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "1009",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times we could not allocate Lock Buffer.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
"MSRIndex": "0x3F6",
+ "MSRValue": "0x8",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "50021",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
- "TakenAlone": "1"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "2",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
- "EventCode": "0xcd",
- "MSRValue": "0x10",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
"MSRIndex": "0x3F6",
- "SampleAfterValue": "20011",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
- "TakenAlone": "1"
+ "MSRValue": "0x100",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "503",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "2",
+ "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "6",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x6"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
- "EventCode": "0xcd",
- "MSRValue": "0x20",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x40",
+ "PEBS": "2",
"PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "2003",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
"MSRIndex": "0x3F6",
+ "MSRValue": "0x20",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100007",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
- "TakenAlone": "1"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "2",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
- "EventCode": "0xcd",
- "MSRValue": "0x40",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED_MEM",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
- "MSRIndex": "0x3F6",
- "SampleAfterValue": "2003",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
- "TakenAlone": "1"
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
},
{
- "PEBS": "2",
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that were not supplied by the L3 cache.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00400",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that were not supplied by the L3 cache.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00004",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution aborted.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
- "EventCode": "0xcd",
- "MSRValue": "0x80",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
- "MSRIndex": "0x3F6",
- "SampleAfterValue": "1009",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
- "TakenAlone": "1"
+ "PublicDescription": "Counts the number of times RTM abort was triggered.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
},
{
- "PEBS": "2",
+ "BriefDescription": "Number of times an RTM execution started.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
- "EventCode": "0xcd",
- "MSRValue": "0x100",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.START",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
- "MSRIndex": "0x3F6",
- "SampleAfterValue": "503",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
- "TakenAlone": "1"
+ "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "PEBS": "2",
+ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
- "EventCode": "0xcd",
- "MSRValue": "0x200",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xc8",
+ "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
- "MSRIndex": "0x3F6",
- "SampleAfterValue": "101",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
- "TakenAlone": "1"
+ "PublicDescription": "Counts the number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelake/other.json b/tools/perf/pmu-events/arch/x86/icelake/other.json
index f8dfdb847224..a806b00f8616 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/other.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/other.json
@@ -1,121 +1,1090 @@
[
{
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the Top-down Microarchitecture Analysis method. This event is counted on a designated fixed counter (Fixed Counter 3) and is an architectural event.",
- "Counter": "35",
- "UMask": "0x4",
- "PEBScounters": "35",
- "EventName": "TOPDOWN.SLOTS",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C8000",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000020",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184008000",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000010",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0002",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.NTA",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x02003C0002",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any type of response.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010004",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that hit a cacheline in the L3 where a snoop was sent or not.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0400",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184008000",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010002",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x02003C8000",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0002",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the speculative path as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
- "BriefDescription": "Counts the number of available slots for an unhalted logical processor."
+ "Speculative": "1",
+ "UMask": "0x8"
},
{
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
- "EventCode": "0x28",
"Counter": "0,1,2,3",
- "UMask": "0x7",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0010",
+ "Offcore": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
- "SampleAfterValue": "200003",
- "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule."
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Counts streaming stores that have any type of response.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010800",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts streaming stores that DRAM supplied the request.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
- "EventCode": "0x28",
"Counter": "0,1,2,3",
- "UMask": "0x18",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.STREAMING_WR.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000800",
+ "Offcore": "1",
"PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that have any type of response.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010020",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop hit in another core's caches, data forwarding is required as the data is modified.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0001",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000010",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that hit a cacheline in the L3 where a snoop was sent.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.L3_HIT.SNOOP_SENT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1E003C8000",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that have any type of response.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010010",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent or not.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0010",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x28",
"EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
"SampleAfterValue": "200003",
- "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule."
+ "Speculative": "1",
+ "UMask": "0x18"
},
{
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchtecture). This includes high current AVX 512-bit instructions.",
- "EventCode": "0x28",
"Counter": "0,1,2,3",
- "UMask": "0x20",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x02003C0001",
+ "Offcore": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
- "SampleAfterValue": "200003",
- "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule."
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop was sent.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
- "EventCode": "0x32",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_SENT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1E003C0001",
+ "Offcore": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "SW_PREFETCH_ACCESS.NTA",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of PREFETCHNTA instructions executed."
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop hit in another core's caches, data forwarding is required as the data is modified.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
- "EventCode": "0x32",
"Counter": "0,1,2,3",
- "UMask": "0x2",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0010",
+ "Offcore": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "SW_PREFETCH_ACCESS.T0",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of PREFETCHT0 instructions executed."
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand data reads that DRAM supplied the request.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
- "EventCode": "0x32",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000001",
+ "Offcore": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed."
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Counts streaming stores that DRAM supplied the request.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.STREAMING_WR.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000800",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000004",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0004",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000020",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000004",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0002",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x02003C0400",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_SENT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1E003C0020",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0010",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0001",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent or not.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0020",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0001",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C8000",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
+ "CollectPEBSRecord": "2",
+ "Counter": "35",
+ "EventName": "TOPDOWN.SLOTS",
+ "PEBScounters": "35",
+ "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).",
+ "SampleAfterValue": "10000003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
"EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop was sent or not.",
+ "CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
- "UMask": "0x8",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0001",
+ "Offcore": "1",
"PEBScounters": "0,1,2,3",
- "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of PREFETCHW instructions executed."
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000400",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "TMA slots where no uops were being issued due to lack of back-end resources.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
- "EventCode": "0xa4",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "TOPDOWN.SLOTS_P",
+ "PublicDescription": "Counts the number of Top-down Microarchitecture Analysis (TMA) method's slots where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.",
"SampleAfterValue": "10000003",
- "BriefDescription": "Counts the number of available slots for an unhalted logical processor."
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0020",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.T0",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop hit in another core's caches, data forwarding is required as the data is modified.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0004",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that have any type of response.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010001",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0004",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop hit in another core's caches, data forwarding is required as the data is modified.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0020",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that have any type of response.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000018000",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Number of PREFETCHW instructions executed.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_SENT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1E003C0010",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000400",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000002",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
"CollectPEBSRecord": "2",
- "EventCode": "0xA4",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.ANY",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
- "SampleAfterValue": "10000003",
- "BriefDescription": "Issue slots where no uops were being issued due to lack of back end resources."
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x7"
},
{
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x02003C0010",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop was sent or not.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0004",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture). This includes high current AVX 512-bit instructions.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Counts streaming stores that hit a cacheline in the L3 where a snoop was sent or not.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.STREAMING_WR.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0800",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x02003C0020",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000001",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop was sent.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_SENT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1E003C0004",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop was sent or not.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0002",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetches to the L3 only that hit a cacheline in the L3 where a snoop was sent or not.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L3.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C2380",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "TMA slots available for an unhalted logical processor. General counter - architectural event",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware Examples include AD (page Access Dirty), FP and AVX related assists.",
- "EventCode": "0xc1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x7",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.SLOTS_P",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "ASSISTS.ANY",
+ "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
+ "SampleAfterValue": "10000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0400",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop was sent.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_SENT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1E003C0002",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that have any type of response.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010400",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that DRAM supplied the request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0184000002",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x7"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0020",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x02003C0004",
+ "Offcore": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware."
+ "Speculative": "1",
+ "UMask": "0x1"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelake/pipeline.json b/tools/perf/pmu-events/arch/x86/icelake/pipeline.json
index 6d8311e634aa..4f4ce309c2f8 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/pipeline.json
@@ -1,892 +1,1035 @@
[
{
+ "BriefDescription": "Mispredicted indirect CALL instructions retired.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
- "Counter": "32",
- "UMask": "0x1",
- "PEBScounters": "32",
- "EventName": "INST_RETIRED.ANY",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of instructions retired. Fixed Counter - architectural event"
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x2"
},
{
- "PEBS": "2",
- "CollectPEBSRecord": "3",
- "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled. Use on Fixed Counter 0.",
- "Counter": "32",
- "UMask": "0x1",
- "PEBScounters": "32",
- "EventName": "INST_RETIRED.PREC_DIST",
+ "BriefDescription": "Number of uops executed on the core.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of uops executed from any thread.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Precise instruction retired event with a reduced effect of PEBS shadow in IP distribution"
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Number of uops executed on port 4 and 9",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
- "Counter": "33",
- "UMask": "0x2",
- "PEBScounters": "33",
- "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_4_9",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 4 and 9.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Core cycles when the thread is not in halt state"
+ "Speculative": "1",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
- "Counter": "34",
- "UMask": "0x3",
- "PEBScounters": "34",
- "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
- "BriefDescription": "Reference cycles when the core is not in halt state."
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Not taken branch instructions retired.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when: a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations, c. preceding lock RMW operations are not forwarded, d. store has the no-forward bit set (uncacheable/page-split/masked stores), e. all-blocking stores are used (mostly, fences and port I/O), and others. The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.",
- "EventCode": "0x03",
- "Counter": "0,1,2,3",
- "UMask": "0x2",
- "PEBScounters": "0,1,2,3",
- "EventName": "LD_BLOCKS.STORE_FORWARD",
- "SampleAfterValue": "100003",
- "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded."
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts not taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
- "EventCode": "0x03",
- "Counter": "0,1,2,3",
- "UMask": "0x8",
- "PEBScounters": "0,1,2,3",
- "EventName": "LD_BLOCKS.NO_SR",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0e",
+ "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
"SampleAfterValue": "100003",
- "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use."
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
- "EventCode": "0x07",
- "Counter": "0,1,2,3",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3",
- "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "Invert": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "All indirect branch instructions retired (excluding RETs. TSX aborts are considered indirect branch).",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts all indirect branch instructions retired (excluding RETs. TSX aborts are considered indirect branch).",
"SampleAfterValue": "100003",
- "BriefDescription": "False dependencies in MOB due to partial compare on address."
+ "UMask": "0x80"
},
{
+ "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
- "EventCode": "0x0D",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread"
+ "Speculative": "1",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Number of uops executed on port 2 and 3",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
- "EventCode": "0x0D",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x3",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_2_3",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "INT_MISC.ALL_RECOVERY_CYCLES",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 2 and 3.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
- "CounterMask": "1"
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Taken branch instructions retired.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
- "EventCode": "0x0d",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x80",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path."
+ "PublicDescription": "Counts taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20"
},
{
+ "BriefDescription": "Counts the number of demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4c",
+ "EventName": "LOAD_HIT_PREFETCH.SWPF",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 1",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
- "EventCode": "0x0E",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_ISSUED.ANY",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Uops that RAT issues to RS"
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xa8",
+ "EventName": "LSD.UOPS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 5",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
- "EventCode": "0x0E",
- "Invert": "1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_5",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Cycles when RAT does not issue Uops to RS for the thread",
- "CounterMask": "1"
+ "Speculative": "1",
+ "UMask": "0x20"
},
{
+ "BriefDescription": "Number of uops executed on port 6",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
- "EventCode": "0x14",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x9",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_6",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "ARITH.DIVIDER_ACTIVE",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.",
- "CounterMask": "1"
+ "Speculative": "1",
+ "UMask": "0x40"
},
{
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0xA8",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread",
"CollectPEBSRecord": "2",
- "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
- "EventCode": "0x3C",
"Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Thread cycles when thread is not in halt state"
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+ "SampleAfterValue": "500009",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts core crystal clock cycles when the thread is unhalted.",
- "EventCode": "0x3C",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "CounterMask": "2",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
- "SampleAfterValue": "25003",
- "BriefDescription": "Core crystal clock cycles when the thread is unhalted."
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x40"
},
{
+ "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
- "EventCode": "0x3C",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "PublicDescription": "Counts core crystal clock cycles when the thread is unhalted.",
"SampleAfterValue": "25003",
- "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted."
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
- "EventCode": "0x4c",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "EventCode": "0x87",
+ "EventName": "ILD_STALL.LCP",
"PEBScounters": "0,1,2,3",
- "EventName": "LOAD_HIT_PREFETCH.SWPF",
- "SampleAfterValue": "100003",
- "BriefDescription": "Counts the number of demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch."
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+ "SampleAfterValue": "500009",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "False dependencies in MOB due to partial compare on address.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
- "EventCode": "0x5E",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "RS_EVENTS.EMPTY_CYCLES",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread"
+ "Counter": "0,1,2,3",
+ "EventCode": "0x07",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
- "EventCode": "0x5E",
- "Invert": "1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0x5e",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "RS_EVENTS.EMPTY_END",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.",
- "CounterMask": "1",
- "EdgeDetect": "1"
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
- "EventCode": "0x87",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
"PEBScounters": "0,1,2,3",
- "EventName": "ILD_STALL.LCP",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Stalls caused by changing prefix length of the instruction."
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Cycles without actually retired uops.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
- "EventCode": "0xa1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "CounterMask": "1",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "Invert": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_DISPATCHED.PORT_0",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of uops executed on port 0"
+ "PublicDescription": "This event counts cycles without actually retired uops.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Far branch instructions retired.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
- "EventCode": "0xa1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_DISPATCHED.PORT_1",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of uops executed on port 1"
+ "PublicDescription": "Counts far branch instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40"
},
{
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 2 and 3.",
- "EventCode": "0xa1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x4",
+ "CounterMask": "16",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_DISPATCHED.PORT_2_3",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Number of instructions retired. Fixed Counter - architectural event",
+ "CollectPEBSRecord": "2",
+ "Counter": "32",
+ "EventName": "INST_RETIRED.ANY",
+ "PEBS": "1",
+ "PEBScounters": "32",
+ "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Number of uops executed on port 2 and 3"
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 5 and 9.",
- "EventCode": "0xa1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x10",
+ "EventCode": "0xa2",
+ "EventName": "RESOURCE_STALLS.SCOREBOARD",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_DISPATCHED.PORT_4_9",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of uops executed on port 4 and 9"
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Increments whenever there is an update to the LBR array.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
- "EventCode": "0xa1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x20",
+ "EventCode": "0xcc",
+ "EventName": "MISC_RETIRED.LBR_INSERTS",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_DISPATCHED.PORT_5",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of uops executed on port 5"
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
},
{
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
- "EventCode": "0xa1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x40",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_DISPATCHED.PORT_6",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of uops executed on port 6"
+ "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
+ "SampleAfterValue": "2000003"
},
{
+ "BriefDescription": "Counts the number of x87 uops dispatched.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 7 and 8.",
- "EventCode": "0xa1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x80",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.X87",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_DISPATCHED.PORT_7_8",
+ "PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Number of uops executed on port 7 and 8"
+ "Speculative": "1",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
"CollectPEBSRecord": "2",
- "EventCode": "0xa2",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "CounterMask": "2",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "RESOURCE_STALLS.SCOREBOARD",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations."
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
- "EventCode": "0xA2",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x8",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SB",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync)."
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x8"
},
{
+ "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
"CollectPEBSRecord": "2",
- "EventCode": "0xA3",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.NO_SR",
"PEBScounters": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
- "CounterMask": "1"
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x8"
},
{
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
"CollectPEBSRecord": "2",
- "EventCode": "0xA3",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x4",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.COUNT",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Total execution stalls.",
- "CounterMask": "4"
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
"CollectPEBSRecord": "2",
- "EventCode": "0xA3",
- "Counter": "0,1,2,3",
- "UMask": "0x5",
- "PEBScounters": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
- "CounterMask": "5"
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x20"
},
{
+ "BriefDescription": "Return instructions retired.",
"CollectPEBSRecord": "2",
- "EventCode": "0xA3",
- "Counter": "0,1,2,3",
- "UMask": "0x8",
- "PEBScounters": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
- "CounterMask": "8"
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts return instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8"
},
{
+ "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.",
"CollectPEBSRecord": "2",
- "EventCode": "0xA3",
- "Counter": "0,1,2,3",
- "UMask": "0xc",
- "PEBScounters": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
- "CounterMask": "12"
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x14",
+ "EventName": "ARITH.DIVIDER_ACTIVE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x9"
},
{
+ "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
"CollectPEBSRecord": "2",
- "EventCode": "0xA3",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x10",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
- "CounterMask": "16"
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Cycles without actually retired instructions.",
"CollectPEBSRecord": "2",
- "EventCode": "0xA3",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x14",
+ "CounterMask": "1",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.STALL_CYCLES",
+ "Invert": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
- "CounterMask": "20"
+ "PublicDescription": "This event counts cycles without actually retired instructions.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
- "EventCode": "0xa6",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_NTAKEN",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+ "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Core cycles when the thread is not in halt state",
+ "CollectPEBSRecord": "2",
+ "Counter": "33",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "PEBScounters": "33",
+ "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty."
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Taken conditional branch instructions retired.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
- "EventCode": "0xa6",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x4",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_TAKEN",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty."
+ "PublicDescription": "Counts taken conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Direct and indirect near call instructions retired.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
- "EventCode": "0xA6",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x40",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.",
- "CounterMask": "2"
+ "PublicDescription": "Counts both direct and indirect near call instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
- "EventCode": "0xa6",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x80",
+ "CounterMask": "4",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load."
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Precise instruction retired event with a reduced effect of PEBS shadow in IP distribution",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
- "EventCode": "0xA8",
- "Counter": "0,1,2,3",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3",
- "EventName": "LSD.UOPS",
+ "Counter": "32",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "PEBS": "1",
+ "PEBScounters": "32",
+ "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled. Use on Fixed Counter 0.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Number of Uops delivered by the LSD."
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Total execution stalls.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
- "EventCode": "0xA8",
- "Counter": "0,1,2,3",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3",
- "EventName": "LSD.CYCLES_ACTIVE",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
- "CounterMask": "1"
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
- "EventCode": "0xa8",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "CounterMask": "12",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
"PEBScounters": "0,1,2,3",
- "EventName": "LSD.CYCLES_OK",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.",
- "CounterMask": "5"
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0xc"
},
{
+ "BriefDescription": "Number of retired PAUSE instructions. This event is not supported on first SKL and KBL products.",
"CollectPEBSRecord": "2",
- "EventCode": "0xB1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_EXECUTED.THREAD",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Counts the number of uops to be executed per-thread each cycle."
+ "EventCode": "0xcc",
+ "EventName": "MISC_RETIRED.PAUSE_INST",
+ "PublicDescription": "Counts number of retired PAUSE instructions. This event is not supported on first SKL and KBL products.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x40"
},
{
+ "BriefDescription": "Self-modifying code (SMC) detected.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
- "EventCode": "0xB1",
- "Invert": "1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.SMC",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_EXECUTED.STALL_CYCLES",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
- "CounterMask": "1"
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Uops that RAT issues to RS",
"CollectPEBSRecord": "2",
- "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
- "EventCode": "0xb1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0x0e",
+ "EventName": "UOPS_ISSUED.ANY",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_EXECUTED.CYCLES_GE_1",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
- "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
- "CounterMask": "1"
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
- "EventCode": "0xb1",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_EXECUTED.CYCLES_GE_2",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
- "CounterMask": "2"
+ "Counter": "0,1,2,3",
+ "CounterMask": "5",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x5"
},
{
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
- "EventCode": "0xb1",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_EXECUTED.CYCLES_GE_3",
+ "Counter": "34",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "PEBScounters": "34",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate than the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
- "CounterMask": "3"
+ "Speculative": "1",
+ "UMask": "0x3"
},
{
+ "BriefDescription": "Cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
- "EventCode": "0xb1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "CounterMask": "1",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.ALL_RECOVERY_CYCLES",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_EXECUTED.CYCLES_GE_4",
+ "PublicDescription": "Counts cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
- "CounterMask": "4"
+ "Speculative": "1",
+ "UMask": "0x3"
},
{
+ "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of uops executed from any thread.",
- "EventCode": "0xB1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_EXECUTED.CORE",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Number of uops executed on the core."
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
- "EventCode": "0xB1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
- "CounterMask": "1"
+ "Speculative": "1",
+ "UMask": "0x8"
},
{
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "8",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
- "EventCode": "0xB1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0x0d",
+ "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
- "CounterMask": "2"
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
+ "SampleAfterValue": "500009",
+ "Speculative": "1",
+ "UMask": "0x80"
},
{
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
- "EventCode": "0xB1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "CounterMask": "10",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "Invert": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
- "CounterMask": "3"
+ "PublicDescription": "Counts the number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "All branch instructions retired.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
- "EventCode": "0xB1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
- "CounterMask": "4"
+ "PublicDescription": "Counts all branch instructions retired.",
+ "SampleAfterValue": "400009"
},
{
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of x87 uops executed.",
- "EventCode": "0xB1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x10",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x5E",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "Invert": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_EXECUTED.X87",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Counts the number of x87 uops dispatched."
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
- "EventCode": "0xC0",
"Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "INST_RETIRED.ANY_P",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Number of instructions retired. General Counter - architectural event"
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of cycles using always true condition (uops_ret &amp;lt; 16) applied to non PEBS uops retired event.",
- "EventCode": "0xC2",
- "Invert": "1",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycles with less than 10 actually retired uops.",
- "CounterMask": "10"
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
+ "SampleAfterValue": "25003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Thread cycles when thread is not in halt state",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the retirement slots used each cycle.",
- "EventCode": "0xc2",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "UOPS_RETIRED.SLOTS",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Retirement slots used."
+ "Speculative": "1"
},
{
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
- "EventCode": "0xC3",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "MACHINE_CLEARS.COUNT",
- "SampleAfterValue": "100003",
- "BriefDescription": "Number of machine clears (nukes) of any type.",
- "CounterMask": "1",
- "EdgeDetect": "1"
+ "PublicDescription": "Counts mispredicted conditional branch instructions retired.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x11"
},
{
+ "BriefDescription": "Number of uops executed on port 0",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
- "EventCode": "0xC3",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x4",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_0",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "MACHINE_CLEARS.SMC",
- "SampleAfterValue": "100003",
- "BriefDescription": "Self-modifying code (SMC) detected."
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Conditional branch instructions retired.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts all branch instructions retired.",
- "EventCode": "0xC4",
"Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "PublicDescription": "Counts conditional branch instructions retired.",
"SampleAfterValue": "400009",
- "BriefDescription": "All branch instructions retired."
+ "UMask": "0x11"
},
{
- "PEBS": "1",
+ "BriefDescription": "Retirement slots used.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts taken conditional branch instructions retired.",
- "EventCode": "0xc4",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.SLOTS",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_INST_RETIRED.COND_TAKEN",
- "SampleAfterValue": "400009",
- "BriefDescription": "Taken conditional branch instructions retired."
+ "PublicDescription": "Counts the retirement slots used each cycle.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "PEBS": "1",
+ "BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts both direct and indirect near call instructions retired.",
- "EventCode": "0xC4",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_INST_RETIRED.NEAR_CALL",
- "SampleAfterValue": "100007",
- "BriefDescription": "Direct and indirect near call instructions retired."
+ "Counter": "0,1,2,3",
+ "CounterMask": "5",
+ "EventCode": "0xa8",
+ "EventName": "LSD.CYCLES_OK",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts return instructions retired.",
- "EventCode": "0xC4",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x8",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_INST_RETIRED.NEAR_RETURN",
- "SampleAfterValue": "100007",
- "BriefDescription": "Return instructions retired."
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x8"
},
{
- "PEBS": "1",
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts not taken branch instructions retired.",
- "EventCode": "0xC4",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x10",
+ "CounterMask": "3",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_INST_RETIRED.COND_NTAKEN",
- "SampleAfterValue": "400009",
- "BriefDescription": "Not taken branch instructions retired."
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts conditional branch instructions retired.",
- "EventCode": "0xc4",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x11",
+ "CounterMask": "2",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_INST_RETIRED.COND",
- "SampleAfterValue": "400009",
- "BriefDescription": "Conditional branch instructions retired."
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts taken branch instructions retired.",
- "EventCode": "0xC4",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x20",
+ "CounterMask": "1",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
- "SampleAfterValue": "400009",
- "BriefDescription": "Taken branch instructions retired."
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts far branch instructions retired.",
- "EventCode": "0xC4",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x40",
+ "CounterMask": "4",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_INST_RETIRED.FAR_BRANCH",
- "SampleAfterValue": "100007",
- "BriefDescription": "Far branch instructions retired."
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts all indirect branch instructions retired (excluding RETs. TSX aborts is considered indirect branch).",
- "EventCode": "0xc4",
- "Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x80",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_INST_RETIRED.INDIRECT",
- "SampleAfterValue": "100003",
- "BriefDescription": "All indirect branch instructions retired (excluding RETs. TSX aborts are considered indirect branch)."
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Cycles when RAT does not issue Uops to RS for the thread",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
- "EventCode": "0xC5",
"Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x0E",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "Invert": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
- "SampleAfterValue": "400009",
- "BriefDescription": "All mispredicted branch instructions retired.",
- "Data_LA": "1"
+ "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
- "PEBS": "1",
+ "BriefDescription": "Cycles when at least 3 micro-ops are executed from any thread on physical core.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts taken conditional mispredicted branch instructions retired.",
- "EventCode": "0xc5",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x1",
+ "CounterMask": "3",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_MISP_RETIRED.COND_TAKEN",
- "SampleAfterValue": "400009",
- "BriefDescription": "number of branch instructions retired that were mispredicted and taken. Non PEBS",
- "Data_LA": "1"
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
- "PEBS": "1",
+ "BriefDescription": "Cycles when at least 1 micro-op is executed from any thread on physical core.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts mispredicted conditional branch instructions retired.",
- "EventCode": "0xc5",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x11",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_MISP_RETIRED.COND",
- "SampleAfterValue": "400009",
- "BriefDescription": "Mispredicted conditional branch instructions retired.",
- "Data_LA": "1"
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
- "PEBS": "1",
+ "BriefDescription": "All mispredicted indirect branch instructions retired (excluding RETs. TSX aborts are considered indirect branches).",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken.",
- "EventCode": "0xC5",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x20",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
- "SampleAfterValue": "400009",
- "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
- "Data_LA": "1"
+ "PublicDescription": "Counts all mispredicted indirect branch instructions retired (excluding RETs. TSX aborts are considered indirect branches).",
+ "SampleAfterValue": "50021",
+ "UMask": "0x80"
},
{
- "PEBS": "1",
+ "BriefDescription": "TMA slots where uops got dropped",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts all miss-predicted indirect branch instructions retired (excluding RETs. TSX aborts is considered indirect branch).",
- "EventCode": "0xC5",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x80",
+ "EventCode": "0x0d",
+ "EventName": "INT_MISC.UOP_DROPPING",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "BR_MISP_RETIRED.INDIRECT",
- "SampleAfterValue": "100003",
- "BriefDescription": "All miss-predicted indirect branch instructions retired (excluding RETs. TSX aborts is considered indirect branch).",
- "Data_LA": "1"
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
- "EventCode": "0xcc",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x20",
+ "CounterMask": "20",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "MISC_RETIRED.LBR_INSERTS",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Increments whenever there is an update to the LBR array."
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x14"
},
{
- "PublicDescription": "Counts number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted).",
- "EventCode": "0xcc",
+ "BriefDescription": "Number of uops executed on port 7 and 8",
+ "CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x40",
- "EventName": "MISC_RETIRED.PAUSE_INST",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_7_8",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 7 and 8.",
"SampleAfterValue": "2000003",
- "BriefDescription": "Number of retired PAUSE instructions."
+ "Speculative": "1",
+ "UMask": "0x80"
},
{
+ "BriefDescription": "Number of branch instructions retired that were mispredicted and taken. Non PEBS",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
- "EventCode": "0xE6",
- "Counter": "0,1,2,3",
- "UMask": "0x1",
- "PEBScounters": "0,1,2,3",
- "EventName": "BACLEARS.ANY",
- "SampleAfterValue": "100003",
- "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end."
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts taken conditional mispredicted branch instructions retired.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "All mispredicted branch instructions retired.",
"CollectPEBSRecord": "2",
- "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
- "EventCode": "0xec",
"Counter": "0,1,2,3,4,5,6,7",
- "UMask": "0x2",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
- "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core."
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "SampleAfterValue": "50021"
}
]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/icelake/virtual-memory.json
index 7180a900c175..f485f4664ea6 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/virtual-memory.json
@@ -1,236 +1,245 @@
[
{
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
- "EventCode": "0x08",
- "Counter": "0,1,2,3",
- "UMask": "0x2",
- "PEBScounters": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Page walks completed due to a demand data load to a 4K page."
- },
- {
- "CollectPEBSRecord": "2",
- "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
- "EventCode": "0x08",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "EventCode": "0xBD",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
"PEBScounters": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page."
+ "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+ "SampleAfterValue": "100007",
+ "Speculative": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
- "EventCode": "0x08",
"Counter": "0,1,2,3",
- "UMask": "0xe",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
"PEBScounters": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)"
+ "Speculative": "1",
+ "UMask": "0xe"
},
{
+ "BriefDescription": "STLB flush attempts",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
- "EventCode": "0x08",
"Counter": "0,1,2,3",
- "UMask": "0x10",
+ "EventCode": "0xBD",
+ "EventName": "TLB_FLUSH.STLB_ANY",
"PEBScounters": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle."
+ "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
+ "SampleAfterValue": "100007",
+ "Speculative": "1",
+ "UMask": "0x20"
},
{
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
- "EventCode": "0x08",
"Counter": "0,1,2,3",
- "UMask": "0x10",
- "PEBScounters": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x08",
"EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
"SampleAfterValue": "100003",
- "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.",
- "CounterMask": "1"
+ "Speculative": "1",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
- "EventCode": "0x08",
"Counter": "0,1,2,3",
- "UMask": "0x20",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
"PEBScounters": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Loads that miss the DTLB and hit the STLB."
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Page walks completed due to a demand data load to a 4K page.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
- "EventCode": "0x49",
"Counter": "0,1,2,3",
- "UMask": "0x2",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
"PEBScounters": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "BriefDescription": "Page walks completed due to a demand data store to a 4K page."
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
- "EventCode": "0x49",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
"PEBScounters": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page."
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
- "EventCode": "0x49",
"Counter": "0,1,2,3",
- "UMask": "0xe",
+ "CounterMask": "1",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
"PEBScounters": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
- "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)"
+ "Speculative": "1",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
- "EventCode": "0x49",
"Counter": "0,1,2,3",
- "UMask": "0x10",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
"PEBScounters": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
- "SampleAfterValue": "2000003",
- "BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle."
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
- "EventCode": "0x49",
"Counter": "0,1,2,3",
- "UMask": "0x10",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
"PEBScounters": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
"SampleAfterValue": "100003",
- "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.",
- "CounterMask": "1"
+ "Speculative": "1",
+ "UMask": "0x20"
},
{
+ "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
- "EventCode": "0x49",
"Counter": "0,1,2,3",
- "UMask": "0x20",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
"PEBScounters": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "BriefDescription": "Stores that miss the DTLB and hit the STLB."
+ "Speculative": "1",
+ "UMask": "0xe"
},
{
+ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
- "EventCode": "0x85",
"Counter": "0,1,2,3",
- "UMask": "0x2",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
"PEBScounters": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)"
+ "Speculative": "1",
+ "UMask": "0xe"
},
{
+ "BriefDescription": "Page walks completed due to a demand data store to a 4K page.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts code misses in all ITLB (Instruction TLB) levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
- "EventCode": "0x85",
"Counter": "0,1,2,3",
- "UMask": "0x4",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
"PEBScounters": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)"
+ "Speculative": "1",
+ "UMask": "0x2"
},
{
+ "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
- "EventCode": "0x85",
"Counter": "0,1,2,3",
- "UMask": "0xe",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.STLB_HIT",
"PEBScounters": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
"SampleAfterValue": "100003",
- "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)"
+ "Speculative": "1",
+ "UMask": "0x20"
},
{
+ "BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
- "EventCode": "0x85",
"Counter": "0,1,2,3",
- "UMask": "0x10",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
"PEBScounters": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle."
+ "Speculative": "1",
+ "UMask": "0x4"
},
{
+ "BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
- "EventCode": "0x85",
"Counter": "0,1,2,3",
- "UMask": "0x10",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_PENDING",
"PEBScounters": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
- "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request.",
- "CounterMask": "1"
+ "Speculative": "1",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
- "EventCode": "0x85",
"Counter": "0,1,2,3",
- "UMask": "0x20",
+ "CounterMask": "1",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_ACTIVE",
"PEBScounters": "0,1,2,3",
- "EventName": "ITLB_MISSES.STLB_HIT",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
"SampleAfterValue": "100003",
- "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB."
+ "Speculative": "1",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
- "EventCode": "0xAE",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
"PEBScounters": "0,1,2,3",
- "EventName": "ITLB.ITLB_FLUSH",
- "SampleAfterValue": "100007",
- "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages."
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x20"
},
{
+ "BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
- "EventCode": "0xBD",
"Counter": "0,1,2,3",
- "UMask": "0x1",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
"PEBScounters": "0,1,2,3",
- "EventName": "TLB_FLUSH.DTLB_THREAD",
- "SampleAfterValue": "100007",
- "BriefDescription": "DTLB flush attempts of the thread-specific entries"
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x10"
},
{
+ "BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle.",
"CollectPEBSRecord": "2",
- "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
- "EventCode": "0xBD",
"Counter": "0,1,2,3",
- "UMask": "0x20",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
"PEBScounters": "0,1,2,3",
- "EventName": "TLB_FLUSH.STLB_ANY",
- "SampleAfterValue": "100007",
- "BriefDescription": "STLB flush attempts"
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x10"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/cache.json b/tools/perf/pmu-events/arch/x86/icelakex/cache.json
new file mode 100644
index 000000000000..624762008aaa
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelakex/cache.json
@@ -0,0 +1,706 @@
+[
+ {
+ "BriefDescription": "Demand Data Read miss L2, no rejects",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x21"
+ },
+ {
+ "BriefDescription": "RFO requests that miss L2 cache",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x22"
+ },
+ {
+ "BriefDescription": "L2 cache misses when fetching instructions",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x24"
+ },
+ {
+ "BriefDescription": "Demand requests that miss L2 cache",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x27"
+ },
+ {
+ "BriefDescription": "SW prefetch requests that miss L2 cache.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.SWPF_MISS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. This event accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x28"
+ },
+ {
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xc1"
+ },
+ {
+ "BriefDescription": "RFO requests that hit L2 cache",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xc2"
+ },
+ {
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xc4"
+ },
+ {
+ "BriefDescription": "SW prefetch requests that hit L2 cache.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.SWPF_HIT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. This event accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xc8"
+ },
+ {
+ "BriefDescription": "Demand Data Read requests",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xe1"
+ },
+ {
+ "BriefDescription": "RFO requests to L2 cache",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xe2"
+ },
+ {
+ "BriefDescription": "L2 code requests",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the total number of L2 code requests.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xe4"
+ },
+ {
+ "BriefDescription": "Core-originated cacheable demand requests missed L3",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of L1D misses that are outstanding",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.L2_STALL",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Counts the number of cache lines replaced in L1 data cache.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x51",
+ "EventName": "L1D.REPLACEMENT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "For every cycle where the core is waiting on at least 1 outstanding Demand RFO request, increments by 1.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "For every cycle where the core is waiting on at least 1 outstanding demand RFO request, increments by 1. RFOs are initiated by a core as part of a data store operation. Demand RFO requests include RFOs, locks, and ItoM transactions. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "For every cycle, increments by the number of outstanding data read requests the core is waiting on.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "For every cycle, increments by the number of outstanding data read requests the core is waiting on. Data read requests include cacheable demand reads and L2 prefetches, but do not include RFOs, code reads or prefetches to the L3. Reads due to page walks resulting from any request type will also be counted. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "For every cycle where the core is waiting on at least 1 outstanding demand data read request, increments by 1.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "For every cycle where the core is waiting on at least 1 outstanding data read request, increments by 1. Data read requests include cacheable demand reads and L2 prefetches, but do not include RFOs, code reads or prefetches to the L3. Reads due to page walks resulting from any request type will also be counted. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xb0",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Demand and prefetch data reads",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts memory transactions sent to the uncore.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts memory transactions sent to the uncore including requests initiated by the core, all L3 prefetches, reads resulting from page walks, and snoop responses.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Retired load instructions that miss the STLB.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions that true miss the STLB.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x11"
+ },
+ {
+ "BriefDescription": "Retired store instructions that miss the STLB.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired store instructions that true miss the STLB.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x12"
+ },
+ {
+ "BriefDescription": "Retired load instructions with locked access.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with locked access.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x21"
+ },
+ {
+ "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x41"
+ },
+ {
+ "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x42"
+ },
+ {
+ "BriefDescription": "All retired load instructions.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts all retired load instructions. This event accounts for SW prefetch instructions for loads.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x81"
+ },
+ {
+ "BriefDescription": "All retired store instructions.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.ALL_STORES",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts all retired store instructions. This event accounts for SW prefetch instructions and the PREFETCHW instruction for stores.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x82"
+ },
+ {
+ "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
+ "SampleAfterValue": "100021",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L1 cache as data sources",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L2 cache as data sources",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions missed L2 cache as data sources.",
+ "SampleAfterValue": "100021",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L3 cache as data sources",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in L1 but hit the FB (Fill Buffers) due to a preceding miss to the same cache line with data not ready.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Retired demand load instructions which missed L3 but serviced from local IXP memory as data sources",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.LOCAL_PMM",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "20011",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "20011",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.",
+ "SampleAfterValue": "20011",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions whose data sources were hits in L3 without snoops required.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "100007",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data source was remote HITM",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Retired load instructions whose data source was remote HITM.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data source was forwarded from a remote cache",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Retired load instructions whose data source was forwarded from a remote cache.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Retired demand load instructions which missed L3 but serviced from remote IXP memory as data sources",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Retired load instructions which data source was serviced from L4",
+ "SampleAfterValue": "100007",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "L2 writebacks that access L2 cache",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xF0",
+ "EventName": "L2_TRANS.L2_WB",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "L2 cache lines filling L2",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xF1",
+ "EventName": "L2_LINES_IN.ALL",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1f"
+ },
+ {
+ "BriefDescription": "Non-modified cache lines that are silently dropped by L2 cache when triggered by an L2 cache fill.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xF2",
+ "EventName": "L2_LINES_OUT.SILENT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cache lines that are evicted by L2 cache when triggered by an L2 cache fill.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xF2",
+ "EventName": "L2_LINES_OUT.NON_SILENT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of lines that are evicted by the L2 cache due to L2 cache fills. Evicted lines are delivered to the L3, which may or may not cache them, according to system load and priorities.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles the queue waiting for offcore responses is full.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xf4",
+ "EventName": "SQ_MISC.SQ_FULL",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the cycles for which the thread is active and the queue waiting for responses from the uncore cannot take any more entries.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/floating-point.json b/tools/perf/pmu-events/arch/x86/icelakex/floating-point.json
new file mode 100644
index 000000000000..bcedcd985e84
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelakex/floating-point.json
@@ -0,0 +1,95 @@
+[
+ {
+ "BriefDescription": "Counts all microcode FP assists.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.FP",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/frontend.json b/tools/perf/pmu-events/arch/x86/icelakex/frontend.json
new file mode 100644
index 000000000000..cc59cee1cd57
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelakex/frontend.json
@@ -0,0 +1,469 @@
+[
+ {
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_UOPS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles MITE is delivering optimal number of Uops",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "5",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_CYCLES_OK",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_CYCLES_ANY",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_UOPS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycles DSB is delivering optimal number of Uops",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "5",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_OK",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_ANY",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Number of switches from DSB or MITE to the MS",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_SWITCHES",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x30"
+ },
+ {
+ "BriefDescription": "Uops delivered to IDQ while MS is busy",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_UOPS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x30"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_16B.IFDATA_STALL",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
+ "SampleAfterValue": "500009",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_HIT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_MISS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_STALL",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Uops not delivered by IDQ when backend of the machine is not stalled",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x9c",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of uops not delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there were no back-end stalls. This event counts for one SMT thread in a given cycle.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles when no uops are delivered by the IDQ when backend of the machine is not stalled",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "5",
+ "EventCode": "0x9c",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "Invert": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "DSB-to-MITE switch true penalty cycles.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xab",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE transitions count.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xab",
+ "EventName": "DSB2MITE_SWITCHES.COUNT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of Decode Stream Buffer (DSB a.k.a. Uop Cache)-to-MITE speculative transitions.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Retired Instructions that experienced a DSB miss.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x11",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.L1I_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x12",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.L2_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x13",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired Instructions that experienced iTLB true miss.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x14",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired Instructions that experienced STLB (2nd level TLB) true miss.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.STLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x15",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions after front-end starvation of at least 2 cycles",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x500206",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x500406",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x500806",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x501006",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x502006",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x504006",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x508006",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x510006",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x520006",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x100206",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x500106",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the total number of times the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.ANY",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/memory.json b/tools/perf/pmu-events/arch/x86/icelakex/memory.json
new file mode 100644
index 000000000000..d319d448e2aa
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelakex/memory.json
@@ -0,0 +1,291 @@
+[
+ {
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_CONFLICT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times a TSX line had a cache conflict.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Speculatively counts the number of TSX aborts due to a data capacity limitation for transactional writes.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Speculatively counts the number of TSX aborts due to a data capacity limitation for transactional reads",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_CAPACITY_READ",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed inside a transactional region",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts Unfriendly TSX abort triggered by a vzeroupper instruction.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of times an instruction execution caused the transactional nest count supported to be exceeded",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC3",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts Unfriendly TSX abort triggered by a nest count that is too deep.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "6",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x6"
+ },
+ {
+ "BriefDescription": "Number of machine clears due to memory ordering conflicts.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of Machine Clears detected due to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution started.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.START",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution successfully committed",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.COMMIT",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of times RTM commit succeeded.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution aborted.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of times RTM abort was triggered.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED_MEM",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED_EVENTS",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x4",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "100003",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x8",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "50021",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x10",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "20011",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x20",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x40",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "2003",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x80",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "1009",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x100",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "503",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x200",
+ "PEBS": "2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "101",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/other.json b/tools/perf/pmu-events/arch/x86/icelakex/other.json
new file mode 100644
index 000000000000..ef50d3a3392e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelakex/other.json
@@ -0,0 +1,181 @@
+[
+ {
+ "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
+ "CollectPEBSRecord": "2",
+ "Counter": "35",
+ "EventName": "TOPDOWN.SLOTS",
+ "PEBScounters": "35",
+ "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).",
+ "SampleAfterValue": "10000003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x7"
+ },
+ {
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x18"
+ },
+ {
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture). This includes high current AVX 512-bit instructions.",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.NTA",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.T0",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHW instructions executed.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "TMA slots available for an unhalted logical processor. General counter - architectural event",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.SLOTS_P",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
+ "SampleAfterValue": "10000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "TMA slots where no uops were being issued due to lack of back-end resources.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of Top-down Microarchitecture Analysis (TMA) method's slots where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.",
+ "SampleAfterValue": "10000003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.ANY",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x7"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop hit in another core's caches, data forwarding is required as the data is modified.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop hit in another core's caches which forwarded the unmodified data to the requesting core.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x8003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts writes that generate a demand read for ownership (RFO) request and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop hit in another core's caches, data forwarding is required as the data is modified.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts streaming stores that have any type of response.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10800",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json
new file mode 100644
index 000000000000..3cc71244e699
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json
@@ -0,0 +1,972 @@
+[
+ {
+ "BriefDescription": "Number of instructions retired. Fixed Counter - architectural event",
+ "CollectPEBSRecord": "2",
+ "Counter": "32",
+ "EventName": "INST_RETIRED.ANY",
+ "PEBS": "1",
+ "PEBScounters": "32",
+ "PublicDescription": "Counts the number of instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Precise instruction retired event with a reduced effect of PEBS shadow in IP distribution",
+ "CollectPEBSRecord": "2",
+ "Counter": "32",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "PEBS": "1",
+ "PEBScounters": "32",
+ "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled. Use on Fixed Counter 0.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Core cycles when the thread is not in halt state",
+ "CollectPEBSRecord": "2",
+ "Counter": "33",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "PEBScounters": "33",
+ "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "CollectPEBSRecord": "2",
+ "Counter": "34",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "PEBScounters": "34",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate than the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x3"
+ },
+ {
+ "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "False dependencies due to partial compare on address.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x07",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies due to partial compare on address.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+ "SampleAfterValue": "500009",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.ALL_RECOVERY_CYCLES",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x3"
+ },
+ {
+ "BriefDescription": "TMA slots where uops got dropped",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0d",
+ "EventName": "INT_MISC.UOP_DROPPING",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0d",
+ "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
+ "SampleAfterValue": "500009",
+ "Speculative": "1",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Uops that RAT issues to RS",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0e",
+ "EventName": "UOPS_ISSUED.ANY",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles when RAT does not issue Uops to RS for the thread",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x0E",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "Invert": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0e",
+ "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x14",
+ "EventName": "ARITH.DIVIDER_ACTIVE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x9"
+ },
+ {
+ "BriefDescription": "Thread cycles when thread is not in halt state",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1"
+ },
+ {
+ "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts core crystal clock cycles when the thread is unhalted.",
+ "SampleAfterValue": "25003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
+ "SampleAfterValue": "25003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts the number of demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4c",
+ "EventName": "LOAD_HIT_PREFETCH.SWPF",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts all non-software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5e",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x5E",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "Invert": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x87",
+ "EventName": "ILD_STALL.LCP",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+ "SampleAfterValue": "500009",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 0",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_0",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 1",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 2 and 3",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_2_3",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 2 and 3.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 4 and 9",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_4_9",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 4 and 9.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 5",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_5",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 6",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_6",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 7 and 8",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_7_8",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 7 and 8.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa2",
+ "EventName": "RESOURCE_STALLS.SCOREBOARD",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa2",
+ "EventName": "RESOURCE_STALLS.SB",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Total execution stalls.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "5",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x5"
+ },
+ {
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "8",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "12",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0xc"
+ },
+ {
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "16",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "20",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x14"
+ },
+ {
+ "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "2",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xa8",
+ "EventName": "LSD.UOPS",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0xA8",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "5",
+ "EventCode": "0xa8",
+ "EventName": "LSD.CYCLES_OK",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "Invert": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "2",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "3",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "2",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "3",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts the number of x87 uops dispatched.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.X87",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of x87 uops executed.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
+ "SampleAfterValue": "2000003"
+ },
+ {
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "10",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "Invert": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Retirement slots used.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.SLOTS",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the retirement slots used each cycle.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "All branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts all branch instructions retired.",
+ "SampleAfterValue": "400009"
+ },
+ {
+ "BriefDescription": "Taken conditional branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts taken conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts both direct and indirect near call instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Return instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts return instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Not taken branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts not taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Conditional branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x11"
+ },
+ {
+ "BriefDescription": "Taken branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Far branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts far branch instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "All indirect branch instructions retired (excluding RETs. TSX aborts are considered indirect branch).",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts all indirect branch instructions retired (excluding RETs. TSX aborts are considered indirect branch).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "All mispredicted branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "SampleAfterValue": "50021"
+ },
+ {
+ "BriefDescription": "number of branch instructions retired that were mispredicted and taken. Non PEBS",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts taken conditional mispredicted branch instructions retired.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_NTAKEN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts mispredicted conditional branch instructions retired.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x11"
+ },
+ {
+ "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "All mispredicted indirect branch instructions retired (excluding RETs. TSX aborts are considered indirect branch).",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts all mispredicted indirect branch instructions retired (excluding RETs. TSX aborts are considered indirect branch).",
+ "SampleAfterValue": "50021",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Number of retired PAUSE instructions. This event is not supported on first SKL and KBL products.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcc",
+ "EventName": "MISC_RETIRED.PAUSE_INST",
+ "PublicDescription": "Counts number of retired PAUSE instructions. This event is not supported on first SKL and KBL products.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-memory.json
new file mode 100644
index 000000000000..5f0d2c462940
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-memory.json
@@ -0,0 +1,333 @@
+[
+ {
+ "BriefDescription": "2LM Tag Check : Hit in Near Memory Cache",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xD3",
+ "EventName": "UNC_M_TAGCHK.HIT",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "2LM Tag Check : Miss, no data in this line",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xD3",
+ "EventName": "UNC_M_TAGCHK.MISS_CLEAN",
+ "PerPkg": "1",
+ "UMask": "0x02",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "2LM Tag Check : Miss, existing data may be evicted to Far Memory",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xD3",
+ "EventName": "UNC_M_TAGCHK.MISS_DIRTY",
+ "PerPkg": "1",
+ "UMask": "0x04",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "2LM Tag Check : Read Hit in Near Memory Cache",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xD3",
+ "EventName": "UNC_M_TAGCHK.NM_RD_HIT",
+ "PerPkg": "1",
+ "UMask": "0x08",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "2LM Tag Check : Write Hit in Near Memory Cache",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xD3",
+ "EventName": "UNC_M_TAGCHK.NM_WR_HIT",
+ "PerPkg": "1",
+ "UMask": "0x10",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "DRAM Precharge commands. : Precharge due to read",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x02",
+ "EventName": "UNC_M_PRE_COUNT.RD",
+ "PerPkg": "1",
+ "UMask": "0x04",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "DRAM Precharge commands. : Precharge due to write",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x02",
+ "EventName": "UNC_M_PRE_COUNT.WR",
+ "PerPkg": "1",
+ "UMask": "0x08",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "All DRAM read CAS commands issued (including underfills)",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x04",
+ "EventName": "UNC_M_CAS_COUNT.RD",
+ "PerPkg": "1",
+ "UMask": "0x0f",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "All DRAM write CAS commands issued",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x04",
+ "EventName": "UNC_M_CAS_COUNT.WR",
+ "PerPkg": "1",
+ "UMask": "0x30",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "All DRAM CAS commands issued",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x04",
+ "EventName": "UNC_M_CAS_COUNT.ALL",
+ "PerPkg": "1",
+ "UMask": "0x3f",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.OPPORTUNISTIC",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC",
+ "PerPkg": "1",
+ "UMask": "0x02",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH",
+ "PerPkg": "1",
+ "UMask": "0x04",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Read Pending Queue Allocations",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x10",
+ "EventName": "UNC_M_RPQ_INSERTS.PCH0",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Read Pending Queue Allocations",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x10",
+ "EventName": "UNC_M_RPQ_INSERTS.PCH1",
+ "PerPkg": "1",
+ "UMask": "0x02",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Write Pending Queue Allocations",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x20",
+ "EventName": "UNC_M_WPQ_INSERTS.PCH0",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Write Pending Queue Allocations",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x20",
+ "EventName": "UNC_M_WPQ_INSERTS.PCH1",
+ "PerPkg": "1",
+ "UMask": "0x02",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "DRAM Precharge commands. : Precharge due to page table",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x02",
+ "EventName": "UNC_M_PRE_COUNT.PGT",
+ "PerPkg": "1",
+ "UMask": "0x10",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "DRAM Clockticks",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventName": "UNC_M_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Half clockticks for IMC",
+ "Counter": "FIXED",
+ "CounterType": "FIXED",
+ "EventCode": "0xff",
+ "EventName": "UNC_M_HCLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Read Pending Queue Occupancy",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x80",
+ "EventName": "UNC_M_RPQ_OCCUPANCY_PCH0",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Read Pending Queue Occupancy",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x81",
+ "EventName": "UNC_M_RPQ_OCCUPANCY_PCH1",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Write Pending Queue Occupancy",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x82",
+ "EventName": "UNC_M_WPQ_OCCUPANCY_PCH0",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Write Pending Queue Occupancy",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_M_WPQ_OCCUPANCY_PCH1",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "DRAM Activate Count : All Activates",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x01",
+ "EventName": "UNC_M_ACT_COUNT.ALL",
+ "PerPkg": "1",
+ "UMask": "0x0B",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "DRAM Precharge commands",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x02",
+ "EventName": "UNC_M_PRE_COUNT.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1C",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "PMM Read Pending Queue Occupancy",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xE0",
+ "EventName": "UNC_M_PMM_RPQ_OCCUPANCY.ALL",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "PMM Read Queue Inserts",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xE3",
+ "EventName": "UNC_M_PMM_RPQ_INSERTS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "PMM Write Queue Inserts",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xE7",
+ "EventName": "UNC_M_PMM_WPQ_INSERTS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "PMM Commands : All",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xEA",
+ "EventName": "UNC_M_PMM_CMD1.ALL",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "PMM Commands : Reads - RPQ",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xEA",
+ "EventName": "UNC_M_PMM_CMD1.RD",
+ "PerPkg": "1",
+ "UMask": "0x02",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "PMM Commands : Writes",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xEA",
+ "EventName": "UNC_M_PMM_CMD1.WR",
+ "PerPkg": "1",
+ "UMask": "0x04",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "PMM Commands : Underfill reads",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xEA",
+ "EventName": "UNC_M_PMM_CMD1.UFILL_RD",
+ "PerPkg": "1",
+ "UMask": "0x08",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "PMM Write Pending Queue Occupancy",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xE4",
+ "EventName": "UNC_M_PMM_WPQ_OCCUPANCY.ALL",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "iMC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-other.json
new file mode 100644
index 000000000000..52f2301582bb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-other.json
@@ -0,0 +1,2476 @@
+[
+ {
+ "BriefDescription": "Local INVITOE requests (exclusive ownership of a cache line without receiving data) that miss the SF/LLC and are sent to the CHA's home agent",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.INVITOE_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0x10",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Remote INVITOE requests (exclusive ownership of a cache line without receiving data) sent to the CHA's home agent",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.INVITOE_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0x20",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Local read requests that miss the SF/LLC and are sent to the CHA's home agent",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.READS_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Remote read requests sent to the CHA's home agent",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.READS_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0x02",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Local write requests that miss the SF/LLC and are sent to the CHA's home agent",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.WRITES_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0x04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Remote write requests sent to the CHA's home agent",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.WRITES_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0x08",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Clockticks of the uncore caching & home agent (CHA)",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventName": "UNC_CHA_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Normal priority reads issued to the memory controller from the CHA",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x59",
+ "EventName": "UNC_CHA_IMC_READS_COUNT.NORMAL",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "CHA to iMC Full Line Writes Issued : Full Line Non-ISOCH",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x5B",
+ "EventName": "UNC_CHA_IMC_WRITES_COUNT.FULL",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Lines Victimized : All Lines Victimized",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x37",
+ "EventName": "UNC_CHA_LLC_VICTIMS.ALL",
+ "PerPkg": "1",
+ "UMask": "0x0F",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Local read requests that miss the SF/LLC and remote read requests sent to the CHA's home agent",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.READS",
+ "PerPkg": "1",
+ "UMask": "0x03",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Local write requests that miss the SF/LLC and remote write requests sent to the CHA's home agent",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.WRITES",
+ "PerPkg": "1",
+ "UMask": "0x0c",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Snoop filter capacity evictions for E-state entries",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x3D",
+ "EventName": "UNC_CHA_SF_EVICTION.E_STATE",
+ "PerPkg": "1",
+ "UMask": "0x02",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Snoop filter capacity evictions for M-state entries",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x3D",
+ "EventName": "UNC_CHA_SF_EVICTION.M_STATE",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Snoop filter capacity evictions for S-state entries",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x3D",
+ "EventName": "UNC_CHA_SF_EVICTION.S_STATE",
+ "PerPkg": "1",
+ "UMask": "0x04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : All requests from iA Cores",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA",
+ "PerPkg": "1",
+ "UMask": "0xC001FF01",
+ "UMaskExt": "0xC001FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : All requests from iA Cores that Hit the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT",
+ "PerPkg": "1",
+ "UMask": "0xC001FD01",
+ "UMaskExt": "0xC001FD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : CRds issued by iA Cores that Hit the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CRD",
+ "PerPkg": "1",
+ "UMask": "0xC80FFD01",
+ "UMaskExt": "0xC80FFD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRds issued by iA Cores that Hit the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_DRD",
+ "PerPkg": "1",
+ "UMask": "0xC817FD01",
+ "UMaskExt": "0xC817FD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores that hit the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LLCPREFRFO",
+ "PerPkg": "1",
+ "UMask": "0xCCC7FD01",
+ "UMaskExt": "0xCCC7FD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : RFOs issued by iA Cores that Hit the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_RFO",
+ "PerPkg": "1",
+ "UMask": "0xC807FD01",
+ "UMaskExt": "0xC807FD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : All requests from iA Cores that Missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS",
+ "PerPkg": "1",
+ "UMask": "0xC001FE01",
+ "UMaskExt": "0xC001FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : CRds issued by iA Cores that Missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD",
+ "PerPkg": "1",
+ "UMask": "0xC80FFE01",
+ "UMaskExt": "0xC80FFE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD",
+ "PerPkg": "1",
+ "UMask": "0xC817FE01",
+ "UMaskExt": "0xC817FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores that missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO",
+ "PerPkg": "1",
+ "UMask": "0xCCC7FE01",
+ "UMaskExt": "0xCCC7FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : RFOs issued by iA Cores that Missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO",
+ "PerPkg": "1",
+ "UMask": "0xC807FE01",
+ "UMaskExt": "0xC807FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : All requests from IO Devices",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO",
+ "PerPkg": "1",
+ "UMask": "0xC001FF04",
+ "UMaskExt": "0xC001FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : All requests from IO Devices that hit the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT",
+ "PerPkg": "1",
+ "UMask": "0xC001FD04",
+ "UMaskExt": "0xC001FD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : All requests from IO Devices that missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS",
+ "PerPkg": "1",
+ "UMask": "0xC001FE04",
+ "UMaskExt": "0xC001FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : All requests from iA Cores",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA",
+ "PerPkg": "1",
+ "UMask": "0xC001FF01",
+ "UMaskExt": "0xC001FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : All requests from iA Cores that Hit the LLC",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT",
+ "PerPkg": "1",
+ "UMask": "0xC001FD01",
+ "UMaskExt": "0xC001FD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : All requests from iA Cores that Missed the LLC",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS",
+ "PerPkg": "1",
+ "UMask": "0xC001FE01",
+ "UMaskExt": "0xC001FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : CRds issued by iA Cores that Missed the LLC",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD",
+ "PerPkg": "1",
+ "UMask": "0xC80FFE01",
+ "UMaskExt": "0xC80FFE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD",
+ "PerPkg": "1",
+ "UMask": "0xC817FE01",
+ "UMaskExt": "0xC817FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : RFOs issued by iA Cores that Missed the LLC",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO",
+ "PerPkg": "1",
+ "UMask": "0xC807FE01",
+ "UMaskExt": "0xC807FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : All requests from IO Devices",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO",
+ "PerPkg": "1",
+ "UMask": "0xC001FF04",
+ "UMaskExt": "0xC001FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : All requests from IO Devices that hit the LLC",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_HIT",
+ "PerPkg": "1",
+ "UMask": "0xC001FD04",
+ "UMaskExt": "0xC001FD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : All requests from IO Devices that missed the LLC",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS",
+ "PerPkg": "1",
+ "UMask": "0xC001FE04",
+ "UMaskExt": "0xC001FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : ItoMs issued by IO Devices that missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM",
+ "PerPkg": "1",
+ "UMask": "0xCC43FE04",
+ "UMaskExt": "0xCC43FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "CMS Clockticks",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc0",
+ "EventName": "UNC_CHA_CMS_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : CRd_Prefs issued by iA Cores that hit the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CRD_PREF",
+ "PerPkg": "1",
+ "UMask": "0xC88FFD01",
+ "UMaskExt": "0xC88FFD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRd_Prefs issued by iA Cores that Hit the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_DRD_PREF",
+ "PerPkg": "1",
+ "UMask": "0xC897FD01",
+ "UMaskExt": "0xC897FD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : RFO_Prefs issued by iA Cores that Hit the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_RFO_PREF",
+ "PerPkg": "1",
+ "UMask": "0xC887FD01",
+ "UMaskExt": "0xC887FD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : CRd_Prefs issued by iA Cores that Missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF",
+ "PerPkg": "1",
+ "UMask": "0xC88FFE01",
+ "UMaskExt": "0xC88FFE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRd_Prefs issued by iA Cores that Missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF",
+ "PerPkg": "1",
+ "UMask": "0xC897FE01",
+ "UMaskExt": "0xC897FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : RFO_Prefs issued by iA Cores that Missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF",
+ "PerPkg": "1",
+ "UMask": "0xC887FE01",
+ "UMaskExt": "0xC887FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : ItoMs issued by IO Devices that Hit the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT_ITOM",
+ "PerPkg": "1",
+ "UMask": "0xCC43FD04",
+ "UMaskExt": "0xCC43FD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : ItoMs issued by IO Devices",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOM",
+ "PerPkg": "1",
+ "UMask": "0xCC43FF04",
+ "UMaskExt": "0xCC43FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : RFO_Prefs issued by iA Cores",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_RFO_PREF",
+ "PerPkg": "1",
+ "UMask": "0xC887FF01",
+ "UMaskExt": "0xC887FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : RFOs issued by iA Cores",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_RFO",
+ "PerPkg": "1",
+ "UMask": "0xC807FF01",
+ "UMaskExt": "0xC807FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_LLCPREFRFO",
+ "PerPkg": "1",
+ "UMask": "0xCCC7FF01",
+ "UMaskExt": "0xCCC7FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRd_Prefs issued by iA Cores",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_DRD_PREF",
+ "PerPkg": "1",
+ "UMask": "0xC897FF01",
+ "UMaskExt": "0xC897FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : CRDs issued by iA Cores",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_CRD",
+ "PerPkg": "1",
+ "UMask": "0xC80FFF01",
+ "UMaskExt": "0xC80FFF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : RFOs issued by iA Cores",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_RFO",
+ "PerPkg": "1",
+ "UMask": "0xC807FF01",
+ "UMaskExt": "0xC807FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : DRds issued by iA Cores",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_DRD",
+ "PerPkg": "1",
+ "UMask": "0xC817FF01",
+ "UMaskExt": "0xC817FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : CRDs issued by iA Cores",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_CRD",
+ "PerPkg": "1",
+ "UMask": "0xC80FFF01",
+ "UMaskExt": "0xC80FFF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC - HOMed locally",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0xC816FE01",
+ "UMaskExt": "0xC816FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC - HOMed remotely",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0xC8177E01",
+ "UMaskExt": "0xC8177E",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC - HOMed locally",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0xC816FE01",
+ "UMaskExt": "0xC816FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC - HOMed remotely",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0xC8177E01",
+ "UMaskExt": "0xC8177E",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRd_Prefs issued by iA Cores that Missed the LLC - HOMed locally",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0xC896FE01",
+ "UMaskExt": "0xC896FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRd_Prefs issued by iA Cores that Missed the LLC - HOMed remotely",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0xC8977E01",
+ "UMaskExt": "0xC8977E",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : RFOs issued by iA Cores that Missed the LLC - HOMed locally",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0xC806FE01",
+ "UMaskExt": "0xC806FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : RFOs issued by iA Cores that Missed the LLC - HOMed remotely",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0xC8077E01",
+ "UMaskExt": "0xC8077E",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : RFO_Prefs issued by iA Cores that Missed the LLC - HOMed locally",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0xC886FE01",
+ "UMaskExt": "0xC886FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : RFO_Prefs issued by iA Cores that Missed the LLC - HOMed remotely",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0xC8877E01",
+ "UMaskExt": "0xC8877E",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : CLFlushes issued by iA Cores",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_CLFLUSH",
+ "PerPkg": "1",
+ "UMask": "0xC8C7FF01",
+ "UMaskExt": "0xC8C7FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : SpecItoMs issued by iA Cores",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_SPECITOM",
+ "PerPkg": "1",
+ "UMask": "0xCC57FF01",
+ "UMaskExt": "0xCC57FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR",
+ "PerPkg": "1",
+ "UMask": "0xCD43FF04",
+ "UMaskExt": "0xCD43FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices that hit the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT_ITOMCACHENEAR",
+ "PerPkg": "1",
+ "UMask": "0xCD43FD04",
+ "UMaskExt": "0xCD43FD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR",
+ "PerPkg": "1",
+ "UMask": "0xCD43FE04",
+ "UMaskExt": "0xCD43FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRds issued by iA Cores targeting PMM Mem that Missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM",
+ "PerPkg": "1",
+ "UMask": "0xC8178A01",
+ "UMaskExt": "0xC8178A",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRds issued by iA Cores targeting PMM Mem that Missed the LLC - HOMed locally",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL_PMM",
+ "PerPkg": "1",
+ "UMask": "0xC8168A01",
+ "UMaskExt": "0xC8168A",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRds issued by iA Cores targeting PMM Mem that Missed the LLC - HOMed remotely",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE_PMM",
+ "PerPkg": "1",
+ "UMask": "0xC8170A01",
+ "UMaskExt": "0xC8170A",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts; WCiLF misses from local IA",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_FULL_STREAMING_WR",
+ "PerPkg": "1",
+ "UMask": "0xc867fe01",
+ "UMaskExt": "0xc867fe",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts; WCiL misses from local IA",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_PARTIAL_STREAMING_WR",
+ "PerPkg": "1",
+ "UMask": "0xc86ffe01",
+ "UMaskExt": "0xc86ffe",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : DRds issued by iA Cores targeting PMM Mem that Missed the LLC",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM",
+ "PerPkg": "1",
+ "UMask": "0xC8178A01",
+ "UMaskExt": "0xC8178A",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : LLCPrefData issued by iA Cores that missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA",
+ "PerPkg": "1",
+ "UMask": "0xCCD7FE01",
+ "UMaskExt": "0xCCD7FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : PCIRdCurs issued by IO Devices that missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR",
+ "PerPkg": "1",
+ "UMask": "0xC8F3FE04",
+ "UMaskExt": "0xC8F3FE",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : PCIRdCurs issued by IO Devices that missed the LLC",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_PCIRDCUR",
+ "PerPkg": "1",
+ "UMask": "0xc8f3fe04",
+ "UMaskExt": "0xc8f3fe",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRds issued by iA Cores targeting DDR Mem that Missed the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_DDR",
+ "PerPkg": "1",
+ "UMask": "0xC8178601",
+ "UMaskExt": "0xC81786",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRds issued by iA Cores targeting DDR Mem that Missed the LLC - HOMed locally",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL_DDR",
+ "PerPkg": "1",
+ "UMask": "0xC8168601",
+ "UMaskExt": "0xC81686",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : DRds issued by iA Cores targeting DDR Mem that Missed the LLC - HOMed remotely",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE_DDR",
+ "PerPkg": "1",
+ "UMask": "0xC8170601",
+ "UMaskExt": "0xC81706",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : DRds issued by iA Cores targeting DDR Mem that Missed the LLC",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR",
+ "PerPkg": "1",
+ "UMask": "0xC8178601",
+ "UMaskExt": "0xC81786",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : PCIRdCurs issued by IO Devices that hit the LLC",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT_PCIRDCUR",
+ "PerPkg": "1",
+ "UMask": "0xC8F3FD04",
+ "UMaskExt": "0xC8F3FD",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : PCIRdCurs issued by IO Devices",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR",
+ "PerPkg": "1",
+ "UMask": "0xC8F3FF04",
+ "UMaskExt": "0xC8F3FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Inserts : LLCPrefData issued by iA Cores",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_LLCPREFDATA",
+ "PerPkg": "1",
+ "UMask": "0xCCD7FF01",
+ "UMaskExt": "0xCCD7FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : PCIRdCurs issued by IO Devices",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_PCIRDCUR",
+ "PerPkg": "1",
+ "UMask": "0xC8F3FF04",
+ "UMaskExt": "0xC8F3FF",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Cache and Snoop Filter Lookups; Data Read Request",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x34",
+ "EventName": "UNC_CHA_LLC_LOOKUP.DATA_READ",
+ "PerPkg": "1",
+ "UMask": "0x1BC1FF",
+ "UMaskExt": "0x1BC1",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Clockticks of the integrated IO (IIO) traffic controller",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x01",
+ "EventName": "UNC_IIO_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.CMPD.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.CMPD.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.CMPD.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.CMPD.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.CMPD.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.CMPD.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.CMPD.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.CMPD.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core reading from Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core reading from Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core reading from Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core reading from Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART4",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x10",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART5",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x20",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART6",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x40",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART7",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x80",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART4",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x10",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART5",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x20",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART6",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x40",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART7",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x80",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART4",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x10",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART5",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x20",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART6",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x40",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART7",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x80",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART4",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x10",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART5",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x20",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART6",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x40",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART7",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x80",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.CMPD.PART4",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x10",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.CMPD.PART5",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x20",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.CMPD.PART6",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x40",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Data requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.CMPD.PART7",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x80",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number requests PCIe makes of the main die : All",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x85",
+ "EventName": "UNC_IIO_NUM_REQ_OF_CPU.COMMIT.ALL",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0xFF",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART4",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x10",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART5",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x20",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART6",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x40",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core writing to Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART7",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x80",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core reading from Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART4",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x10",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core reading from Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART5",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x20",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core reading from Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART6",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x40",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested by the CPU : Core reading from Card's MMIO space",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART7",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x80",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART4",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x10",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART5",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x20",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART6",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x40",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART7",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x80",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART4",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x10",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART5",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x20",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART6",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x40",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART7",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x80",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.CMPD.PART4",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x10",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.CMPD.PART5",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x20",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.CMPD.PART6",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x40",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Number Transactions requested of the CPU : CmpD - device sending completion to CPU request",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.CMPD.PART7",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x80",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Free running counter that increments for IIO clocktick",
+ "CounterType": "FREERUN",
+ "EventName": "UNC_IIO_CLOCKTICKS_FREERUN",
+ "PerPkg": "1",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc2",
+ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART0",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "UMask": "0x03",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 1",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc2",
+ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART1",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "UMask": "0x03",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 2",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc2",
+ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART2",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "UMask": "0x03",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 3",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc2",
+ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART3",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "UMask": "0x03",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 4",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc2",
+ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART4",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "PortMask": "0x10",
+ "UMask": "0x03",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 5",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc2",
+ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART5",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "PortMask": "0x20",
+ "UMask": "0x03",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 6",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc2",
+ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART6",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "PortMask": "0x40",
+ "UMask": "0x03",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 7",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc2",
+ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART7",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "PortMask": "0x80",
+ "UMask": "0x03",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Occupancy of completions with data : Part 0",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xd5",
+ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART0",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Occupancy of completions with data : Part 7",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xd5",
+ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART7",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "UMask": "0x80",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Occupancy of completions with data : Part 6",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xd5",
+ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART6",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "UMask": "0x40",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Occupancy of completions with data : Part 5",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xd5",
+ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART5",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "UMask": "0x20",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Occupancy of completions with data : Part 4",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xd5",
+ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART4",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "UMask": "0x10",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Occupancy of completions with data : Part 3",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xd5",
+ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART3",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Occupancy of completions with data : Part 2",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xd5",
+ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART2",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Occupancy of completions with data : Part 1",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xd5",
+ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART1",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0-7",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc2",
+ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.ALL_PARTS",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "PortMask": "0xff",
+ "UMask": "0x03",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Occupancy of completions with data : Part 0-7",
+ "Counter": "2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xd5",
+ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.ALL_PARTS",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "UMask": "0xff",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Misc Events - Set 1 : Lost Forward",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x1F",
+ "EventName": "UNC_I_MISC1.LOST_FWD",
+ "PerPkg": "1",
+ "UMask": "0x10",
+ "Unit": "IRP"
+ },
+ {
+ "BriefDescription": "PCIITOM request issued by the IRP unit to the mesh with the intention of writing a full cacheline",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x10",
+ "EventName": "UNC_I_COHERENT_OPS.PCITOM",
+ "PerPkg": "1",
+ "UMask": "0x10",
+ "Unit": "IRP"
+ },
+ {
+ "BriefDescription": "Coherent Ops : WbMtoI",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x10",
+ "EventName": "UNC_I_COHERENT_OPS.WBMTOI",
+ "PerPkg": "1",
+ "UMask": "0x40",
+ "Unit": "IRP"
+ },
+ {
+ "BriefDescription": "Total IRP occupancy of inbound read and write requests to coherent memory",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x0f",
+ "EventName": "UNC_I_CACHE_TOTAL_OCCUPANCY.MEM",
+ "PerPkg": "1",
+ "UMask": "0x04",
+ "Unit": "IRP"
+ },
+ {
+ "BriefDescription": ": All Inserts Inbound (p2p + faf + cset)",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x20",
+ "EventName": "UNC_I_IRP_ALL.INBOUND_INSERTS",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "IRP"
+ },
+ {
+ "BriefDescription": "Inbound write (fast path) requests received by the IRP",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x11",
+ "EventName": "UNC_I_TRANSACTIONS.WR_PREF",
+ "PerPkg": "1",
+ "UMask": "0x08",
+ "Unit": "IRP"
+ },
+ {
+ "BriefDescription": "Clockticks of the IO coherency tracker (IRP)",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x01",
+ "EventName": "UNC_I_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "IRP"
+ },
+ {
+ "BriefDescription": "FAF RF full",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x17",
+ "EventName": "UNC_I_FAF_FULL",
+ "PerPkg": "1",
+ "Unit": "IRP"
+ },
+ {
+ "BriefDescription": "Inbound read requests received by the IRP and inserted into the FAF queue",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x18",
+ "EventName": "UNC_I_FAF_INSERTS",
+ "PerPkg": "1",
+ "Unit": "IRP"
+ },
+ {
+ "BriefDescription": "Occupancy of the IRP FAF queue",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x19",
+ "EventName": "UNC_I_FAF_OCCUPANCY",
+ "PerPkg": "1",
+ "Unit": "IRP"
+ },
+ {
+ "BriefDescription": "FAF allocation -- sent to ADQ",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x16",
+ "EventName": "UNC_I_FAF_TRANSACTIONS",
+ "PerPkg": "1",
+ "Unit": "IRP"
+ },
+ {
+ "BriefDescription": "Responses to snoops of any type that hit M line in the IIO cache",
+ "Counter": "0,1",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x12",
+ "EventName": "UNC_I_SNOOP_RESP.ALL_HIT_M",
+ "PerPkg": "1",
+ "UMask": "0x78",
+ "Unit": "IRP"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory Lookups : Found in any state",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x2D",
+ "EventName": "UNC_M2M_DIRECTORY_LOOKUP.ANY",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory Lookups : Found in A state",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x2D",
+ "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_A",
+ "PerPkg": "1",
+ "UMask": "0x08",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory Lookups : Found in I state",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x2D",
+ "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_I",
+ "PerPkg": "1",
+ "UMask": "0x02",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory Lookups : Found in S state",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x2D",
+ "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_S",
+ "PerPkg": "1",
+ "UMask": "0x04",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Tag Hit : Clean NearMem Read Hit",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x2C",
+ "EventName": "UNC_M2M_TAG_HIT.NM_RD_HIT_CLEAN",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Tag Hit : Dirty NearMem Read Hit",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x2C",
+ "EventName": "UNC_M2M_TAG_HIT.NM_RD_HIT_DIRTY",
+ "PerPkg": "1",
+ "UMask": "0x02",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Clockticks of the mesh to memory (M2M)",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventName": "UNC_M2M_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "CMS Clockticks",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc0",
+ "EventName": "UNC_M2M_CMS_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "M2M Reads Issued to iMC : PMM - All Channels",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x37",
+ "EventName": "UNC_M2M_IMC_READS.TO_PMM",
+ "PerPkg": "1",
+ "UMask": "0x0720",
+ "UMaskExt": "0x07",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "M2M Writes Issued to iMC : PMM - All Channels",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x38",
+ "EventName": "UNC_M2M_IMC_WRITES.TO_PMM",
+ "PerPkg": "1",
+ "UMask": "0x1C80",
+ "UMaskExt": "0x1C",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Clockticks of the mesh to PCI (M2P)",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x01",
+ "EventName": "UNC_M2P_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "M2PCIe"
+ },
+ {
+ "BriefDescription": "CMS Clockticks",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0xc0",
+ "EventName": "UNC_M2P_CMS_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "M2PCIe"
+ },
+ {
+ "BriefDescription": "Clockticks of the mesh to UPI (M3UPI)",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x01",
+ "EventName": "UNC_M3UPI_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "M3UPI"
+ },
+ {
+ "BriefDescription": "Clockticks in the UBOX using a dedicated 48-bit Fixed Counter",
+ "Counter": "FIXED",
+ "CounterType": "FIXED",
+ "EventCode": "0xff",
+ "EventName": "UNC_U_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "UBOX"
+ },
+ {
+ "BriefDescription": "Valid Flits Received : All Data",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x03",
+ "EventName": "UNC_UPI_RxL_FLITS.ALL_DATA",
+ "PerPkg": "1",
+ "UMask": "0x0F",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Valid Flits Received : All Non Data",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x03",
+ "EventName": "UNC_UPI_RxL_FLITS.NON_DATA",
+ "PerPkg": "1",
+ "UMask": "0x97",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Valid Flits Sent : All Data",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x02",
+ "EventName": "UNC_UPI_TxL_FLITS.ALL_DATA",
+ "PerPkg": "1",
+ "UMask": "0x0F",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Valid Flits Sent : All Non Data",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x02",
+ "EventName": "UNC_UPI_TxL_FLITS.NON_DATA",
+ "PerPkg": "1",
+ "UMask": "0x97",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Number of kfclks",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x01",
+ "EventName": "UNC_UPI_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Cycles in L1",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x21",
+ "EventName": "UNC_UPI_L1_POWER_CYCLES",
+ "PerPkg": "1",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Cycles in L0p",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x27",
+ "EventName": "UNC_UPI_TxL0P_POWER_CYCLES",
+ "PerPkg": "1",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Valid Flits Sent : Null FLITs transmitted to any slot",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x02",
+ "EventName": "UNC_UPI_TxL_FLITS.ALL_NULL",
+ "PerPkg": "1",
+ "UMask": "0x27",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Valid Flits Received : Null FLITs received from any slot",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventCode": "0x03",
+ "EventName": "UNC_UPI_RxL_FLITS.ALL_NULL",
+ "PerPkg": "1",
+ "UMask": "0x27",
+ "Unit": "UPI LL"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-power.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-power.json
new file mode 100644
index 000000000000..2d1368958762
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-power.json
@@ -0,0 +1,10 @@
+[
+ {
+ "BriefDescription": "Clockticks of the power control unit (PCU)",
+ "Counter": "0,1,2,3",
+ "CounterType": "PGMABLE",
+ "EventName": "UNC_P_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/icelakex/virtual-memory.json
new file mode 100644
index 000000000000..1b9d03039c53
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelakex/virtual-memory.json
@@ -0,0 +1,245 @@
+[
+ {
+ "BriefDescription": "Page walks completed due to a demand data load to a 4K page.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0xe"
+ },
+ {
+ "BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data store to a 4K page.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0xe"
+ },
+ {
+ "BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0xe"
+ },
+ {
+ "BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_PENDING",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_ACTIVE",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xBD",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+ "SampleAfterValue": "100007",
+ "Speculative": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "STLB flush attempts",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xBD",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
+ "SampleAfterValue": "100007",
+ "Speculative": "1",
+ "UMask": "0x20"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 0a6a8c7f937f..5f5df6560202 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -38,6 +38,8 @@ GenuineIntel-6-7D,v1,icelake,core
GenuineIntel-6-7E,v1,icelake,core
GenuineIntel-6-8[CD],v1,icelake,core
GenuineIntel-6-A7,v1,icelake,core
+GenuineIntel-6-6A,v1,icelakex,core
+GenuineIntel-6-6C,v1,icelakex,core
GenuineIntel-6-86,v1,tremontx,core
AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core
AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 7422b0ea8790..9604446f8360 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -960,7 +960,7 @@ static int get_maxfds(void)
struct rlimit rlim;
if (getrlimit(RLIMIT_NOFILE, &rlim) == 0)
- return min((int)rlim.rlim_max / 2, 512);
+ return min(rlim.rlim_max / 2, (rlim_t)512);
return 512;
}
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
index 0b7096847991..895f5fc23965 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -5,68 +5,178 @@
* Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
*/
+/*
+ * Use Py_ssize_t for '#' formats to avoid DeprecationWarning: PY_SSIZE_T_CLEAN
+ * will be required for '#' formats.
+ */
+#define PY_SSIZE_T_CLEAN
+
#include <Python.h>
#include "../../../util/trace-event.h"
+#include "../../../util/event.h"
+#include "../../../util/symbol.h"
+#include "../../../util/thread.h"
+#include "../../../util/map.h"
+#include "../../../util/maps.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/session.h"
+#include "../../../util/srcline.h"
+#include "../../../util/srccode.h"
#if PY_MAJOR_VERSION < 3
#define _PyCapsule_GetPointer(arg1, arg2) \
PyCObject_AsVoidPtr(arg1)
+#define _PyBytes_FromStringAndSize(arg1, arg2) \
+ PyString_FromStringAndSize((arg1), (arg2))
+#define _PyUnicode_AsUTF8(arg) \
+ PyString_AsString(arg)
PyMODINIT_FUNC initperf_trace_context(void);
#else
#define _PyCapsule_GetPointer(arg1, arg2) \
PyCapsule_GetPointer((arg1), (arg2))
+#define _PyBytes_FromStringAndSize(arg1, arg2) \
+ PyBytes_FromStringAndSize((arg1), (arg2))
+#define _PyUnicode_AsUTF8(arg) \
+ PyUnicode_AsUTF8(arg)
PyMODINIT_FUNC PyInit_perf_trace_context(void);
#endif
-static PyObject *perf_trace_context_common_pc(PyObject *obj, PyObject *args)
+static struct scripting_context *get_args(PyObject *args, const char *name, PyObject **arg2)
{
- static struct scripting_context *scripting_context;
+ int cnt = 1 + !!arg2;
PyObject *context;
- int retval;
- if (!PyArg_ParseTuple(args, "O", &context))
+ if (!PyArg_UnpackTuple(args, name, 1, cnt, &context, arg2))
return NULL;
- scripting_context = _PyCapsule_GetPointer(context, NULL);
- retval = common_pc(scripting_context);
+ return _PyCapsule_GetPointer(context, NULL);
+}
- return Py_BuildValue("i", retval);
+static struct scripting_context *get_scripting_context(PyObject *args)
+{
+ return get_args(args, "context", NULL);
+}
+
+static PyObject *perf_trace_context_common_pc(PyObject *obj, PyObject *args)
+{
+ struct scripting_context *c = get_scripting_context(args);
+
+ if (!c)
+ return NULL;
+
+ return Py_BuildValue("i", common_pc(c));
}
static PyObject *perf_trace_context_common_flags(PyObject *obj,
PyObject *args)
{
- static struct scripting_context *scripting_context;
- PyObject *context;
- int retval;
+ struct scripting_context *c = get_scripting_context(args);
- if (!PyArg_ParseTuple(args, "O", &context))
+ if (!c)
return NULL;
- scripting_context = _PyCapsule_GetPointer(context, NULL);
- retval = common_flags(scripting_context);
-
- return Py_BuildValue("i", retval);
+ return Py_BuildValue("i", common_flags(c));
}
static PyObject *perf_trace_context_common_lock_depth(PyObject *obj,
PyObject *args)
{
- static struct scripting_context *scripting_context;
- PyObject *context;
- int retval;
+ struct scripting_context *c = get_scripting_context(args);
- if (!PyArg_ParseTuple(args, "O", &context))
+ if (!c)
return NULL;
- scripting_context = _PyCapsule_GetPointer(context, NULL);
- retval = common_lock_depth(scripting_context);
+ return Py_BuildValue("i", common_lock_depth(c));
+}
+static PyObject *perf_sample_insn(PyObject *obj, PyObject *args)
+{
+ struct scripting_context *c = get_scripting_context(args);
+
+ if (!c)
+ return NULL;
+
+ if (c->sample->ip && !c->sample->insn_len &&
+ c->al->thread->maps && c->al->thread->maps->machine)
+ script_fetch_insn(c->sample, c->al->thread, c->al->thread->maps->machine);
+
+ if (!c->sample->insn_len)
+ Py_RETURN_NONE; /* N.B. This is a return statement */
+
+ return _PyBytes_FromStringAndSize(c->sample->insn, c->sample->insn_len);
+}
+
+static PyObject *perf_set_itrace_options(PyObject *obj, PyObject *args)
+{
+ struct scripting_context *c;
+ const char *itrace_options;
+ int retval = -1;
+ PyObject *str;
+
+ c = get_args(args, "itrace_options", &str);
+ if (!c)
+ return NULL;
+
+ if (!c->session || !c->session->itrace_synth_opts)
+ goto out;
+
+ if (c->session->itrace_synth_opts->set) {
+ retval = 1;
+ goto out;
+ }
+
+ itrace_options = _PyUnicode_AsUTF8(str);
+
+ retval = itrace_do_parse_synth_opts(c->session->itrace_synth_opts, itrace_options, 0);
+out:
return Py_BuildValue("i", retval);
}
+static PyObject *perf_sample_src(PyObject *obj, PyObject *args, bool get_srccode)
+{
+ struct scripting_context *c = get_scripting_context(args);
+ unsigned int line = 0;
+ char *srcfile = NULL;
+ char *srccode = NULL;
+ PyObject *result;
+ struct map *map;
+ int len = 0;
+ u64 addr;
+
+ if (!c)
+ return NULL;
+
+ map = c->al->map;
+ addr = c->al->addr;
+
+ if (map && map->dso)
+ srcfile = get_srcline_split(map->dso, map__rip_2objdump(map, addr), &line);
+
+ if (get_srccode) {
+ if (srcfile)
+ srccode = find_sourceline(srcfile, line, &len);
+ result = Py_BuildValue("(sIs#)", srcfile, line, srccode, (Py_ssize_t)len);
+ } else {
+ result = Py_BuildValue("(sI)", srcfile, line);
+ }
+
+ free(srcfile);
+
+ return result;
+}
+
+static PyObject *perf_sample_srcline(PyObject *obj, PyObject *args)
+{
+ return perf_sample_src(obj, args, false);
+}
+
+static PyObject *perf_sample_srccode(PyObject *obj, PyObject *args)
+{
+ return perf_sample_src(obj, args, true);
+}
+
static PyMethodDef ContextMethods[] = {
{ "common_pc", perf_trace_context_common_pc, METH_VARARGS,
"Get the common preempt count event field value."},
@@ -74,6 +184,14 @@ static PyMethodDef ContextMethods[] = {
"Get the common flags event field value."},
{ "common_lock_depth", perf_trace_context_common_lock_depth,
METH_VARARGS, "Get the common lock depth event field value."},
+ { "perf_sample_insn", perf_sample_insn,
+ METH_VARARGS, "Get the machine code instruction."},
+ { "perf_set_itrace_options", perf_set_itrace_options,
+ METH_VARARGS, "Set --itrace options."},
+ { "perf_sample_srcline", perf_sample_srcline,
+ METH_VARARGS, "Get source file name and line number."},
+ { "perf_sample_srccode", perf_sample_srccode,
+ METH_VARARGS, "Get source file name, line number and line."},
{ NULL, NULL, 0, NULL}
};
@@ -96,6 +214,12 @@ PyMODINIT_FUNC PyInit_perf_trace_context(void)
NULL, /* m_clear */
NULL, /* m_free */
};
- return PyModule_Create(&moduledef);
+ PyObject *mod;
+
+ mod = PyModule_Create(&moduledef);
+ /* Add perf_script_context to the module so it can be imported */
+ PyObject_SetAttrString(mod, "perf_script_context", Py_None);
+
+ return mod;
}
#endif
diff --git a/tools/perf/scripts/python/bin/intel-pt-events-record b/tools/perf/scripts/python/bin/intel-pt-events-record
index 10fe2b6977d4..6b9877cfe23e 100644
--- a/tools/perf/scripts/python/bin/intel-pt-events-record
+++ b/tools/perf/scripts/python/bin/intel-pt-events-record
@@ -1,8 +1,8 @@
#!/bin/bash
#
-# print Intel PT Power Events and PTWRITE. The intel_pt PMU event needs
-# to be specified with appropriate config terms.
+# print Intel PT Events including Power Events and PTWRITE. The intel_pt PMU
+# event needs to be specified with appropriate config terms.
#
if ! echo "$@" | grep -q intel_pt ; then
echo "Options must include the Intel PT event e.g. -e intel_pt/pwr_evt,ptw/"
diff --git a/tools/perf/scripts/python/bin/intel-pt-events-report b/tools/perf/scripts/python/bin/intel-pt-events-report
index 9a9c92fcd026..beeac3fde9db 100644
--- a/tools/perf/scripts/python/bin/intel-pt-events-report
+++ b/tools/perf/scripts/python/bin/intel-pt-events-report
@@ -1,3 +1,3 @@
#!/bin/bash
-# description: print Intel PT Power Events and PTWRITE
-perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/intel-pt-events.py \ No newline at end of file
+# description: print Intel PT Events including Power Events and PTWRITE
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/intel-pt-events.py
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 7daa8bb70a5a..13f2d8a81610 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -91,6 +91,11 @@
from __future__ import print_function
import sys
+# Only change warnings if the python -W option was not used
+if not sys.warnoptions:
+ import warnings
+ # PySide2 causes deprecation warnings, ignore them.
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
import argparse
import weakref
import threading
@@ -108,6 +113,7 @@ import os
import random
import copy
import math
+from libxed import LibXED
pyside_version_1 = True
if not "--pyside-version-1" in sys.argv:
@@ -125,8 +131,9 @@ if pyside_version_1:
from PySide.QtGui import *
from PySide.QtSql import *
-from decimal import *
-from ctypes import *
+from decimal import Decimal, ROUND_HALF_UP
+from ctypes import CDLL, Structure, create_string_buffer, addressof, sizeof, \
+ c_void_p, c_bool, c_byte, c_char, c_int, c_uint, c_longlong, c_ulonglong
from multiprocessing import Process, Array, Value, Event
# xrange is range in Python3
@@ -3868,7 +3875,7 @@ def CopyTableCellsToClipboard(view, as_csv=False, with_hdr=False):
if with_hdr:
model = indexes[0].model()
for col in range(min_col, max_col + 1):
- val = model.headerData(col, Qt.Horizontal)
+ val = model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
if as_csv:
text += sep + ToCSValue(val)
sep = ","
@@ -4741,94 +4748,6 @@ class MainWindow(QMainWindow):
dialog = AboutDialog(self.glb, self)
dialog.exec_()
-# XED Disassembler
-
-class xed_state_t(Structure):
-
- _fields_ = [
- ("mode", c_int),
- ("width", c_int)
- ]
-
-class XEDInstruction():
-
- def __init__(self, libxed):
- # Current xed_decoded_inst_t structure is 192 bytes. Use 512 to allow for future expansion
- xedd_t = c_byte * 512
- self.xedd = xedd_t()
- self.xedp = addressof(self.xedd)
- libxed.xed_decoded_inst_zero(self.xedp)
- self.state = xed_state_t()
- self.statep = addressof(self.state)
- # Buffer for disassembled instruction text
- self.buffer = create_string_buffer(256)
- self.bufferp = addressof(self.buffer)
-
-class LibXED():
-
- def __init__(self):
- try:
- self.libxed = CDLL("libxed.so")
- except:
- self.libxed = None
- if not self.libxed:
- self.libxed = CDLL("/usr/local/lib/libxed.so")
-
- self.xed_tables_init = self.libxed.xed_tables_init
- self.xed_tables_init.restype = None
- self.xed_tables_init.argtypes = []
-
- self.xed_decoded_inst_zero = self.libxed.xed_decoded_inst_zero
- self.xed_decoded_inst_zero.restype = None
- self.xed_decoded_inst_zero.argtypes = [ c_void_p ]
-
- self.xed_operand_values_set_mode = self.libxed.xed_operand_values_set_mode
- self.xed_operand_values_set_mode.restype = None
- self.xed_operand_values_set_mode.argtypes = [ c_void_p, c_void_p ]
-
- self.xed_decoded_inst_zero_keep_mode = self.libxed.xed_decoded_inst_zero_keep_mode
- self.xed_decoded_inst_zero_keep_mode.restype = None
- self.xed_decoded_inst_zero_keep_mode.argtypes = [ c_void_p ]
-
- self.xed_decode = self.libxed.xed_decode
- self.xed_decode.restype = c_int
- self.xed_decode.argtypes = [ c_void_p, c_void_p, c_uint ]
-
- self.xed_format_context = self.libxed.xed_format_context
- self.xed_format_context.restype = c_uint
- self.xed_format_context.argtypes = [ c_int, c_void_p, c_void_p, c_int, c_ulonglong, c_void_p, c_void_p ]
-
- self.xed_tables_init()
-
- def Instruction(self):
- return XEDInstruction(self)
-
- def SetMode(self, inst, mode):
- if mode:
- inst.state.mode = 4 # 32-bit
- inst.state.width = 4 # 4 bytes
- else:
- inst.state.mode = 1 # 64-bit
- inst.state.width = 8 # 8 bytes
- self.xed_operand_values_set_mode(inst.xedp, inst.statep)
-
- def DisassembleOne(self, inst, bytes_ptr, bytes_cnt, ip):
- self.xed_decoded_inst_zero_keep_mode(inst.xedp)
- err = self.xed_decode(inst.xedp, bytes_ptr, bytes_cnt)
- if err:
- return 0, ""
- # Use AT&T mode (2), alternative is Intel (3)
- ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0)
- if not ok:
- return 0, ""
- if sys.version_info[0] == 2:
- result = inst.buffer.value
- else:
- result = inst.buffer.value.decode()
- # Return instruction length and the disassembled instruction text
- # For now, assume the length is in byte 166
- return inst.xedd[166], result
-
def TryOpen(file_name):
try:
return open(file_name, "rb")
diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py
index a73847c8f548..1d3a189a9a54 100644
--- a/tools/perf/scripts/python/intel-pt-events.py
+++ b/tools/perf/scripts/python/intel-pt-events.py
@@ -1,5 +1,6 @@
-# intel-pt-events.py: Print Intel PT Power Events and PTWRITE
-# Copyright (c) 2017, Intel Corporation.
+# SPDX-License-Identifier: GPL-2.0
+# intel-pt-events.py: Print Intel PT Events including Power Events and PTWRITE
+# Copyright (c) 2017-2021, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
@@ -15,16 +16,82 @@ from __future__ import print_function
import os
import sys
import struct
+import argparse
+
+from libxed import LibXED
+from ctypes import create_string_buffer, addressof
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
-# These perf imports are not used at present
-#from perf_trace_context import *
-#from Core import *
+from perf_trace_context import perf_set_itrace_options, \
+ perf_sample_insn, perf_sample_srccode
+
+try:
+ broken_pipe_exception = BrokenPipeError
+except:
+ broken_pipe_exception = IOError
+
+glb_switch_str = None
+glb_switch_printed = True
+glb_insn = False
+glb_disassembler = None
+glb_src = False
+glb_source_file_name = None
+glb_line_number = None
+glb_dso = None
+
+def get_optional_null(perf_dict, field):
+ if field in perf_dict:
+ return perf_dict[field]
+ return ""
+
+def get_optional_zero(perf_dict, field):
+ if field in perf_dict:
+ return perf_dict[field]
+ return 0
+
+def get_optional_bytes(perf_dict, field):
+ if field in perf_dict:
+ return perf_dict[field]
+ return bytes()
+
+def get_optional(perf_dict, field):
+ if field in perf_dict:
+ return perf_dict[field]
+ return "[unknown]"
+
+def get_offset(perf_dict, field):
+ if field in perf_dict:
+ return "+%#x" % perf_dict[field]
+ return ""
def trace_begin():
- print("Intel PT Power Events and PTWRITE")
+ ap = argparse.ArgumentParser(usage = "", add_help = False)
+ ap.add_argument("--insn-trace", action='store_true')
+ ap.add_argument("--src-trace", action='store_true')
+ global glb_args
+ global glb_insn
+ global glb_src
+ glb_args = ap.parse_args()
+ if glb_args.insn_trace:
+ print("Intel PT Instruction Trace")
+ itrace = "i0nsepwx"
+ glb_insn = True
+ elif glb_args.src_trace:
+ print("Intel PT Source Trace")
+ itrace = "i0nsepwx"
+ glb_insn = True
+ glb_src = True
+ else:
+ print("Intel PT Branch Trace, Power Events and PTWRITE")
+ itrace = "bepwx"
+ global glb_disassembler
+ try:
+ glb_disassembler = LibXED()
+ except:
+ glb_disassembler = None
+ perf_set_itrace_options(perf_script_context, itrace)
def trace_end():
print("End")
@@ -77,58 +144,212 @@ def print_pwrx(raw_buf):
print("deepest cstate: %u last cstate: %u wake reason: %#x" %
(deepest_cstate, last_cstate, wake_reason), end=' ')
-def print_common_start(comm, sample, name):
+def print_psb(raw_buf):
+ data = struct.unpack_from("<IQ", raw_buf)
+ offset = data[1]
+ print("offset: %#x" % (offset), end=' ')
+
+def common_start_str(comm, sample):
ts = sample["time"]
cpu = sample["cpu"]
pid = sample["pid"]
tid = sample["tid"]
- print("%16s %5u/%-5u [%03u] %9u.%09u %7s:" %
- (comm, pid, tid, cpu, ts / 1000000000, ts %1000000000, name),
- end=' ')
+ return "%16s %5u/%-5u [%03u] %9u.%09u " % (comm, pid, tid, cpu, ts / 1000000000, ts %1000000000)
+
+def print_common_start(comm, sample, name):
+ flags_disp = get_optional_null(sample, "flags_disp")
+ # Unused fields:
+ # period = sample["period"]
+ # phys_addr = sample["phys_addr"]
+ # weight = sample["weight"]
+ # transaction = sample["transaction"]
+ # cpumode = get_optional_zero(sample, "cpumode")
+ print(common_start_str(comm, sample) + "%7s %19s" % (name, flags_disp), end=' ')
+
+def print_instructions_start(comm, sample):
+ if "x" in get_optional_null(sample, "flags"):
+ print(common_start_str(comm, sample) + "x", end=' ')
+ else:
+ print(common_start_str(comm, sample), end=' ')
+
+def disassem(insn, ip):
+ inst = glb_disassembler.Instruction()
+ glb_disassembler.SetMode(inst, 0) # Assume 64-bit
+ buf = create_string_buffer(64)
+ buf.value = insn
+ return glb_disassembler.DisassembleOne(inst, addressof(buf), len(insn), ip)
+
+def print_common_ip(param_dict, sample, symbol, dso):
+ ip = sample["ip"]
+ offs = get_offset(param_dict, "symoff")
+ if "cyc_cnt" in sample:
+ cyc_cnt = sample["cyc_cnt"]
+ insn_cnt = get_optional_zero(sample, "insn_cnt")
+ ipc_str = " IPC: %#.2f (%u/%u)" % (insn_cnt / cyc_cnt, insn_cnt, cyc_cnt)
+ else:
+ ipc_str = ""
+ if glb_insn and glb_disassembler is not None:
+ insn = perf_sample_insn(perf_script_context)
+ if insn and len(insn):
+ cnt, text = disassem(insn, ip)
+ byte_str = ("%x" % ip).rjust(16)
+ if sys.version_info.major >= 3:
+ for k in range(cnt):
+ byte_str += " %02x" % insn[k]
+ else:
+ for k in xrange(cnt):
+ byte_str += " %02x" % ord(insn[k])
+ print("%-40s %-30s" % (byte_str, text), end=' ')
+ print("%s%s (%s)" % (symbol, offs, dso), end=' ')
+ else:
+ print("%16x %s%s (%s)" % (ip, symbol, offs, dso), end=' ')
+ if "addr_correlates_sym" in sample:
+ addr = sample["addr"]
+ dso = get_optional(sample, "addr_dso")
+ symbol = get_optional(sample, "addr_symbol")
+ offs = get_offset(sample, "addr_symoff")
+ print("=> %x %s%s (%s)%s" % (addr, symbol, offs, dso, ipc_str))
+ else:
+ print(ipc_str)
-def print_common_ip(sample, symbol, dso):
+def print_srccode(comm, param_dict, sample, symbol, dso, with_insn):
ip = sample["ip"]
- print("%16x %s (%s)" % (ip, symbol, dso))
+ if symbol == "[unknown]":
+ start_str = common_start_str(comm, sample) + ("%x" % ip).rjust(16).ljust(40)
+ else:
+ offs = get_offset(param_dict, "symoff")
+ start_str = common_start_str(comm, sample) + (symbol + offs).ljust(40)
-def process_event(param_dict):
+ if with_insn and glb_insn and glb_disassembler is not None:
+ insn = perf_sample_insn(perf_script_context)
+ if insn and len(insn):
+ cnt, text = disassem(insn, ip)
+ start_str += text.ljust(30)
+
+ global glb_source_file_name
+ global glb_line_number
+ global glb_dso
+
+ source_file_name, line_number, source_line = perf_sample_srccode(perf_script_context)
+ if source_file_name:
+ if glb_line_number == line_number and glb_source_file_name == source_file_name:
+ src_str = ""
+ else:
+ if len(source_file_name) > 40:
+ src_file = ("..." + source_file_name[-37:]) + " "
+ else:
+ src_file = source_file_name.ljust(41)
+ if source_line is None:
+ src_str = src_file + str(line_number).rjust(4) + " <source not found>"
+ else:
+ src_str = src_file + str(line_number).rjust(4) + " " + source_line
+ glb_dso = None
+ elif dso == glb_dso:
+ src_str = ""
+ else:
+ src_str = dso
+ glb_dso = dso
+
+ glb_line_number = line_number
+ glb_source_file_name = source_file_name
+
+ print(start_str, src_str)
+
+def do_process_event(param_dict):
+ global glb_switch_printed
+ if not glb_switch_printed:
+ print(glb_switch_str)
+ glb_switch_printed = True
event_attr = param_dict["attr"]
- sample = param_dict["sample"]
- raw_buf = param_dict["raw_buf"]
+ sample = param_dict["sample"]
+ raw_buf = param_dict["raw_buf"]
comm = param_dict["comm"]
name = param_dict["ev_name"]
+ # Unused fields:
+ # callchain = param_dict["callchain"]
+ # brstack = param_dict["brstack"]
+ # brstacksym = param_dict["brstacksym"]
# Symbol and dso info are not always resolved
- if "dso" in param_dict:
- dso = param_dict["dso"]
- else:
- dso = "[unknown]"
-
- if "symbol" in param_dict:
- symbol = param_dict["symbol"]
- else:
- symbol = "[unknown]"
+ dso = get_optional(param_dict, "dso")
+ symbol = get_optional(param_dict, "symbol")
- if name == "ptwrite":
+ if name[0:12] == "instructions":
+ if glb_src:
+ print_srccode(comm, param_dict, sample, symbol, dso, True)
+ else:
+ print_instructions_start(comm, sample)
+ print_common_ip(param_dict, sample, symbol, dso)
+ elif name[0:8] == "branches":
+ if glb_src:
+ print_srccode(comm, param_dict, sample, symbol, dso, False)
+ else:
+ print_common_start(comm, sample, name)
+ print_common_ip(param_dict, sample, symbol, dso)
+ elif name == "ptwrite":
print_common_start(comm, sample, name)
print_ptwrite(raw_buf)
- print_common_ip(sample, symbol, dso)
+ print_common_ip(param_dict, sample, symbol, dso)
elif name == "cbr":
print_common_start(comm, sample, name)
print_cbr(raw_buf)
- print_common_ip(sample, symbol, dso)
+ print_common_ip(param_dict, sample, symbol, dso)
elif name == "mwait":
print_common_start(comm, sample, name)
print_mwait(raw_buf)
- print_common_ip(sample, symbol, dso)
+ print_common_ip(param_dict, sample, symbol, dso)
elif name == "pwre":
print_common_start(comm, sample, name)
print_pwre(raw_buf)
- print_common_ip(sample, symbol, dso)
+ print_common_ip(param_dict, sample, symbol, dso)
elif name == "exstop":
print_common_start(comm, sample, name)
print_exstop(raw_buf)
- print_common_ip(sample, symbol, dso)
+ print_common_ip(param_dict, sample, symbol, dso)
elif name == "pwrx":
print_common_start(comm, sample, name)
print_pwrx(raw_buf)
- print_common_ip(sample, symbol, dso)
+ print_common_ip(param_dict, sample, symbol, dso)
+ elif name == "psb":
+ print_common_start(comm, sample, name)
+ print_psb(raw_buf)
+ print_common_ip(param_dict, sample, symbol, dso)
+ else:
+ print_common_start(comm, sample, name)
+ print_common_ip(param_dict, sample, symbol, dso)
+
+def process_event(param_dict):
+ try:
+ do_process_event(param_dict)
+ except broken_pipe_exception:
+ # Stop python printing broken pipe errors and traceback
+ sys.stdout = open(os.devnull, 'w')
+ sys.exit(1)
+
+def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x):
+ try:
+ print("%16s %5u/%-5u [%03u] %9u.%09u error type %u code %u: %s ip 0x%16x" %
+ ("Trace error", pid, tid, cpu, ts / 1000000000, ts %1000000000, typ, code, msg, ip))
+ except broken_pipe_exception:
+ # Stop python printing broken pipe errors and traceback
+ sys.stdout = open(os.devnull, 'w')
+ sys.exit(1)
+
+def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x):
+ global glb_switch_printed
+ global glb_switch_str
+ if out:
+ out_str = "Switch out "
+ else:
+ out_str = "Switch In "
+ if out_preempt:
+ preempt_str = "preempt"
+ else:
+ preempt_str = ""
+ if machine_pid == -1:
+ machine_str = ""
+ else:
+ machine_str = "machine PID %d" % machine_pid
+ glb_switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \
+ (out_str, pid, tid, cpu, ts / 1000000000, ts %1000000000, np_pid, np_tid, machine_str, preempt_str)
+ glb_switch_printed = False
diff --git a/tools/perf/scripts/python/libxed.py b/tools/perf/scripts/python/libxed.py
new file mode 100644
index 000000000000..2c70a5a7eb9c
--- /dev/null
+++ b/tools/perf/scripts/python/libxed.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GPL-2.0
+# libxed.py: Python wrapper for libxed.so
+# Copyright (c) 2014-2021, Intel Corporation.
+
+# To use Intel XED, libxed.so must be present. To build and install
+# libxed.so:
+# git clone https://github.com/intelxed/mbuild.git mbuild
+# git clone https://github.com/intelxed/xed
+# cd xed
+# ./mfile.py --share
+# sudo ./mfile.py --prefix=/usr/local install
+# sudo ldconfig
+#
+
+import sys
+
+from ctypes import CDLL, Structure, create_string_buffer, addressof, sizeof, \
+ c_void_p, c_bool, c_byte, c_char, c_int, c_uint, c_longlong, c_ulonglong
+
+# XED Disassembler
+
+class xed_state_t(Structure):
+
+ _fields_ = [
+ ("mode", c_int),
+ ("width", c_int)
+ ]
+
+class XEDInstruction():
+
+ def __init__(self, libxed):
+ # Current xed_decoded_inst_t structure is 192 bytes. Use 512 to allow for future expansion
+ xedd_t = c_byte * 512
+ self.xedd = xedd_t()
+ self.xedp = addressof(self.xedd)
+ libxed.xed_decoded_inst_zero(self.xedp)
+ self.state = xed_state_t()
+ self.statep = addressof(self.state)
+ # Buffer for disassembled instruction text
+ self.buffer = create_string_buffer(256)
+ self.bufferp = addressof(self.buffer)
+
+class LibXED():
+
+ def __init__(self):
+ try:
+ self.libxed = CDLL("libxed.so")
+ except:
+ self.libxed = None
+ if not self.libxed:
+ self.libxed = CDLL("/usr/local/lib/libxed.so")
+
+ self.xed_tables_init = self.libxed.xed_tables_init
+ self.xed_tables_init.restype = None
+ self.xed_tables_init.argtypes = []
+
+ self.xed_decoded_inst_zero = self.libxed.xed_decoded_inst_zero
+ self.xed_decoded_inst_zero.restype = None
+ self.xed_decoded_inst_zero.argtypes = [ c_void_p ]
+
+ self.xed_operand_values_set_mode = self.libxed.xed_operand_values_set_mode
+ self.xed_operand_values_set_mode.restype = None
+ self.xed_operand_values_set_mode.argtypes = [ c_void_p, c_void_p ]
+
+ self.xed_decoded_inst_zero_keep_mode = self.libxed.xed_decoded_inst_zero_keep_mode
+ self.xed_decoded_inst_zero_keep_mode.restype = None
+ self.xed_decoded_inst_zero_keep_mode.argtypes = [ c_void_p ]
+
+ self.xed_decode = self.libxed.xed_decode
+ self.xed_decode.restype = c_int
+ self.xed_decode.argtypes = [ c_void_p, c_void_p, c_uint ]
+
+ self.xed_format_context = self.libxed.xed_format_context
+ self.xed_format_context.restype = c_uint
+ self.xed_format_context.argtypes = [ c_int, c_void_p, c_void_p, c_int, c_ulonglong, c_void_p, c_void_p ]
+
+ self.xed_tables_init()
+
+ def Instruction(self):
+ return XEDInstruction(self)
+
+ def SetMode(self, inst, mode):
+ if mode:
+ inst.state.mode = 4 # 32-bit
+ inst.state.width = 4 # 4 bytes
+ else:
+ inst.state.mode = 1 # 64-bit
+ inst.state.width = 8 # 8 bytes
+ self.xed_operand_values_set_mode(inst.xedp, inst.statep)
+
+ def DisassembleOne(self, inst, bytes_ptr, bytes_cnt, ip):
+ self.xed_decoded_inst_zero_keep_mode(inst.xedp)
+ err = self.xed_decode(inst.xedp, bytes_ptr, bytes_cnt)
+ if err:
+ return 0, ""
+ # Use AT&T mode (2), alternative is Intel (3)
+ ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0)
+ if not ok:
+ return 0, ""
+ if sys.version_info[0] == 2:
+ result = inst.buffer.value
+ else:
+ result = inst.buffer.value.decode()
+ # Return instruction length and the disassembled instruction text
+ # For now, assume the length is in byte 166
+ return inst.xedd[166], result
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index 4a7b8deef3fd..8c10955eff93 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -16,7 +16,7 @@ pinned=0
exclusive=0
exclude_user=0
exclude_kernel=0|1
-exclude_hv=0
+exclude_hv=0|1
exclude_idle=0
mmap=1
comm=1
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index c4b888f18e9c..41e3cf6bb66c 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -510,8 +510,8 @@ static const char *shell_test__description(char *description, size_t size,
return description ? strim(description + 1) : NULL;
}
-#define for_each_shell_test(dir, base, ent) \
- while ((ent = readdir(dir)) != NULL) \
+#define for_each_shell_test(entlist, nr, base, ent) \
+ for (int __i = 0; __i < nr && (ent = entlist[__i]); __i++) \
if (!is_directory(base, ent) && ent->d_name[0] != '.')
static const char *shell_tests__dir(char *path, size_t size)
@@ -538,8 +538,9 @@ static const char *shell_tests__dir(char *path, size_t size)
static int shell_tests__max_desc_width(void)
{
- DIR *dir;
+ struct dirent **entlist;
struct dirent *ent;
+ int n_dirs;
char path_dir[PATH_MAX];
const char *path = shell_tests__dir(path_dir, sizeof(path_dir));
int width = 0;
@@ -547,11 +548,11 @@ static int shell_tests__max_desc_width(void)
if (path == NULL)
return -1;
- dir = opendir(path);
- if (!dir)
+ n_dirs = scandir(path, &entlist, NULL, alphasort);
+ if (n_dirs == -1)
return -1;
- for_each_shell_test(dir, path, ent) {
+ for_each_shell_test(entlist, n_dirs, path, ent) {
char bf[256];
const char *desc = shell_test__description(bf, sizeof(bf), path, ent->d_name);
@@ -563,7 +564,8 @@ static int shell_tests__max_desc_width(void)
}
}
- closedir(dir);
+ free(entlist);
+
return width;
}
@@ -578,7 +580,10 @@ static int shell_test__run(struct test *test, int subdir __maybe_unused)
char script[PATH_MAX];
struct shell_test *st = test->priv;
- path__join(script, sizeof(script), st->dir, st->file);
+ path__join(script, sizeof(script) - 3, st->dir, st->file);
+
+ if (verbose)
+ strncat(script, " -v", sizeof(script) - strlen(script) - 1);
err = system(script);
if (!err)
@@ -589,8 +594,9 @@ static int shell_test__run(struct test *test, int subdir __maybe_unused)
static int run_shell_tests(int argc, const char *argv[], int i, int width)
{
- DIR *dir;
+ struct dirent **entlist;
struct dirent *ent;
+ int n_dirs;
char path_dir[PATH_MAX];
struct shell_test st = {
.dir = shell_tests__dir(path_dir, sizeof(path_dir)),
@@ -599,14 +605,14 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width)
if (st.dir == NULL)
return -1;
- dir = opendir(st.dir);
- if (!dir) {
+ n_dirs = scandir(st.dir, &entlist, NULL, alphasort);
+ if (n_dirs == -1) {
pr_err("failed to open shell test directory: %s\n",
st.dir);
return -1;
}
- for_each_shell_test(dir, st.dir, ent) {
+ for_each_shell_test(entlist, n_dirs, st.dir, ent) {
int curr = i++;
char desc[256];
struct test test = {
@@ -623,7 +629,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width)
test_and_print(&test, false, -1);
}
- closedir(dir);
+ free(entlist);
return 0;
}
@@ -722,19 +728,20 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
static int perf_test__list_shell(int argc, const char **argv, int i)
{
- DIR *dir;
+ struct dirent **entlist;
struct dirent *ent;
+ int n_dirs;
char path_dir[PATH_MAX];
const char *path = shell_tests__dir(path_dir, sizeof(path_dir));
if (path == NULL)
return -1;
- dir = opendir(path);
- if (!dir)
+ n_dirs = scandir(path, &entlist, NULL, alphasort);
+ if (n_dirs == -1)
return -1;
- for_each_shell_test(dir, path, ent) {
+ for_each_shell_test(entlist, n_dirs, path, ent) {
int curr = i++;
char bf[256];
struct test t = {
@@ -747,7 +754,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i)
pr_info("%2d: %s\n", i, t.desc);
}
- closedir(dir);
+ free(entlist);
return 0;
}
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 83638097c3bc..a288035eb362 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -17,10 +17,6 @@
#include "callchain.h"
#include "util/synthetic-events.h"
-#if defined (__x86_64__) || defined (__i386__) || defined (__powerpc__)
-#include "arch-tests.h"
-#endif
-
/* For bsearch. We try to unwind functions in shared object. */
#include <stdlib.h>
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 94bd5d215d94..da013e90a945 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -84,9 +84,11 @@ make_no_libaudit := NO_LIBAUDIT=1
make_no_libbionic := NO_LIBBIONIC=1
make_no_auxtrace := NO_AUXTRACE=1
make_no_libbpf := NO_LIBBPF=1
+make_libbpf_dynamic := LIBBPF_DYNAMIC=1
make_no_libbpf_DEBUG := NO_LIBBPF=1 DEBUG=1
make_no_libcrypto := NO_LIBCRYPTO=1
make_with_babeltrace:= LIBBABELTRACE=1
+make_with_coresight := CORESIGHT=1
make_no_sdt := NO_SDT=1
make_no_syscall_tbl := NO_SYSCALL_TABLE=1
make_with_clangllvm := LIBCLANGLLVM=1
@@ -148,11 +150,13 @@ run += make_no_libaudit
run += make_no_libbionic
run += make_no_auxtrace
run += make_no_libbpf
+run += make_libbpf_dynamic
run += make_no_libbpf_DEBUG
run += make_no_libcrypto
run += make_no_sdt
run += make_no_syscall_tbl
run += make_with_babeltrace
+run += make_with_coresight
run += make_with_clangllvm
run += make_with_libpfm4
run += make_help
@@ -266,6 +270,9 @@ test_make_install_info_O := $(test_ok)
test_make_install_pdf := $(test_ok)
test_make_install_pdf_O := $(test_ok)
+test_make_libbpf_dynamic := ldd $(PERF_O)/perf | grep -q libbpf
+test_make_libbpf_dynamic_O := ldd $$TMP_O/perf | grep -q libbpf
+
test_make_python_perf_so_O := test -f $$TMP_O/python/perf.so
test_make_perf_o_O := test -f $$TMP_O/perf.o
test_make_util_map_o_O := test -f $$TMP_O/util/map.o
diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c
index 76a53126efdf..acd50944f6af 100644
--- a/tools/perf/tests/pfm.c
+++ b/tools/perf/tests/pfm.c
@@ -131,8 +131,8 @@ static int test__pfm_group(void)
},
{
.events = "{},{instructions}",
- .nr_events = 0,
- .nr_groups = 0,
+ .nr_events = 1,
+ .nr_groups = 1,
},
{
.events = "{instructions},{instructions}",
@@ -155,6 +155,16 @@ static int test__pfm_group(void)
.nr_events = 3,
.nr_groups = 1,
},
+ {
+ .events = "instructions}",
+ .nr_events = 1,
+ .nr_groups = 0,
+ },
+ {
+ .events = "{{instructions}}",
+ .nr_events = 0,
+ .nr_groups = 0,
+ },
};
for (i = 0; i < ARRAY_SIZE(table); i++) {
diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh
index 22eb31e48ca7..2aed20dc2262 100755
--- a/tools/perf/tests/shell/stat_bpf_counters.sh
+++ b/tools/perf/tests/shell/stat_bpf_counters.sh
@@ -11,9 +11,9 @@ compare_number()
second_num=$2
# upper bound is first_num * 110%
- upper=$(( $first_num + $first_num / 10 ))
+ upper=$(expr $first_num + $first_num / 10 )
# lower bound is first_num * 90%
- lower=$(( $first_num - $first_num / 10 ))
+ lower=$(expr $first_num - $first_num / 10 )
if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
echo "The difference between $first_num and $second_num are greater than 10%."
@@ -22,10 +22,24 @@ compare_number()
}
# skip if --bpf-counters is not supported
-perf stat --bpf-counters true > /dev/null 2>&1 || exit 2
+if ! perf stat --bpf-counters true > /dev/null 2>&1; then
+ if [ "$1" == "-v" ]; then
+ echo "Skipping: --bpf-counters not supported"
+ perf --no-pager stat --bpf-counters true || true
+ fi
+ exit 2
+fi
base_cycles=$(perf stat --no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
+if [ "$base_cycles" == "<not" ]; then
+ echo "Skipping: cycles event not counted"
+ exit 2
+fi
bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
+if [ "$bpf_cycles" == "<not" ]; then
+ echo "Failed: cycles not counted with --bpf-counters"
+ exit 1
+fi
compare_number $base_cycles $bpf_cycles
exit 0
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index b85f005308a3..1100dd55b657 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -133,14 +133,12 @@ bool test__bp_account_is_supported(void);
bool test__wp_is_supported(void);
bool test__tsc_is_supported(void);
-#if defined(__arm__) || defined(__aarch64__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
struct perf_sample;
int test__arch_unwind_sample(struct perf_sample *sample,
struct thread *thread);
#endif
-#endif
#if defined(__arm__)
int test__vectors_page(struct test *test, int subtest);
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index b8fc5c53ba6f..0d8e3dcb7f88 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -438,6 +438,4 @@ extern int __sys_socketpair(int family, int type, int protocol,
int __user *usockvec);
extern int __sys_shutdown_sock(struct socket *sock, int how);
extern int __sys_shutdown(int fd, int how);
-
-extern struct ns_common *get_net_ns(struct ns_common *ns);
#endif /* _LINUX_SOCKET_H */
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index ad0a70f0edaf..f5509a958e38 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -343,6 +343,29 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
browser->curr_hot = rb_last(&browser->entries);
}
+static struct annotation_line *annotate_browser__find_next_asm_line(
+ struct annotate_browser *browser,
+ struct annotation_line *al)
+{
+ struct annotation_line *it = al;
+
+ /* find next asm line */
+ list_for_each_entry_continue(it, browser->b.top, node) {
+ if (it->idx_asm >= 0)
+ return it;
+ }
+
+ /* no asm line found forwards, try backwards */
+ it = al;
+ list_for_each_entry_continue_reverse(it, browser->b.top, node) {
+ if (it->idx_asm >= 0)
+ return it;
+ }
+
+ /* There are no asm lines */
+ return NULL;
+}
+
static bool annotate_browser__toggle_source(struct annotate_browser *browser)
{
struct annotation *notes = browser__annotation(&browser->b);
@@ -363,9 +386,12 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser)
browser->b.index = al->idx;
} else {
if (al->idx_asm < 0) {
- ui_helpline__puts("Only available for assembly lines.");
- browser->b.seek(&browser->b, -offset, SEEK_CUR);
- return false;
+ /* move cursor to next asm line */
+ al = annotate_browser__find_next_asm_line(browser, al);
+ if (!al) {
+ browser->b.seek(&browser->b, -offset, SEEK_CUR);
+ return false;
+ }
}
if (al->idx_asm < offset)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index b64bdc1a7026..1a909b53dc15 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -126,6 +126,7 @@ perf-y += parse-regs-options.o
perf-y += parse-sublevel-options.o
perf-y += term.o
perf-y += help-unknown-cmd.o
+perf-y += dlfilter.o
perf-y += mem-events.o
perf-y += vsprintf.o
perf-y += units.o
@@ -216,7 +217,7 @@ $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse-
$(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
-o $(OUTPUT)util/parse-events-bison.c -p parse_events_
$(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c
@@ -226,7 +227,7 @@ $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/e
$(OUTPUT)util/expr-bison.c $(OUTPUT)util/expr-bison.h: util/expr.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
-o $(OUTPUT)util/expr-bison.c -p expr_
$(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-flex.h: util/pmu.l $(OUTPUT)util/pmu-bison.c
@@ -236,7 +237,7 @@ $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-flex.h: util/pmu.l $(OUTPUT)util/pmu-
$(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
-o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
FLEX_GE_26 := $(shell expr $(shell $(FLEX) --version | sed -e 's/flex \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 26)
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 2539d4baec44..58b7069c5a5f 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -26,6 +26,7 @@
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
+#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"
@@ -45,6 +46,8 @@ struct arm_spe {
struct machine *machine;
u32 pmu_type;
+ struct perf_tsc_conversion tc;
+
u8 timeless_decoding;
u8 data_queued;
@@ -231,7 +234,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
struct arm_spe_record *record = &speq->decoder->record;
if (!spe->timeless_decoding)
- sample->time = speq->timestamp;
+ sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
sample->ip = record->from_ip;
sample->cpumode = arm_spe_cpumode(spe, sample->ip);
@@ -431,12 +434,36 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record;
int ret;
if (!spe->kernel_start)
spe->kernel_start = machine__kernel_start(spe->machine);
while (1) {
+ /*
+ * The usual logic is to decode the packets first, and then
+ * synthesize samples based on the record; but here the flow is
+ * reversed: it calls arm_spe_sample() for synthesizing samples
+ * prior to arm_spe_decode().
+ *
+ * Two reasons for this code logic:
+ * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
+ * has decoded trace data and generated a record, but the record
+ * is left to generate sample until run to here, so it's correct
+ * to synthesize sample for the left record.
+ * 2. After decoding trace data, it needs to compare the record
+ * timestamp with the coming perf event, if the record timestamp
+ * is later than the perf event, it needs to bail out and push the
+ * record into auxtrace heap, thus the record can be deferred to
+ * synthesize sample until run to here at the next time; so this
+ * can correlate samples between Arm SPE trace data and other
+ * perf events with correct time ordering.
+ */
+ ret = arm_spe_sample(speq);
+ if (ret)
+ return ret;
+
ret = arm_spe_decode(speq->decoder);
if (!ret) {
pr_debug("No data or all data has been processed.\n");
@@ -450,10 +477,17 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
if (ret < 0)
continue;
- ret = arm_spe_sample(speq);
- if (ret)
- return ret;
+ record = &speq->decoder->record;
+ /* Update timestamp for the last record */
+ if (record->timestamp > speq->timestamp)
+ speq->timestamp = record->timestamp;
+
+ /*
+ * If the queue timestamp is not earlier than the timestamp of
+ * the incoming perf event, bail out so that the perf event can
+ * be processed first.
+ */
if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
*timestamp = speq->timestamp;
return 0;
@@ -666,7 +700,7 @@ static int arm_spe_process_event(struct perf_session *session,
}
if (sample->time && (sample->time != (u64) -1))
- timestamp = sample->time;
+ timestamp = perf_time_to_tsc(sample->time, &spe->tc);
else
timestamp = 0;
@@ -683,11 +717,7 @@ static int arm_spe_process_event(struct perf_session *session,
sample->time);
}
} else if (timestamp) {
- if (event->header.type == PERF_RECORD_EXIT) {
- err = arm_spe_process_queues(spe, timestamp);
- if (err)
- return err;
- }
+ err = arm_spe_process_queues(spe, timestamp);
}
return err;
@@ -1006,6 +1036,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
{
struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
+ struct perf_record_time_conv *tc = &session->time_conv;
struct arm_spe *spe;
int err;
@@ -1027,6 +1058,28 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
+
+ /*
+ * The synthesized event PERF_RECORD_TIME_CONV has already been
+ * handled and the hardware clock parameters are stored in the
+ * session context. Pass these parameters to the struct
+ * perf_tsc_conversion in "spe->tc", which is used later to convert
+ * between clock counter values and timestamps.
+ *
+ * For backward compatibility, copy the fields starting from
+ * "time_cycles" only if they are contained in the event.
+ */
+ spe->tc.time_shift = tc->time_shift;
+ spe->tc.time_mult = tc->time_mult;
+ spe->tc.time_zero = tc->time_zero;
+
+ if (event_contains(*tc, time_cycles)) {
+ spe->tc.time_cycles = tc->time_cycles;
+ spe->tc.time_mask = tc->time_mask;
+ spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
+ spe->tc.cap_user_time_short = tc->cap_user_time_short;
+ }
+
spe->auxtrace.process_event = arm_spe_process_event;
spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
spe->auxtrace.flush_events = arm_spe_flush;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 1b4091a3b508..9350eeb3a3fc 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1120,8 +1120,9 @@ int auxtrace_queue_data(struct perf_session *session, bool samples, bool events)
auxtrace_queue_data_cb, &qd);
}
-void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd)
+void *auxtrace_buffer__get_data_rw(struct auxtrace_buffer *buffer, int fd, bool rw)
{
+ int prot = rw ? PROT_READ | PROT_WRITE : PROT_READ;
size_t adj = buffer->data_offset & (page_size - 1);
size_t size = buffer->size + adj;
off_t file_offset = buffer->data_offset - adj;
@@ -1130,7 +1131,7 @@ void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd)
if (buffer->data)
return buffer->data;
- addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, file_offset);
+ addr = mmap(NULL, size, prot, MAP_SHARED, fd, file_offset);
if (addr == MAP_FAILED)
return NULL;
@@ -1404,10 +1405,9 @@ static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *m
* about the options parsed here, which is introduced after this cset,
* when support in 'perf script' for these options is introduced.
*/
-int itrace_parse_synth_opts(const struct option *opt, const char *str,
- int unset)
+int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
+ const char *str, int unset)
{
- struct itrace_synth_opts *synth_opts = opt->value;
const char *p;
char *endptr;
bool period_type_set = false;
@@ -1569,6 +1569,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
case 'q':
synth_opts->quick += 1;
break;
+ case 'Z':
+ synth_opts->timeless_decoding = true;
+ break;
case ' ':
case ',':
break;
@@ -1592,6 +1595,11 @@ out_err:
return -EINVAL;
}
+int itrace_parse_synth_opts(const struct option *opt, const char *str, int unset)
+{
+ return itrace_do_parse_synth_opts(opt->value, str, unset);
+}
+
static const char * const auxtrace_error_type_name[] = {
[PERF_AUXTRACE_ERROR_ITRACE] = "instruction trace",
};
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index a4fbb33b7245..cc1c1b9cec9c 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -89,6 +89,10 @@ enum itrace_period_type {
* @tlb: whether to synthesize TLB events
* @remote_access: whether to synthesize remote access events
* @mem: whether to synthesize memory events
+ * @timeless_decoding: prefer "timeless" decoding i.e. ignore timestamps
+ * @vm_time_correlation: perform VM Time Correlation
+ * @vm_tm_corr_dry_run: VM Time Correlation dry-run
+ * @vm_tm_corr_args: VM Time Correlation implementation-specific arguments
* @callchain_sz: maximum callchain size
* @last_branch_sz: branch context size
* @period: 'instructions' events period
@@ -128,6 +132,10 @@ struct itrace_synth_opts {
bool tlb;
bool remote_access;
bool mem;
+ bool timeless_decoding;
+ bool vm_time_correlation;
+ bool vm_tm_corr_dry_run;
+ char *vm_tm_corr_args;
unsigned int callchain_sz;
unsigned int last_branch_sz;
unsigned long long period;
@@ -444,7 +452,7 @@ static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm)
u64 head = READ_ONCE(pc->aux_head);
/* Ensure all reads are done after we read the head */
- rmb();
+ smp_rmb();
return head;
}
@@ -458,7 +466,7 @@ static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
#endif
/* Ensure all reads are done after we read the head */
- rmb();
+ smp_rmb();
return head;
}
@@ -470,7 +478,7 @@ static inline void auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
#endif
/* Ensure all reads are done before we write the tail out */
- mb();
+ smp_mb();
#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
pc->aux_tail = tail;
#else
@@ -525,7 +533,11 @@ int auxtrace_queue_data(struct perf_session *session, bool samples,
bool events);
struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
struct auxtrace_buffer *buffer);
-void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd);
+void *auxtrace_buffer__get_data_rw(struct auxtrace_buffer *buffer, int fd, bool rw);
+static inline void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd)
+{
+ return auxtrace_buffer__get_data_rw(buffer, fd, false);
+}
void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer);
void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer);
void auxtrace_buffer__free(struct auxtrace_buffer *buffer);
@@ -595,6 +607,8 @@ s64 perf_event__process_auxtrace(struct perf_session *session,
union perf_event *event);
int perf_event__process_auxtrace_error(struct perf_session *session,
union perf_event *event);
+int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
+ const char *str, int unset);
int itrace_parse_synth_opts(const struct option *opt, const char *str,
int unset);
void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
@@ -691,9 +705,26 @@ int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused,
return 0;
}
-#define perf_event__process_auxtrace_info 0
-#define perf_event__process_auxtrace 0
-#define perf_event__process_auxtrace_error 0
+static inline
+int perf_event__process_auxtrace_info(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+s64 perf_event__process_auxtrace(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+int perf_event__process_auxtrace_error(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ return 0;
+}
static inline
void perf_session__auxtrace_error_inc(struct perf_session *session
@@ -710,6 +741,14 @@ void events_stats__auxtrace_error_warn(const struct events_stats *stats
}
static inline
+int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts __maybe_unused,
+ const char *str __maybe_unused, int unset __maybe_unused)
+{
+ pr_err("AUX area tracing not supported\n");
+ return -EINVAL;
+}
+
+static inline
int itrace_parse_synth_opts(const struct option *opt __maybe_unused,
const char *str __maybe_unused,
int unset __maybe_unused)
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index ddb52f748c8e..21c8e71162b1 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -7,12 +7,8 @@
#include <unistd.h>
#include <sys/file.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <linux/err.h>
#include <linux/zalloc.h>
-#include <bpf/bpf.h>
-#include <bpf/btf.h>
-#include <bpf/libbpf.h>
#include <api/fs/fs.h>
#include <perf/bpf_perf.h>
@@ -37,13 +33,6 @@ static inline void *u64_to_ptr(__u64 ptr)
return (void *)(unsigned long)ptr;
}
-static void set_max_rlimit(void)
-{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
- setrlimit(RLIMIT_MEMLOCK, &rinf);
-}
-
static struct bpf_counter *bpf_counter_alloc(void)
{
struct bpf_counter *counter;
@@ -297,33 +286,6 @@ struct bpf_counter_ops bpf_program_profiler_ops = {
.install_pe = bpf_program_profiler__install_pe,
};
-static __u32 bpf_link_get_id(int fd)
-{
- struct bpf_link_info link_info = {0};
- __u32 link_info_len = sizeof(link_info);
-
- bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
- return link_info.id;
-}
-
-static __u32 bpf_link_get_prog_id(int fd)
-{
- struct bpf_link_info link_info = {0};
- __u32 link_info_len = sizeof(link_info);
-
- bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
- return link_info.prog_id;
-}
-
-static __u32 bpf_map_get_id(int fd)
-{
- struct bpf_map_info map_info = {0};
- __u32 map_info_len = sizeof(map_info);
-
- bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len);
- return map_info.id;
-}
-
static bool bperf_attr_map_compatible(int attr_map_fd)
{
struct bpf_map_info map_info = {0};
@@ -385,20 +347,6 @@ static int bperf_lock_attr_map(struct target *target)
return map_fd;
}
-/* trigger the leader program on a cpu */
-static int bperf_trigger_reading(int prog_fd, int cpu)
-{
- DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
- .ctx_in = NULL,
- .ctx_size_in = 0,
- .flags = BPF_F_TEST_RUN_ON_CPU,
- .cpu = cpu,
- .retval = 0,
- );
-
- return bpf_prog_test_run_opts(prog_fd, &opts);
-}
-
static int bperf_check_target(struct evsel *evsel,
struct target *target,
enum bperf_filter_type *filter_type,
@@ -451,10 +399,10 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
goto out;
}
- err = -1;
link = bpf_program__attach(skel->progs.on_switch);
- if (!link) {
+ if (IS_ERR(link)) {
pr_err("Failed to attach leader program\n");
+ err = PTR_ERR(link);
goto out;
}
@@ -521,9 +469,10 @@ static int bperf__load(struct evsel *evsel, struct target *target)
evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
if (evsel->bperf_leader_link_fd < 0 &&
- bperf_reload_leader_program(evsel, attr_map_fd, &entry))
+ bperf_reload_leader_program(evsel, attr_map_fd, &entry)) {
+ err = -1;
goto out;
-
+ }
/*
* The bpf_link holds reference to the leader program, and the
* leader program holds reference to the maps. Therefore, if
@@ -550,6 +499,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
/* Step 2: load the follower skeleton */
evsel->follower_skel = bperf_follower_bpf__open();
if (!evsel->follower_skel) {
+ err = -1;
pr_err("Failed to open follower skeleton\n");
goto out;
}
diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h
index d6d907c3dcf9..65ebaa6694fb 100644
--- a/tools/perf/util/bpf_counter.h
+++ b/tools/perf/util/bpf_counter.h
@@ -3,6 +3,10 @@
#define __PERF_BPF_COUNTER_H 1
#include <linux/list.h>
+#include <sys/resource.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
struct evsel;
struct target;
@@ -76,4 +80,52 @@ static inline int bpf_counter__install_pe(struct evsel *evsel __maybe_unused,
#endif /* HAVE_BPF_SKEL */
+static inline void set_max_rlimit(void)
+{
+ struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+
+ setrlimit(RLIMIT_MEMLOCK, &rinf);
+}
+
+static inline __u32 bpf_link_get_id(int fd)
+{
+ struct bpf_link_info link_info = { .id = 0, };
+ __u32 link_info_len = sizeof(link_info);
+
+ bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+ return link_info.id;
+}
+
+static inline __u32 bpf_link_get_prog_id(int fd)
+{
+ struct bpf_link_info link_info = { .id = 0, };
+ __u32 link_info_len = sizeof(link_info);
+
+ bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+ return link_info.prog_id;
+}
+
+static inline __u32 bpf_map_get_id(int fd)
+{
+ struct bpf_map_info map_info = { .id = 0, };
+ __u32 map_info_len = sizeof(map_info);
+
+ bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len);
+ return map_info.id;
+}
+
+/* trigger the leader program on a cpu */
+static inline int bperf_trigger_reading(int prog_fd, int cpu)
+{
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = NULL,
+ .ctx_size_in = 0,
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ .cpu = cpu,
+ .retval = 0,
+ );
+
+ return bpf_prog_test_run_opts(prog_fd, &opts);
+}
+
#endif /* __PERF_BPF_COUNTER_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index f24ab4585553..e819a4f30fc2 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -9,6 +9,7 @@
#include <linux/zalloc.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/statfs.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
@@ -45,6 +46,49 @@ static int open_cgroup(const char *name)
return fd;
}
+#ifdef HAVE_FILE_HANDLE
+int read_cgroup_id(struct cgroup *cgrp)
+{
+ char path[PATH_MAX + 1];
+ char mnt[PATH_MAX + 1];
+ struct {
+ struct file_handle fh;
+ uint64_t cgroup_id;
+ } handle;
+ int mount_id;
+
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event"))
+ return -1;
+
+ scnprintf(path, PATH_MAX, "%s/%s", mnt, cgrp->name);
+
+ handle.fh.handle_bytes = sizeof(handle.cgroup_id);
+ if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0)
+ return -1;
+
+ cgrp->id = handle.cgroup_id;
+ return 0;
+}
+#endif /* HAVE_FILE_HANDLE */
+
+#ifndef CGROUP2_SUPER_MAGIC
+#define CGROUP2_SUPER_MAGIC 0x63677270
+#endif
+
+int cgroup_is_v2(const char *subsys)
+{
+ char mnt[PATH_MAX + 1];
+ struct statfs stbuf;
+
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, subsys))
+ return -1;
+
+ if (statfs(mnt, &stbuf) < 0)
+ return -1;
+
+ return (stbuf.f_type == CGROUP2_SUPER_MAGIC);
+}
+
static struct cgroup *evlist__find_cgroup(struct evlist *evlist, const char *str)
{
struct evsel *counter;
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index 162906f3412a..de5b272560ab 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -2,6 +2,7 @@
#ifndef __CGROUP_H__
#define __CGROUP_H__
+#include <linux/compiler.h>
#include <linux/refcount.h>
#include <linux/rbtree.h>
#include "util/env.h"
@@ -38,4 +39,15 @@ struct cgroup *cgroup__find(struct perf_env *env, uint64_t id);
void perf_env__purge_cgroups(struct perf_env *env);
+#ifdef HAVE_FILE_HANDLE
+int read_cgroup_id(struct cgroup *cgrp);
+#else
+static inline int read_cgroup_id(struct cgroup *cgrp __maybe_unused)
+{
+ return -1;
+}
+#endif /* HAVE_FILE_HANDLE */
+
+int cgroup_is_v2(const char *subsys);
+
#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index 1b52402a8923..ec77e2a7b3ca 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -12,6 +12,7 @@
#include "cpumap.h"
#include "debug.h"
#include "env.h"
+#include "pmu-hybrid.h"
#define CORE_SIB_FMT \
"%s/devices/system/cpu/cpu%d/topology/core_siblings_list"
@@ -351,3 +352,82 @@ void numa_topology__delete(struct numa_topology *tp)
free(tp);
}
+
+static int load_hybrid_node(struct hybrid_topology_node *node,
+ struct perf_pmu *pmu)
+{
+ const char *sysfs;
+ char path[PATH_MAX];
+ char *buf = NULL, *p;
+ FILE *fp;
+ size_t len = 0;
+
+ node->pmu_name = strdup(pmu->name);
+ if (!node->pmu_name)
+ return -1;
+
+ sysfs = sysfs__mountpoint();
+ if (!sysfs)
+ goto err;
+
+ snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, pmu->name);
+ fp = fopen(path, "r");
+ if (!fp)
+ goto err;
+
+ if (getline(&buf, &len, fp) <= 0) {
+ fclose(fp);
+ goto err;
+ }
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+
+ fclose(fp);
+ node->cpus = buf;
+ return 0;
+
+err:
+ zfree(&node->pmu_name);
+ free(buf);
+ return -1;
+}
+
+struct hybrid_topology *hybrid_topology__new(void)
+{
+ struct perf_pmu *pmu;
+ struct hybrid_topology *tp = NULL;
+ u32 nr, i = 0;
+
+ nr = perf_pmu__hybrid_pmu_num();
+ if (nr == 0)
+ return NULL;
+
+ tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0]) * nr);
+ if (!tp)
+ return NULL;
+
+ tp->nr = nr;
+ perf_pmu__for_each_hybrid_pmu(pmu) {
+ if (load_hybrid_node(&tp->nodes[i], pmu)) {
+ hybrid_topology__delete(tp);
+ return NULL;
+ }
+ i++;
+ }
+
+ return tp;
+}
+
+void hybrid_topology__delete(struct hybrid_topology *tp)
+{
+ u32 i;
+
+ for (i = 0; i < tp->nr; i++) {
+ zfree(&tp->nodes[i].pmu_name);
+ zfree(&tp->nodes[i].cpus);
+ }
+
+ free(tp);
+}
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
index 6201c3790d86..d9af97177068 100644
--- a/tools/perf/util/cputopo.h
+++ b/tools/perf/util/cputopo.h
@@ -25,10 +25,23 @@ struct numa_topology {
struct numa_topology_node nodes[];
};
+struct hybrid_topology_node {
+ char *pmu_name;
+ char *cpus;
+};
+
+struct hybrid_topology {
+ u32 nr;
+ struct hybrid_topology_node nodes[];
+};
+
struct cpu_topology *cpu_topology__new(void);
void cpu_topology__delete(struct cpu_topology *tp);
struct numa_topology *numa_topology__new(void);
void numa_topology__delete(struct numa_topology *tp);
+struct hybrid_topology *hybrid_topology__new(void);
+void hybrid_topology__delete(struct hybrid_topology *tp);
+
#endif /* __PERF_CPUTOPO_H */
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 059bcec3f651..3e1a05bc82cc 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -6,6 +6,7 @@
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
*/
+#include <asm/bug.h>
#include <linux/coresight-pmu.h>
#include <linux/err.h>
#include <linux/list.h>
@@ -17,6 +18,7 @@
#include "cs-etm.h"
#include "cs-etm-decoder.h"
+#include "debug.h"
#include "intlist.h"
/* use raw logging */
@@ -276,13 +278,13 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
const uint8_t trace_chan_id)
{
/* No timestamp packet has been received, nothing to do */
- if (!packet_queue->timestamp)
+ if (!packet_queue->cs_timestamp)
return OCSD_RESP_CONT;
- packet_queue->timestamp = packet_queue->next_timestamp;
+ packet_queue->cs_timestamp = packet_queue->next_cs_timestamp;
/* Estimate the timestamp for the next range packet */
- packet_queue->next_timestamp += packet_queue->instr_count;
+ packet_queue->next_cs_timestamp += packet_queue->instr_count;
packet_queue->instr_count = 0;
/* Tell the front end which traceid_queue needs attention */
@@ -294,7 +296,8 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
static ocsd_datapath_resp_t
cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
const ocsd_generic_trace_elem *elem,
- const uint8_t trace_chan_id)
+ const uint8_t trace_chan_id,
+ const ocsd_trc_index_t indx)
{
struct cs_etm_packet_queue *packet_queue;
@@ -308,20 +311,39 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
* Function do_soft_timestamp() will report the value to the front end,
* hence asking the decoder to keep decoding rather than stopping.
*/
- if (packet_queue->timestamp) {
- packet_queue->next_timestamp = elem->timestamp;
+ if (packet_queue->cs_timestamp) {
+ packet_queue->next_cs_timestamp = elem->timestamp;
return OCSD_RESP_CONT;
}
- /*
- * This is the first timestamp we've seen since the beginning of traces
- * or a discontinuity. Since timestamps packets are generated *after*
- * range packets have been generated, we need to estimate the time at
- * which instructions started by subtracting the number of instructions
- * executed to the timestamp.
- */
- packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
- packet_queue->next_timestamp = elem->timestamp;
+
+ if (!elem->timestamp) {
+ /*
+ * Zero timestamps can be seen due to misconfiguration or hardware bugs.
+ * Warn once, and don't try to subtract instr_count as it would result in an
+ * underflow.
+ */
+ packet_queue->cs_timestamp = 0;
+ WARN_ONCE(true, "Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR
+ ". Decoding may be improved with --itrace=Z...\n", indx);
+ } else if (packet_queue->instr_count > elem->timestamp) {
+ /*
+ * Sanity check that the elem->timestamp - packet_queue->instr_count would not
+ * result in an underflow. Warn and clamp at 0 if it would.
+ */
+ packet_queue->cs_timestamp = 0;
+ pr_err("Timestamp calculation underflow at Idx:%" OCSD_TRC_IDX_STR "\n", indx);
+ } else {
+ /*
+ * This is the first timestamp we've seen since the beginning of traces
+ * or a discontinuity. Since timestamp packets are generated *after*
+ * range packets have been generated, we need to estimate the time at
+ * which instructions started by subtracting the number of instructions
+ * executed from the timestamp.
+ */
+ packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
+ }
+ packet_queue->next_cs_timestamp = elem->timestamp;
packet_queue->instr_count = 0;
/* Tell the front end which traceid_queue needs attention */
@@ -334,8 +356,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
static void
cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
{
- packet_queue->timestamp = 0;
- packet_queue->next_timestamp = 0;
+ packet_queue->cs_timestamp = 0;
+ packet_queue->next_cs_timestamp = 0;
packet_queue->instr_count = 0;
}
@@ -542,7 +564,7 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
const void *context,
- const ocsd_trc_index_t indx __maybe_unused,
+ const ocsd_trc_index_t indx,
const u8 trace_chan_id __maybe_unused,
const ocsd_generic_trace_elem *elem)
{
@@ -579,7 +601,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
break;
case OCSD_GEN_TRC_ELEM_TIMESTAMP:
resp = cs_etm_decoder__do_hard_timestamp(etmq, elem,
- trace_chan_id);
+ trace_chan_id,
+ indx);
break;
case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
resp = cs_etm_decoder__set_tid(etmq, packet_queue,
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 7e63e7dedc33..32ad92d3e454 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -38,8 +38,6 @@
#include <tools/libc_compat.h>
#include "util/synthetic-events.h"
-#define MAX_TIMESTAMP (~0ULL)
-
struct cs_etm_auxtrace {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
@@ -56,6 +54,7 @@ struct cs_etm_auxtrace {
u8 sample_instructions;
int num_cpu;
+ u64 latest_kernel_timestamp;
u32 auxtrace_type;
u64 branches_sample_type;
u64 branches_id;
@@ -86,7 +85,7 @@ struct cs_etm_queue {
struct cs_etm_decoder *decoder;
struct auxtrace_buffer *buffer;
unsigned int queue_nr;
- u8 pending_timestamp;
+ u8 pending_timestamp_chan_id;
u64 offset;
const unsigned char *buf;
size_t buf_len, buf_used;
@@ -208,7 +207,7 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
* be more than one channel per cs_etm_queue, we need to specify
* what traceID queue needs servicing.
*/
- etmq->pending_timestamp = trace_chan_id;
+ etmq->pending_timestamp_chan_id = trace_chan_id;
}
static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
@@ -216,22 +215,22 @@ static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
{
struct cs_etm_packet_queue *packet_queue;
- if (!etmq->pending_timestamp)
+ if (!etmq->pending_timestamp_chan_id)
return 0;
if (trace_chan_id)
- *trace_chan_id = etmq->pending_timestamp;
+ *trace_chan_id = etmq->pending_timestamp_chan_id;
packet_queue = cs_etm__etmq_get_packet_queue(etmq,
- etmq->pending_timestamp);
+ etmq->pending_timestamp_chan_id);
if (!packet_queue)
return 0;
/* Acknowledge pending status */
- etmq->pending_timestamp = 0;
+ etmq->pending_timestamp_chan_id = 0;
/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
- return packet_queue->timestamp;
+ return packet_queue->cs_timestamp;
}
static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
@@ -814,7 +813,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
int ret = 0;
unsigned int cs_queue_nr;
u8 trace_chan_id;
- u64 timestamp;
+ u64 cs_timestamp;
struct cs_etm_queue *etmq = queue->priv;
if (list_empty(&queue->head) || etmq)
@@ -854,7 +853,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
/*
* Run decoder on the trace block. The decoder will stop when
- * encountering a timestamp, a full packet queue or the end of
+ * encountering a CS timestamp, a full packet queue or the end of
* trace for that block.
*/
ret = cs_etm__decode_data_block(etmq);
@@ -865,10 +864,10 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
* Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
* the timestamp calculation for us.
*/
- timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+ cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
/* We found a timestamp, no need to continue. */
- if (timestamp)
+ if (cs_timestamp)
break;
/*
@@ -892,7 +891,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
* queue and will be processed in cs_etm__process_queues().
*/
cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
- ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
+ ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out:
return ret;
}
@@ -1194,6 +1193,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
event->sample.header.size = sizeof(struct perf_event_header);
+ if (!etm->timeless_decoding)
+ sample.time = etm->latest_kernel_timestamp;
sample.ip = addr;
sample.pid = tidq->pid;
sample.tid = tidq->tid;
@@ -1250,6 +1251,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
event->sample.header.size = sizeof(struct perf_event_header);
+ if (!etm->timeless_decoding)
+ sample.time = etm->latest_kernel_timestamp;
sample.ip = ip;
sample.pid = tidq->pid;
sample.tid = tidq->tid;
@@ -2221,7 +2224,7 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
int ret = 0;
unsigned int cs_queue_nr, queue_nr;
u8 trace_chan_id;
- u64 timestamp;
+ u64 cs_timestamp;
struct auxtrace_queue *queue;
struct cs_etm_queue *etmq;
struct cs_etm_traceid_queue *tidq;
@@ -2283,9 +2286,9 @@ refetch:
if (ret)
goto out;
- timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+ cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
- if (!timestamp) {
+ if (!cs_timestamp) {
/*
* Function cs_etm__decode_data_block() returns when
* there is no more traces to decode in the current
@@ -2308,7 +2311,7 @@ refetch:
* this queue/traceID.
*/
cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
- ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
+ ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
}
out:
@@ -2380,7 +2383,7 @@ static int cs_etm__process_event(struct perf_session *session,
struct perf_tool *tool)
{
int err = 0;
- u64 timestamp;
+ u64 sample_kernel_timestamp;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
auxtrace);
@@ -2394,16 +2397,21 @@ static int cs_etm__process_event(struct perf_session *session,
}
if (sample->time && (sample->time != (u64) -1))
- timestamp = sample->time;
+ sample_kernel_timestamp = sample->time;
else
- timestamp = 0;
+ sample_kernel_timestamp = 0;
- if (timestamp || etm->timeless_decoding) {
+ if (sample_kernel_timestamp || etm->timeless_decoding) {
err = cs_etm__update_queues(etm);
if (err)
return err;
}
+ /*
+ * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We
+ * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because
+ * ETM_OPT_CTXTID is not enabled.
+ */
if (etm->timeless_decoding &&
event->header.type == PERF_RECORD_EXIT)
return cs_etm__process_timeless_queues(etm,
@@ -2414,9 +2422,14 @@ static int cs_etm__process_event(struct perf_session *session,
else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
return cs_etm__process_switch_cpu_wide(etm, event);
- if (!etm->timeless_decoding &&
- event->header.type == PERF_RECORD_AUX)
- return cs_etm__process_queues(etm);
+ if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) {
+ /*
+ * Record the latest kernel timestamp available in the header
+ * for samples so that synthesised samples occur from this point
+ * onwards.
+ */
+ etm->latest_kernel_timestamp = sample_kernel_timestamp;
+ }
return 0;
}
@@ -2464,6 +2477,10 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
struct evlist *evlist = etm->session->evlist;
bool timeless_decoding = true;
+ /* Override timeless mode with user input from --itrace=Z */
+ if (etm->synth_opts.timeless_decoding)
+ return true;
+
/*
* Circle through the list of event and complain if we find one
* with the time bit set.
@@ -2810,6 +2827,14 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
if (err)
goto err_free_etm;
+ if (session->itrace_synth_opts->set) {
+ etm->synth_opts = *session->itrace_synth_opts;
+ } else {
+ itrace_synth_opts__set_default(&etm->synth_opts,
+ session->itrace_synth_opts->default_no_sample);
+ etm->synth_opts.callchain = false;
+ }
+
etm->session = session;
etm->machine = &session->machines.host;
@@ -2854,14 +2879,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
return 0;
}
- if (session->itrace_synth_opts->set) {
- etm->synth_opts = *session->itrace_synth_opts;
- } else {
- itrace_synth_opts__set_default(&etm->synth_opts,
- session->itrace_synth_opts->default_no_sample);
- etm->synth_opts.callchain = false;
- }
-
err = cs_etm__synth_events(etm, session);
if (err)
goto err_delete_thread;
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 36428918411e..d65c7b19407d 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -171,8 +171,8 @@ struct cs_etm_packet_queue {
u32 head;
u32 tail;
u32 instr_count;
- u64 timestamp;
- u64 next_timestamp;
+ u64 cs_timestamp;
+ u64 next_cs_timestamp;
struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER];
};
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 8fca4779ae6a..a9c102e8e3c0 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -240,11 +240,12 @@ static bool is_dir(struct perf_data *data)
static int open_file_read(struct perf_data *data)
{
+ int flags = data->in_place_update ? O_RDWR : O_RDONLY;
struct stat st;
int fd;
char sbuf[STRERR_BUFSIZE];
- fd = open(data->file.path, O_RDONLY);
+ fd = open(data->file.path, flags);
if (fd < 0) {
int err = errno;
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 62a3e66fbee8..c9de82af5584 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -31,6 +31,7 @@ struct perf_data {
bool is_dir;
bool force;
bool use_stdio;
+ bool in_place_update;
enum perf_data_mode mode;
struct {
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 5cd189172525..e0d4f08839fb 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -343,7 +343,7 @@ static int db_export__threads(struct db_export *dbe, struct thread *thread,
int db_export__sample(struct db_export *dbe, union perf_event *event,
struct perf_sample *sample, struct evsel *evsel,
- struct addr_location *al)
+ struct addr_location *al, struct addr_location *addr_al)
{
struct thread *thread = al->thread;
struct export_sample es = {
@@ -389,18 +389,14 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
}
}
- if ((evsel->core.attr.sample_type & PERF_SAMPLE_ADDR) &&
- sample_addr_correlates_sym(&evsel->core.attr)) {
- struct addr_location addr_al;
-
- thread__resolve(thread, &addr_al, sample);
- err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id,
+ if (addr_al) {
+ err = db_ids_from_al(dbe, addr_al, &es.addr_dso_db_id,
&es.addr_sym_db_id, &es.addr_offset);
if (err)
goto out_put;
if (dbe->crp) {
err = thread_stack__process(thread, comm, sample, al,
- &addr_al, es.db_id,
+ addr_al, es.db_id,
dbe->crp);
if (err)
goto out_put;
diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h
index 9c3d38f5a40d..23983cb35706 100644
--- a/tools/perf/util/db-export.h
+++ b/tools/perf/util/db-export.h
@@ -97,7 +97,7 @@ int db_export__branch_type(struct db_export *dbe, u32 branch_type,
const char *name);
int db_export__sample(struct db_export *dbe, union perf_event *event,
struct perf_sample *sample, struct evsel *evsel,
- struct addr_location *al);
+ struct addr_location *al, struct addr_location *addr_al);
int db_export__branch_types(struct db_export *dbe);
diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
new file mode 100644
index 000000000000..ca33fbc5efde
--- /dev/null
+++ b/tools/perf/util/dlfilter.c
@@ -0,0 +1,615 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dlfilter.c: Interface to perf script --dlfilter shared object
+ * Copyright (c) 2021, Intel Corporation.
+ */
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <subcmd/exec-cmd.h>
+#include <linux/zalloc.h>
+#include <linux/build_bug.h>
+
+#include "debug.h"
+#include "event.h"
+#include "evsel.h"
+#include "dso.h"
+#include "map.h"
+#include "thread.h"
+#include "trace-event.h"
+#include "symbol.h"
+#include "srcline.h"
+#include "dlfilter.h"
+#include "perf_dlfilter.h"
+
+ /*
+  * Fill the filter-visible perf_dlfilter_al from perf's own addr_location.
+  * 'comm' and 'filtered' are reset to NULL / 0 here; callers that have better
+  * values overwrite them after this returns.
+  */
+ static void al_to_d_al(struct addr_location *al, struct perf_dlfilter_al *d_al)
+ {
+ 	struct symbol *symbol = al->sym;
+
+ 	d_al->size = sizeof(*d_al);
+
+ 	if (!al->map) {
+ 		d_al->dso = NULL;
+ 		d_al->is_64_bit = 0;
+ 		d_al->buildid_size = 0;
+ 		d_al->buildid = NULL;
+ 	} else {
+ 		struct dso *map_dso = al->map->dso;
+
+ 		/* The long name is used only if kernel paths were requested and one exists */
+ 		if (symbol_conf.show_kernel_path && map_dso->long_name)
+ 			d_al->dso = map_dso->long_name;
+ 		else
+ 			d_al->dso = map_dso->name;
+ 		d_al->is_64_bit = map_dso->is_64_bit;
+ 		d_al->buildid_size = map_dso->bid.size;
+ 		d_al->buildid = map_dso->bid.data;
+ 	}
+
+ 	if (!symbol) {
+ 		d_al->sym = NULL;
+ 		d_al->sym_start = 0;
+ 		d_al->sym_end = 0;
+ 		d_al->symoff = 0;
+ 		d_al->sym_binding = 0;
+ 	} else {
+ 		d_al->sym = symbol->name;
+ 		d_al->sym_start = symbol->start;
+ 		d_al->sym_end = symbol->end;
+ 		if (al->addr < symbol->end)
+ 			d_al->symoff = al->addr - symbol->start;
+ 		else
+ 			d_al->symoff = al->addr - al->map->start - symbol->start;
+ 		d_al->sym_binding = symbol->binding;
+ 	}
+
+ 	d_al->addr = al->addr;
+ 	d_al->comm = NULL;
+ 	d_al->filtered = 0;
+ }
+
+ /*
+  * Return the sample's addr_location, resolving it through machine__resolve()
+  * first if no thread has been attached to it yet. NULL when that fails.
+  */
+ static struct addr_location *get_al(struct dlfilter *d)
+ {
+ 	struct addr_location *loc = d->al;
+
+ 	if (loc->thread)
+ 		return loc;
+
+ 	return machine__resolve(d->machine, loc, d->sample) < 0 ? NULL : loc;
+ }
+
+ /* Thread of the current sample, or NULL if the sample cannot be resolved. */
+ static struct thread *get_thread(struct dlfilter *d)
+ {
+ 	struct addr_location *loc = get_al(d);
+
+ 	if (!loc)
+ 		return NULL;
+
+ 	return loc->thread;
+ }
+
+ /*
+  * perf_dlfilter_fns.resolve_ip: resolve the sample IP on first use and hand
+  * back the cached filter-visible location. Returns NULL when called outside
+  * a filter_event / filter_event_early callback or when resolution fails.
+  */
+ static const struct perf_dlfilter_al *dlfilter__resolve_ip(void *ctx)
+ {
+ 	struct dlfilter *d = ctx;
+ 	struct perf_dlfilter_al *ip_al = d->d_ip_al;
+ 	struct addr_location *al;
+
+ 	if (!d->ctx_valid)
+ 		return NULL;
+
+ 	/* A non-zero 'size' doubles as the "already filled in" marker */
+ 	if (!ip_al->size) {
+ 		al = get_al(d);
+ 		if (!al)
+ 			return NULL;
+
+ 		al_to_d_al(al, ip_al);
+
+ 		ip_al->is_kernel_ip = machine__kernel_ip(d->machine, d->sample->ip);
+ 		ip_al->comm = al->thread ? thread__comm_str(al->thread) : ":-1";
+ 		ip_al->filtered = al->filtered;
+ 	}
+
+ 	return ip_al;
+ }
+
+ /*
+  * perf_dlfilter_fns.resolve_addr: like resolve_ip, but for the sample's
+  * 'addr'. Only available while a filter callback is running and when the
+  * sample was flagged addr_correlates_sym; otherwise returns NULL.
+  */
+ static const struct perf_dlfilter_al *dlfilter__resolve_addr(void *ctx)
+ {
+ 	struct dlfilter *d = ctx;
+ 	struct perf_dlfilter_al *out = d->d_addr_al;
+ 	struct addr_location *addr_al = d->addr_al;
+
+ 	if (!d->ctx_valid || !d->d_sample->addr_correlates_sym)
+ 		return NULL;
+
+ 	/* A non-zero 'size' doubles as the "already filled in" marker */
+ 	if (out->size)
+ 		return out;
+
+ 	if (!addr_al->thread) {
+ 		struct thread *sample_thread = get_thread(d);
+
+ 		if (sample_thread == NULL)
+ 			return NULL;
+ 		thread__resolve(sample_thread, addr_al, d->sample);
+ 	}
+
+ 	al_to_d_al(addr_al, out);
+ 	out->is_kernel_ip = machine__kernel_ip(d->machine, d->sample->addr);
+
+ 	return out;
+ }
+
+ /*
+  * perf_dlfilter_fns.args: expose the filter's argument vector. Valid from
+  * start(), stop() and the filter_event callbacks. '*dlargc' is set to 0
+  * whenever NULL is returned and 'dlargc' itself is non-NULL.
+  */
+ static char **dlfilter__args(void *ctx, int *dlargc)
+ {
+ 	struct dlfilter *d = ctx;
+
+ 	if (!dlargc)
+ 		return NULL;
+
+ 	*dlargc = 0;
+
+ 	if (!(d->ctx_valid || d->in_start || d->in_stop))
+ 		return NULL;
+
+ 	*dlargc = d->dlargc;
+ 	return d->dlargv;
+ }
+
+ /*
+  * perf_dlfilter_fns.resolve_address: resolve an arbitrary address in the
+  * context of the current sample's thread and cpumode. At most 'size' bytes
+  * (as set by the caller in *d_al_p) are written, and the caller's 'size' is
+  * preserved. Returns 0 on success, -1 otherwise.
+  */
+ static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlfilter_al *d_al_p)
+ {
+ 	struct dlfilter *d = ctx;
+ 	struct perf_dlfilter_al resolved;
+ 	struct addr_location al;
+ 	struct thread *sample_thread;
+ 	__u32 caller_size;
+ 	size_t copy_len;
+
+ 	if (!d->ctx_valid || !d_al_p)
+ 		return -1;
+
+ 	sample_thread = get_thread(d);
+ 	if (!sample_thread)
+ 		return -1;
+
+ 	thread__find_symbol_fb(sample_thread, d->sample->cpumode, address, &al);
+ 	al_to_d_al(&al, &resolved);
+ 	resolved.is_kernel_ip = machine__kernel_ip(d->machine, address);
+
+ 	/* Never write past what the caller said it allocated */
+ 	caller_size = d_al_p->size;
+ 	copy_len = min((size_t)caller_size, sizeof(resolved));
+ 	memcpy(d_al_p, &resolved, copy_len);
+ 	d_al_p->size = caller_size;
+
+ 	return 0;
+ }
+
+ /*
+  * perf_dlfilter_fns.insn: return the instruction bytes recorded for the
+  * sample, attempting to fetch them via script_fetch_insn() when the sample
+  * has an IP but no instruction yet. '*len' is 0 whenever NULL is returned
+  * and 'len' itself is non-NULL.
+  */
+ static const __u8 *dlfilter__insn(void *ctx, __u32 *len)
+ {
+ 	struct dlfilter *d = ctx;
+ 	struct perf_sample *smpl;
+
+ 	if (!len)
+ 		return NULL;
+
+ 	*len = 0;
+
+ 	if (!d->ctx_valid)
+ 		return NULL;
+
+ 	smpl = d->sample;
+
+ 	if (smpl->ip && !smpl->insn_len) {
+ 		struct addr_location *loc = d->al;
+
+ 		if (!loc->thread && machine__resolve(d->machine, loc, smpl) < 0)
+ 			return NULL;
+
+ 		if (loc->thread->maps && loc->thread->maps->machine)
+ 			script_fetch_insn(smpl, loc->thread, loc->thread->maps->machine);
+ 	}
+
+ 	if (!smpl->insn_len)
+ 		return NULL;
+
+ 	*len = smpl->insn_len;
+
+ 	return (__u8 *)smpl->insn;
+ }
+
+ /*
+  * perf_dlfilter_fns.srcline: source file and line number for the sample IP.
+  * Returns the file name from get_srcline_split() (NULL if there is no map or
+  * dso) and stores the line in '*line_no' (0 if unknown).
+  */
+ static const char *dlfilter__srcline(void *ctx, __u32 *line_no)
+ {
+ 	struct dlfilter *d = ctx;
+ 	struct addr_location *loc;
+ 	unsigned int lineno = 0;
+ 	char *file_name = NULL;
+ 	struct map *ip_map;
+ 	u64 ip_addr;
+
+ 	if (!d->ctx_valid || !line_no)
+ 		return NULL;
+
+ 	loc = get_al(d);
+ 	if (!loc)
+ 		return NULL;
+
+ 	ip_map = loc->map;
+ 	ip_addr = loc->addr;
+
+ 	if (ip_map && ip_map->dso) {
+ 		file_name = get_srcline_split(ip_map->dso,
+ 					      map__rip_2objdump(ip_map, ip_addr),
+ 					      &lineno);
+ 	}
+
+ 	*line_no = lineno;
+ 	return file_name;
+ }
+
+ /*
+  * perf_dlfilter_fns.attr: perf_event_attr of the evsel that produced the
+  * current sample, or NULL outside of a filter callback.
+  */
+ static struct perf_event_attr *dlfilter__attr(void *ctx)
+ {
+ 	struct dlfilter *d = ctx;
+
+ 	return d->ctx_valid ? &d->evsel->core.attr : NULL;
+ }
+
+ /*
+  * perf_dlfilter_fns.object_code: read up to 'len' bytes of object code at
+  * 'ip' into 'buf'. The sample's own map is reused when 'ip' lies inside it
+  * and is on the same side of the kernel/user split as the sample IP;
+  * otherwise the map is looked up afresh. The read is clipped at the end of
+  * the map. Returns the result of dso__data_read_offset(), or -1 on failure.
+  */
+ static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len)
+ {
+ 	struct dlfilter *d = ctx;
+ 	struct addr_location *sample_al;
+ 	struct addr_location lookup;
+ 	struct map *code_map;
+ 	u64 dso_offset;
+
+ 	if (!d->ctx_valid)
+ 		return -1;
+
+ 	sample_al = get_al(d);
+ 	if (!sample_al)
+ 		return -1;
+
+ 	code_map = sample_al->map;
+
+ 	if (!(code_map && ip >= code_map->start && ip < code_map->end &&
+ 	      machine__kernel_ip(d->machine, ip) == machine__kernel_ip(d->machine, d->sample->ip))) {
+ 		thread__find_map_fb(sample_al->thread, d->sample->cpumode, ip, &lookup);
+ 		if (!lookup.map)
+ 			return -1;
+
+ 		code_map = lookup.map;
+ 	}
+
+ 	dso_offset = code_map->map_ip(code_map, ip);
+ 	if (ip + len >= code_map->end)
+ 		len = code_map->end - ip;
+
+ 	return dso__data_read_offset(code_map->dso, d->machine, dso_offset, buf, len);
+ }
+
+ static const struct perf_dlfilter_fns perf_dlfilter_fns = { /* table copied into the filter's "perf_dlfilter_fns" symbol by dlfilter__open() */
+ .resolve_ip = dlfilter__resolve_ip, /* location of the sample IP */
+ .resolve_addr = dlfilter__resolve_addr, /* location of the sample addr */
+ .args = dlfilter__args, /* filter argument vector */
+ .resolve_address = dlfilter__resolve_address, /* location of an arbitrary address */
+ .insn = dlfilter__insn, /* instruction bytes of the sample */
+ .srcline = dlfilter__srcline, /* source file and line of the sample IP */
+ .attr = dlfilter__attr, /* perf_event_attr of the sample's evsel */
+ .object_code = dlfilter__object_code, /* read object code at an address */
+ };
+
+ /*
+  * Work out which path to hand to dlopen() for 'file' and return it as a
+  * newly allocated string (NULL if strdup() fails):
+  *   - a name containing '/' is used as is;
+  *   - a readable file in the current directory gets "./" prepended so that
+  *     dlopen() looks there;
+  *   - otherwise <exec-path>/dlfilters/<file> is used if readable;
+  *   - failing all that, 'file' is returned unchanged.
+  */
+ static char *find_dlfilter(const char *file)
+ {
+ 	char candidate[PATH_MAX];
+ 	char *exec_dir;
+
+ 	if (strchr(file, '/'))
+ 		return strdup(file);
+
+ 	if (!access(file, R_OK)) {
+ 		snprintf(candidate, sizeof(candidate), "./%s", file);
+ 		return strdup(candidate);
+ 	}
+
+ 	exec_dir = get_argv_exec_path();
+ 	if (!exec_dir)
+ 		return strdup(file);
+
+ 	snprintf(candidate, sizeof(candidate), "%s/dlfilters/%s", exec_dir, file);
+ 	free(exec_dir);
+
+ 	return strdup(access(candidate, R_OK) ? file : candidate);
+ }
+
+ /* Build-time proof that a PERF_DLFILTER_FLAG_* value equals its PERF_IP_FLAG_* twin */
+ #define CHECK_FLAG(x) BUILD_BUG_ON((u64)PERF_DLFILTER_FLAG_ ## x != (u64)PERF_IP_FLAG_ ## x)
+
+ /*
+  * Zero '*d', then record the resolved shared-object path and the filter
+  * arguments. Returns 0 on success, -1 if the path could not be allocated.
+  */
+ static int dlfilter__init(struct dlfilter *d, const char *file, int dlargc, char **dlargv)
+ {
+ 	/* Sample flags are passed to the filter unconverted, so the values must agree */
+ 	CHECK_FLAG(BRANCH);
+ 	CHECK_FLAG(CALL);
+ 	CHECK_FLAG(RETURN);
+ 	CHECK_FLAG(CONDITIONAL);
+ 	CHECK_FLAG(SYSCALLRET);
+ 	CHECK_FLAG(ASYNC);
+ 	CHECK_FLAG(INTERRUPT);
+ 	CHECK_FLAG(TX_ABORT);
+ 	CHECK_FLAG(TRACE_BEGIN);
+ 	CHECK_FLAG(TRACE_END);
+ 	CHECK_FLAG(IN_TX);
+ 	CHECK_FLAG(VMENTRY);
+ 	CHECK_FLAG(VMEXIT);
+
+ 	memset(d, 0, sizeof(*d));
+
+ 	d->file = find_dlfilter(file);
+ 	if (d->file == NULL)
+ 		return -1;
+
+ 	d->dlargv = dlargv;
+ 	d->dlargc = dlargc;
+
+ 	return 0;
+ }
+
+ /* Release what dlfilter__init() allocated: the path string. */
+ static void dlfilter__exit(struct dlfilter *d)
+ {
+ 	char **path = &d->file;
+
+ 	zfree(path);
+ }
+
+ /*
+  * dlopen() the filter and look up its entry points. Every entry point is
+  * optional (a missing symbol just leaves the pointer NULL). If the object
+  * exports "perf_dlfilter_fns", perf's function table is copied into it.
+  * Returns 0 on success, -1 if dlopen() fails.
+  */
+ static int dlfilter__open(struct dlfilter *d)
+ {
+ 	void *so = dlopen(d->file, RTLD_NOW);
+
+ 	d->handle = so;
+ 	if (so == NULL) {
+ 		pr_err("dlopen failed for: '%s'\n", d->file);
+ 		return -1;
+ 	}
+
+ 	d->start = dlsym(so, "start");
+ 	d->filter_event = dlsym(so, "filter_event");
+ 	d->filter_event_early = dlsym(so, "filter_event_early");
+ 	d->stop = dlsym(so, "stop");
+
+ 	d->fns = dlsym(so, "perf_dlfilter_fns");
+ 	if (d->fns != NULL)
+ 		memcpy(d->fns, &perf_dlfilter_fns, sizeof(struct perf_dlfilter_fns));
+
+ 	return 0;
+ }
+
+ /* Unload the shared object; returns whatever dlclose() returns. */
+ static int dlfilter__close(struct dlfilter *d)
+ {
+ 	void *so = d->handle;
+
+ 	return dlclose(so);
+ }
+
+ /*
+  * Allocate a dlfilter for 'file' and load the shared object. 'dlargv' is
+  * stored by reference, not copied. Returns NULL on any failure, with nothing
+  * left allocated.
+  */
+ struct dlfilter *dlfilter__new(const char *file, int dlargc, char **dlargv)
+ {
+ 	struct dlfilter *filter = malloc(sizeof(*filter));
+
+ 	if (filter == NULL)
+ 		return NULL;
+
+ 	if (dlfilter__init(filter, file, dlargc, dlargv)) {
+ 		free(filter);
+ 		return NULL;
+ 	}
+
+ 	if (dlfilter__open(filter)) {
+ 		dlfilter__exit(filter);
+ 		free(filter);
+ 		return NULL;
+ 	}
+
+ 	return filter;
+ }
+
+ /* Free a dlfilter and the path it owns; NULL is accepted and ignored. */
+ static void dlfilter__free(struct dlfilter *d)
+ {
+ 	if (!d)
+ 		return;
+
+ 	dlfilter__exit(d);
+ 	free(d);
+ }
+
+ /*
+  * Remember the session and call the filter's optional start() hook.
+  * Returns the hook's return value, or 0 if there is no filter or no hook.
+  */
+ int dlfilter__start(struct dlfilter *d, struct perf_session *session)
+ {
+ 	int rc;
+
+ 	if (!d)
+ 		return 0;
+
+ 	d->session = session;
+
+ 	if (!d->start)
+ 		return 0;
+
+ 	/* in_start is what lets the hook call perf_dlfilter_fns.args() */
+ 	d->in_start = true;
+ 	rc = d->start(&d->data, d);
+ 	d->in_start = false;
+
+ 	return rc;
+ }
+
+ /*
+  * Call the filter's optional stop() hook. Returns the hook's return value,
+  * or 0 if there is no filter or no hook.
+  */
+ static int dlfilter__stop(struct dlfilter *d)
+ {
+ 	int rc;
+
+ 	if (!d || !d->stop)
+ 		return 0;
+
+ 	/* in_stop is what lets the hook call perf_dlfilter_fns.args() */
+ 	d->in_stop = true;
+ 	rc = d->stop(d->data, d);
+ 	d->in_stop = false;
+
+ 	return rc;
+ }
+
+ /*
+  * Tear a filter down: run its stop() hook, unload the shared object, then
+  * free the dlfilter. NULL is accepted and ignored.
+  */
+ void dlfilter__cleanup(struct dlfilter *d)
+ {
+ 	if (!d)
+ 		return;
+
+ 	dlfilter__stop(d);
+ 	dlfilter__close(d);
+ 	dlfilter__free(d);
+ }
+
+ #define ASSIGN(x) d_sample.x = sample->x
+ /*
+  * Build the filter-visible view of one sample and call the filter.
+  *
+  * The per-sample context (event, sample, evsel, machine, al, addr_al) is
+  * stored in 'd' so that the perf_dlfilter_fns callbacks can reach it.
+  * d_sample, d_ip_al and d_addr_al are locals of this function, so the
+  * pointers stored in 'd' are only meaningful while ctx_valid is true.
+  *
+  * 'early' selects filter_event_early() instead of filter_event(). The chosen
+  * callback is invoked unconditionally; the inline wrappers in dlfilter.h
+  * check that it is non-NULL before calling here.
+  *
+  * Returns whatever the filter callback returns.
+  */
+ int dlfilter__do_filter_event(struct dlfilter *d,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine,
+ struct addr_location *al,
+ struct addr_location *addr_al,
+ bool early)
+ {
+ struct perf_dlfilter_sample d_sample;
+ struct perf_dlfilter_al d_ip_al;
+ struct perf_dlfilter_al d_addr_al;
+ int ret;
+ /* Publish the per-sample context for the perf_dlfilter_fns callbacks */
+ d->event = event;
+ d->sample = sample;
+ d->evsel = evsel;
+ d->machine = machine;
+ d->al = al;
+ d->addr_al = addr_al;
+ d->d_sample = &d_sample;
+ d->d_ip_al = &d_ip_al;
+ d->d_addr_al = &d_addr_al;
+
+ d_sample.size = sizeof(d_sample);
+ d_ip_al.size = 0; /* To indicate d_ip_al is not initialized */
+ d_addr_al.size = 0; /* To indicate d_addr_al is not initialized */
+ /* Copy the same-named fields straight from perf_sample */
+ ASSIGN(ip);
+ ASSIGN(pid);
+ ASSIGN(tid);
+ ASSIGN(time);
+ ASSIGN(addr);
+ ASSIGN(id);
+ ASSIGN(stream_id);
+ ASSIGN(period);
+ ASSIGN(weight);
+ ASSIGN(ins_lat);
+ ASSIGN(p_stage_cyc);
+ ASSIGN(transaction);
+ ASSIGN(insn_cnt);
+ ASSIGN(cyc_cnt);
+ ASSIGN(cpu);
+ ASSIGN(flags);
+ ASSIGN(data_src);
+ ASSIGN(phys_addr);
+ ASSIGN(data_page_size);
+ ASSIGN(code_page_size);
+ ASSIGN(cgroup);
+ ASSIGN(cpumode);
+ ASSIGN(misc);
+ ASSIGN(raw_size);
+ ASSIGN(raw_data);
+ /* Branch stack and callchain are passed by reference, not copied */
+ if (sample->branch_stack) {
+ d_sample.brstack_nr = sample->branch_stack->nr;
+ d_sample.brstack = (struct perf_branch_entry *)perf_sample__branch_entries(sample);
+ } else {
+ d_sample.brstack_nr = 0;
+ d_sample.brstack = NULL;
+ }
+
+ if (sample->callchain) {
+ d_sample.raw_callchain_nr = sample->callchain->nr;
+ d_sample.raw_callchain = (__u64 *)sample->callchain->ips;
+ } else {
+ d_sample.raw_callchain_nr = 0;
+ d_sample.raw_callchain = NULL;
+ }
+ /* dlfilter__resolve_addr() refuses to resolve unless this is set */
+ d_sample.addr_correlates_sym =
+ (evsel->core.attr.sample_type & PERF_SAMPLE_ADDR) &&
+ sample_addr_correlates_sym(&evsel->core.attr);
+
+ d_sample.event = evsel__name(evsel);
+ /* Callbacks that need sample context only work between these two writes */
+ d->ctx_valid = true;
+
+ if (early)
+ ret = d->filter_event_early(d->data, &d_sample, d);
+ else
+ ret = d->filter_event(d->data, &d_sample, d);
+
+ d->ctx_valid = false;
+
+ return ret;
+ }
+
+ /*
+  * Check whether <dirname>/<name> is a loadable dlfilter and fetch its
+  * descriptions.
+  *
+  * An object counts as a dlfilter if it can be dlopen()ed and exports
+  * "filter_event" or "filter_event_early". If it also exports
+  * "filter_description", that function is called and its results are
+  * strdup()ed into '*desc' and '*long_desc'; the caller owns and frees them.
+  * Outputs the filter does not provide are left untouched, so the caller
+  * should pre-initialize them to NULL.
+  *
+  * Returns true if the object is a dlfilter, false otherwise. The shared
+  * object is always unloaded again before returning.
+  */
+ static bool get_filter_desc(const char *dirname, const char *name,
+ 			    char **desc, char **long_desc)
+ {
+ 	char path[PATH_MAX];
+ 	void *handle;
+ 	const char *(*desc_fn)(const char **long_description);
+
+ 	snprintf(path, sizeof(path), "%s/%s", dirname, name);
+ 	handle = dlopen(path, RTLD_NOW);
+ 	if (!handle)
+ 		return false;
+
+ 	if (!dlsym(handle, "filter_event") && !dlsym(handle, "filter_event_early")) {
+ 		/* Not a dlfilter: do not leave the object loaded */
+ 		dlclose(handle);
+ 		return false;
+ 	}
+
+ 	desc_fn = dlsym(handle, "filter_description");
+ 	if (desc_fn) {
+ 		const char *dsc;
+ 		/* The filter is not obliged to store a long description */
+ 		const char *long_dsc = NULL;
+
+ 		dsc = desc_fn(&long_dsc);
+ 		if (dsc)
+ 			*desc = strdup(dsc);
+ 		if (long_dsc)
+ 			*long_desc = strdup(long_dsc);
+ 	}
+
+ 	dlclose(handle);
+ 	return true;
+ }
+
+ /*
+  * Print one line per dlfilter found in 'dirname': the file name and its
+  * short description. With 'verbose' set, the long description follows,
+  * one indented line per newline-separated line. Entries that are
+  * directories, are shorter than 4 characters, do not end in ".so", or are
+  * not dlfilters are skipped. A directory that cannot be opened is ignored.
+  */
+ static void list_filters(const char *dirname)
+ {
+ 	struct dirent *ent;
+ 	DIR *dirp = opendir(dirname);
+
+ 	if (!dirp)
+ 		return;
+
+ 	for (ent = readdir(dirp); ent != NULL; ent = readdir(dirp)) {
+ 		size_t name_len = strlen(ent->d_name);
+ 		char *long_desc = NULL;
+ 		char *desc = NULL;
+
+ 		if (ent->d_type == DT_DIR || name_len < 4 ||
+ 		    strcmp(".so", ent->d_name + name_len - 3))
+ 			continue;
+
+ 		if (!get_filter_desc(dirname, ent->d_name, &desc, &long_desc))
+ 			continue;
+
+ 		printf(" %-36s %s\n", ent->d_name, desc ? desc : "");
+
+ 		if (verbose) {
+ 			char *rest = long_desc;
+ 			char *text;
+
+ 			while ((text = strsep(&rest, "\n")) != NULL)
+ 				printf("%39s%s\n", "", text);
+ 		}
+
+ 		free(long_desc);
+ 		free(desc);
+ 	}
+
+ 	closedir(dirp);
+ }
+
+ /*
+  * Option callback: list the dlfilters in the current directory and in
+  * <exec-path>/dlfilters, then terminate the process with status 0. It never
+  * returns to its caller.
+  */
+ int list_available_dlfilters(const struct option *opt __maybe_unused,
+ 			     const char *s __maybe_unused,
+ 			     int unset __maybe_unused)
+ {
+ 	char filters_dir[PATH_MAX];
+ 	char *exec_dir;
+
+ 	printf("List of available dlfilters:\n");
+
+ 	list_filters(".");
+
+ 	exec_dir = get_argv_exec_path();
+ 	if (exec_dir) {
+ 		snprintf(filters_dir, sizeof(filters_dir), "%s/dlfilters", exec_dir);
+
+ 		list_filters(filters_dir);
+
+ 		free(exec_dir);
+ 	}
+
+ 	exit(0);
+ }
diff --git a/tools/perf/util/dlfilter.h b/tools/perf/util/dlfilter.h
new file mode 100644
index 000000000000..505980442360
--- /dev/null
+++ b/tools/perf/util/dlfilter.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * dlfilter.h: Interface to perf script --dlfilter shared object
+ * Copyright (c) 2021, Intel Corporation.
+ */
+
+#ifndef PERF_UTIL_DLFILTER_H
+#define PERF_UTIL_DLFILTER_H
+
+struct perf_session;
+union perf_event;
+struct perf_sample;
+struct evsel;
+struct machine;
+struct addr_location;
+struct perf_dlfilter_fns;
+struct perf_dlfilter_sample;
+struct perf_dlfilter_al;
+
+ struct dlfilter { /* state for one loaded --dlfilter shared object */
+ char *file; /* path handed to dlopen(); allocated by find_dlfilter() */
+ void *handle; /* dlopen() handle */
+ void *data; /* filter-private pointer: set by start(), passed to the other hooks */
+ struct perf_session *session; /* recorded by dlfilter__start() */
+ bool ctx_valid; /* true only while filter_event{,_early}() is running */
+ bool in_start; /* true only while start() is running */
+ bool in_stop; /* true only while stop() is running */
+ int dlargc; /* number of filter arguments */
+ char **dlargv; /* filter arguments; stored by reference */
+ /* Per-sample context, set on every dlfilter__do_filter_event() call */
+ union perf_event *event;
+ struct perf_sample *sample;
+ struct evsel *evsel;
+ struct machine *machine;
+ struct addr_location *al; /* location of the sample IP */
+ struct addr_location *addr_al; /* location of the sample addr */
+ struct perf_dlfilter_sample *d_sample; /* filter-visible copy of the sample */
+ struct perf_dlfilter_al *d_ip_al; /* filled lazily by resolve_ip() */
+ struct perf_dlfilter_al *d_addr_al; /* filled lazily by resolve_addr() */
+ /* Entry points looked up with dlsym(); NULL when the filter lacks them */
+ int (*start)(void **data, void *ctx);
+ int (*stop)(void *data, void *ctx);
+
+ int (*filter_event)(void *data,
+ const struct perf_dlfilter_sample *sample,
+ void *ctx);
+ int (*filter_event_early)(void *data,
+ const struct perf_dlfilter_sample *sample,
+ void *ctx);
+ /* Filter's "perf_dlfilter_fns" symbol; perf copies its function table there */
+ struct perf_dlfilter_fns *fns;
+ };
+
+struct dlfilter *dlfilter__new(const char *file, int dlargc, char **dlargv);
+
+int dlfilter__start(struct dlfilter *d, struct perf_session *session);
+
+int dlfilter__do_filter_event(struct dlfilter *d,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine,
+ struct addr_location *al,
+ struct addr_location *addr_al,
+ bool early);
+
+void dlfilter__cleanup(struct dlfilter *d);
+
+ /*
+  * Run the filter's filter_event() hook on a sample. Returns 0 without doing
+  * anything when no filter is loaded or the filter has no such hook;
+  * otherwise returns the hook's result.
+  */
+ static inline int dlfilter__filter_event(struct dlfilter *d,
+ 					 union perf_event *event,
+ 					 struct perf_sample *sample,
+ 					 struct evsel *evsel,
+ 					 struct machine *machine,
+ 					 struct addr_location *al,
+ 					 struct addr_location *addr_al)
+ {
+ 	if (d && d->filter_event)
+ 		return dlfilter__do_filter_event(d, event, sample, evsel, machine, al, addr_al, false);
+
+ 	return 0;
+ }
+
+ /*
+  * Run the filter's filter_event_early() hook on a sample. Returns 0 without
+  * doing anything when no filter is loaded or the filter has no such hook;
+  * otherwise returns the hook's result.
+  */
+ static inline int dlfilter__filter_event_early(struct dlfilter *d,
+ 					       union perf_event *event,
+ 					       struct perf_sample *sample,
+ 					       struct evsel *evsel,
+ 					       struct machine *machine,
+ 					       struct addr_location *al,
+ 					       struct addr_location *addr_al)
+ {
+ 	if (d && d->filter_event_early)
+ 		return dlfilter__do_filter_event(d, event, sample, evsel, machine, al, addr_al, true);
+
+ 	return 0;
+ }
+
+int list_available_dlfilters(const struct option *opt, const char *s, int unset);
+
+#endif
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index b2f4920e19a6..7d2ba8419b0c 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -975,9 +975,13 @@ static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
if ((tag == DW_TAG_formal_parameter ||
tag == DW_TAG_variable) &&
die_compare_name(die_mem, fvp->name) &&
- /* Does the DIE have location information or external instance? */
+ /*
+ * Does the DIE have location information or const value
+ * or external instance?
+ */
(dwarf_attr(die_mem, DW_AT_external, &attr) ||
- dwarf_attr(die_mem, DW_AT_location, &attr)))
+ dwarf_attr(die_mem, DW_AT_location, &attr) ||
+ dwarf_attr(die_mem, DW_AT_const_value, &attr)))
return DIE_FIND_CB_END;
if (dwarf_haspc(die_mem, fvp->addr))
return DIE_FIND_CB_CONTINUE;
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 9130f6fad8d5..ebc5e9ad35db 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -144,6 +144,7 @@ static void perf_env__purge_bpf(struct perf_env *env)
node = rb_entry(next, struct bpf_prog_info_node, rb_node);
next = rb_next(&node->rb_node);
rb_erase(&node->rb_node, root);
+ free(node->info_linear);
free(node);
}
@@ -202,6 +203,18 @@ void perf_env__exit(struct perf_env *env)
for (i = 0; i < env->nr_memory_nodes; i++)
zfree(&env->memory_nodes[i].set);
zfree(&env->memory_nodes);
+
+ for (i = 0; i < env->nr_hybrid_nodes; i++) {
+ zfree(&env->hybrid_nodes[i].pmu_name);
+ zfree(&env->hybrid_nodes[i].cpus);
+ }
+ zfree(&env->hybrid_nodes);
+
+ for (i = 0; i < env->nr_hybrid_cpc_nodes; i++) {
+ zfree(&env->hybrid_cpc_nodes[i].cpu_pmu_caps);
+ zfree(&env->hybrid_cpc_nodes[i].pmu_name);
+ }
+ zfree(&env->hybrid_cpc_nodes);
}
void perf_env__init(struct perf_env *env __maybe_unused)
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index ca249bf5e984..6824a7423a2d 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -37,6 +37,18 @@ struct memory_node {
unsigned long *set;
};
+ struct hybrid_node { /* one entry of the HYBRID_TOPOLOGY header feature */
+ char *pmu_name; /* PMU name string as stored in the header */
+ char *cpus; /* CPU list string for that PMU, printed as "<pmu> cpu list : <cpus>" */
+ };
+
+ struct hybrid_cpc_node { /* one PMU's entry of the HYBRID_CPU_PMU_CAPS header feature */
+ int nr_cpu_pmu_caps; /* number of capabilities held in cpu_pmu_caps */
+ unsigned int max_branches; /* value of the "branches" capability, parsed with atoi() */
+ char *cpu_pmu_caps; /* capability strings in one buffer; NOTE(review): layout is set by process_per_cpu_pmu_caps() - confirm there */
+ char *pmu_name; /* PMU name string as stored in the header */
+ };
+
struct perf_env {
char *hostname;
char *os_release;
@@ -59,6 +71,8 @@ struct perf_env {
int nr_pmu_mappings;
int nr_groups;
int nr_cpu_pmu_caps;
+ int nr_hybrid_nodes;
+ int nr_hybrid_cpc_nodes;
char *cmdline;
const char **cmdline_argv;
char *sibling_cores;
@@ -77,6 +91,8 @@ struct perf_env {
struct numa_node *numa_nodes;
struct memory_node *memory_nodes;
unsigned long long memory_bsize;
+ struct hybrid_node *hybrid_nodes;
+ struct hybrid_cpc_node *hybrid_cpc_nodes;
#ifdef HAVE_LIBBPF_SUPPORT
/*
* bpf_info_lock protects bpf rbtrees. This is needed because the
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8a62fb39e365..19ad64f2bd83 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -100,7 +100,7 @@ enum {
PERF_IP_FLAG_VMEXIT = 1ULL << 12,
};
-#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
+#define PERF_IP_FLAG_CHARS "bcrosyiABExgh"
#define PERF_BRANCH_MASK (\
PERF_IP_FLAG_BRANCH |\
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6e5c41528c7d..6ba9664089bd 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -425,9 +425,6 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
if (affinity__setup(&affinity) < 0)
return;
- evlist__for_each_entry(evlist, pos)
- bpf_counter__disable(pos);
-
/* Disable 'immediate' events last */
for (imm = 0; imm <= 1; imm++) {
evlist__for_each_cpu(evlist, i, cpu) {
@@ -2164,3 +2161,28 @@ int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf)
return printed;
}
+
+ /*
+  * Break up every group that is led by a 'mem-loads-aux' event.
+  *
+  * On some platforms 'mem-loads' has to be grouped with 'mem-loads-aux', with
+  * 'mem-loads-aux' as the leader. The auxiliary event carries no valid memory
+  * load information itself, so before reporting each event of such a group
+  * is turned into its own leader with no members.
+  */
+ void evlist__check_mem_load_aux(struct evlist *evlist)
+ {
+ 	struct evsel *group_leader, *evsel, *member;
+
+ 	evlist__for_each_entry(evlist, evsel) {
+ 		group_leader = evsel->leader;
+
+ 		/* Already its own leader: nothing to ungroup */
+ 		if (group_leader == evsel)
+ 			continue;
+
+ 		if (!group_leader->name || !strstr(group_leader->name, "mem-loads-aux"))
+ 			continue;
+
+ 		for_each_group_evsel(member, group_leader) {
+ 			member->leader = member;
+ 			member->core.nr_members = 0;
+ 		}
+ 	}
+ }
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a8b97b50cceb..2073cfa79f79 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -367,4 +367,5 @@ int evlist__ctlfd_ack(struct evlist *evlist);
struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
+void evlist__check_mem_load_aux(struct evlist *evlist);
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4a3cd1b5bb33..b1c930eca40f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -428,6 +428,7 @@ struct evsel *evsel__clone(struct evsel *orig)
evsel->auto_merge_stats = orig->auto_merge_stats;
evsel->collect_stat = orig->collect_stat;
evsel->weak_group = orig->weak_group;
+ evsel->use_config_name = orig->use_config_name;
if (evsel__copy_config_terms(evsel, orig) < 0)
goto out_err;
@@ -1581,6 +1582,27 @@ int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale)
return 0;
}
+ /*
+  * Translate index 'cpu' in evsel's cpu map into the index that the same CPU
+  * has in other's cpu map. Returns what perf_cpu_map__idx() returns for it.
+  */
+ static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other,
+ 				  int cpu)
+ {
+ 	return perf_cpu_map__idx(other->core.cpus,
+ 				 perf_cpu_map__cpu(evsel->core.cpus, cpu));
+ }
+
+ /*
+  * Pick the cpu index to use when looking up the group leader's fd. When
+  * exactly one of evsel and its leader is a hybrid event their cpu maps can
+  * differ, so the index is translated into the leader's map; otherwise 'cpu'
+  * is returned unchanged.
+  */
+ static int evsel__hybrid_group_cpu(struct evsel *evsel, int cpu)
+ {
+ 	struct evsel *group_leader = evsel->leader;
+ 	bool evsel_hybrid = evsel__is_hybrid(evsel);
+ 	bool leader_hybrid = evsel__is_hybrid(group_leader);
+
+ 	if (!evsel_hybrid != !leader_hybrid)
+ 		return evsel__match_other_cpu(evsel, group_leader, cpu);
+
+ 	return cpu;
+ }
+
static int get_group_fd(struct evsel *evsel, int cpu, int thread)
{
struct evsel *leader = evsel->leader;
@@ -1595,6 +1617,10 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread)
*/
BUG_ON(!leader->core.fd);
+ cpu = evsel__hybrid_group_cpu(evsel, cpu);
+ if (cpu == -1)
+ return -1;
+
fd = FD(leader, cpu, thread);
BUG_ON(fd == -1);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 75cf5dbfe208..bdad52a06438 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -83,8 +83,10 @@ struct evsel {
bool collect_stat;
bool weak_group;
bool bpf_counter;
+ bool use_config_name;
int bpf_fd;
struct bpf_object *bpf_obj;
+ struct list_head config_terms;
};
/*
@@ -116,10 +118,8 @@ struct evsel {
bool merged_stat;
bool reset_group;
bool errored;
- bool use_config_name;
struct hashmap *per_pkg_mask;
struct evsel *leader;
- struct list_head config_terms;
int err;
int cpu_iter;
struct {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index aa1e42518d37..0158d2945bab 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -49,6 +49,7 @@
#include "cputopo.h"
#include "bpf-event.h"
#include "clockid.h"
+#include "pmu-hybrid.h"
#include <linux/ctype.h>
#include <internal/lib.h>
@@ -932,6 +933,40 @@ static int write_clock_data(struct feat_fd *ff,
return do_write(ff, data64, sizeof(*data64));
}
+ /*
+  * Write the HYBRID_TOPOLOGY feature: a u32 node count followed, for each
+  * node, by the PMU name string and its CPU list string. Returns 0 on
+  * success, -ENOENT if no hybrid topology is available, or the negative
+  * result of the failing write.
+  */
+ static int write_hybrid_topology(struct feat_fd *ff,
+ 				 struct evlist *evlist __maybe_unused)
+ {
+ 	struct hybrid_topology *topo = hybrid_topology__new();
+ 	int err;
+ 	u32 idx;
+
+ 	if (!topo)
+ 		return -ENOENT;
+
+ 	err = do_write(ff, &topo->nr, sizeof(u32));
+
+ 	for (idx = 0; err >= 0 && idx < topo->nr; idx++) {
+ 		struct hybrid_topology_node *node = &topo->nodes[idx];
+
+ 		err = do_write_string(ff, node->pmu_name);
+ 		if (err >= 0)
+ 			err = do_write_string(ff, node->cpus);
+ 	}
+
+ 	/* Writers may return positive values; success is reported as 0 */
+ 	if (err >= 0)
+ 		err = 0;
+
+ 	hybrid_topology__delete(topo);
+ 	return err;
+ }
+
static int write_dir_format(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
@@ -1425,18 +1460,14 @@ static int write_compressed(struct feat_fd *ff __maybe_unused,
return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
}
-static int write_cpu_pmu_caps(struct feat_fd *ff,
- struct evlist *evlist __maybe_unused)
+static int write_per_cpu_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu,
+ bool write_pmu)
{
- struct perf_pmu *cpu_pmu = perf_pmu__find("cpu");
struct perf_pmu_caps *caps = NULL;
int nr_caps;
int ret;
- if (!cpu_pmu)
- return -ENOENT;
-
- nr_caps = perf_pmu__caps_parse(cpu_pmu);
+ nr_caps = perf_pmu__caps_parse(pmu);
if (nr_caps < 0)
return nr_caps;
@@ -1444,7 +1475,7 @@ static int write_cpu_pmu_caps(struct feat_fd *ff,
if (ret < 0)
return ret;
- list_for_each_entry(caps, &cpu_pmu->caps, list) {
+ list_for_each_entry(caps, &pmu->caps, list) {
ret = do_write_string(ff, caps->name);
if (ret < 0)
return ret;
@@ -1454,9 +1485,49 @@ static int write_cpu_pmu_caps(struct feat_fd *ff,
return ret;
}
+ if (write_pmu) {
+ ret = do_write_string(ff, pmu->name);
+ if (ret < 0)
+ return ret;
+ }
+
return ret;
}
+ /*
+  * Write the CPU_PMU_CAPS feature for the PMU named "cpu", without a trailing
+  * PMU name. Returns -ENOENT if that PMU does not exist.
+  */
+ static int write_cpu_pmu_caps(struct feat_fd *ff,
+ 			      struct evlist *evlist __maybe_unused)
+ {
+ 	struct perf_pmu *pmu = perf_pmu__find("cpu");
+
+ 	return pmu ? write_per_cpu_pmu_caps(ff, pmu, false) : -ENOENT;
+ }
+
+ /*
+  * Write the HYBRID_CPU_PMU_CAPS feature: a u32 PMU count followed by one
+  * capability record per hybrid PMU, each terminated by the PMU name.
+  * Returns 0 on success, -ENOENT if there are no hybrid PMUs, or the negative
+  * result of the failing write.
+  */
+ static int write_hybrid_cpu_pmu_caps(struct feat_fd *ff,
+ 				     struct evlist *evlist __maybe_unused)
+ {
+ 	u32 nr_pmu = perf_pmu__hybrid_pmu_num();
+ 	struct perf_pmu *hybrid_pmu;
+ 	int err;
+
+ 	if (!nr_pmu)
+ 		return -ENOENT;
+
+ 	err = do_write(ff, &nr_pmu, sizeof(nr_pmu));
+ 	if (err < 0)
+ 		return err;
+
+ 	perf_pmu__for_each_hybrid_pmu(hybrid_pmu) {
+ 		err = write_per_cpu_pmu_caps(ff, hybrid_pmu, true);
+ 		if (err < 0)
+ 			return err;
+ 	}
+
+ 	return 0;
+ }
+
static void print_hostname(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1623,6 +1694,18 @@ static void print_clock_data(struct feat_fd *ff, FILE *fp)
clockid_name(clockid));
}
+ /* Print the CPU list of every hybrid PMU recorded in the header. */
+ static void print_hybrid_topology(struct feat_fd *ff, FILE *fp)
+ {
+ 	int idx;
+
+ 	fprintf(fp, "# hybrid cpu system:\n");
+
+ 	for (idx = 0; idx < ff->ph->env.nr_hybrid_nodes; idx++) {
+ 		struct hybrid_node *node = &ff->ph->env.hybrid_nodes[idx];
+
+ 		fprintf(fp, "# %s cpu list : %s\n", node->pmu_name, node->cpus);
+ 	}
+ }
+
static void print_dir_format(struct feat_fd *ff, FILE *fp)
{
struct perf_session *session;
@@ -1916,18 +1999,28 @@ static void print_compressed(struct feat_fd *ff, FILE *fp)
ff->ph->env.comp_level, ff->ph->env.comp_ratio);
}
-static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
+static void print_per_cpu_pmu_caps(FILE *fp, int nr_caps, char *cpu_pmu_caps,
+ char *pmu_name)
{
- const char *delimiter = "# cpu pmu capabilities: ";
- u32 nr_caps = ff->ph->env.nr_cpu_pmu_caps;
- char *str;
+ const char *delimiter;
+ char *str, buf[128];
if (!nr_caps) {
- fprintf(fp, "# cpu pmu capabilities: not available\n");
+ if (!pmu_name)
+ fprintf(fp, "# cpu pmu capabilities: not available\n");
+ else
+ fprintf(fp, "# %s pmu capabilities: not available\n", pmu_name);
return;
}
- str = ff->ph->env.cpu_pmu_caps;
+ if (!pmu_name)
+ scnprintf(buf, sizeof(buf), "# cpu pmu capabilities: ");
+ else
+ scnprintf(buf, sizeof(buf), "# %s pmu capabilities: ", pmu_name);
+
+ delimiter = buf;
+
+ str = cpu_pmu_caps;
while (nr_caps--) {
fprintf(fp, "%s%s", delimiter, str);
delimiter = ", ";
@@ -1937,6 +2030,24 @@ static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
fprintf(fp, "\n");
}
+ /* Print the capabilities of the (non-hybrid) cpu PMU; no PMU name is shown. */
+ static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
+ {
+ 	int nr_caps = ff->ph->env.nr_cpu_pmu_caps;
+ 	char *caps = ff->ph->env.cpu_pmu_caps;
+
+ 	print_per_cpu_pmu_caps(fp, nr_caps, caps, NULL);
+ }
+
+ /* Print the capabilities of each hybrid PMU, labelled with the PMU's name. */
+ static void print_hybrid_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
+ {
+ 	for (int idx = 0; idx < ff->ph->env.nr_hybrid_cpc_nodes; idx++) {
+ 		struct hybrid_cpc_node *node = &ff->ph->env.hybrid_cpc_nodes[idx];
+
+ 		print_per_cpu_pmu_caps(fp, node->nr_cpu_pmu_caps,
+ 				       node->cpu_pmu_caps, node->pmu_name);
+ 	}
+ }
+
static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
{
const char *delimiter = "# pmu mappings: ";
@@ -2849,6 +2960,46 @@ static int process_clock_data(struct feat_fd *ff,
return 0;
}
+ /*
+  * Read the HYBRID_TOPOLOGY feature into the perf_env: a u32 node count
+  * followed by a (PMU name, CPU list) string pair per node. The env is only
+  * updated once every node has been read. Returns 0 on success, -ENOMEM if
+  * the node array cannot be allocated, -1 on a read failure.
+  */
+ static int process_hybrid_topology(struct feat_fd *ff,
+ 				   void *data __maybe_unused)
+ {
+ 	struct hybrid_node *nodes;
+ 	u32 nr, i;
+
+ 	/* nr nodes */
+ 	if (do_read_u32(ff, &nr))
+ 		return -1;
+
+ 	nodes = zalloc(sizeof(*nodes) * nr);
+ 	if (!nodes)
+ 		return -ENOMEM;
+
+ 	for (i = 0; i < nr; i++) {
+ 		struct hybrid_node *node = &nodes[i];
+
+ 		node->pmu_name = do_read_string(ff);
+ 		if (node->pmu_name)
+ 			node->cpus = do_read_string(ff);
+
+ 		if (!node->pmu_name || !node->cpus)
+ 			goto error;
+ 	}
+
+ 	ff->ph->env.nr_hybrid_nodes = nr;
+ 	ff->ph->env.hybrid_nodes = nodes;
+ 	return 0;
+
+ error:
+ 	/* The array was zero-allocated, so unread slots are NULL and safe to free */
+ 	for (i = 0; i < nr; i++) {
+ 		free(nodes[i].pmu_name);
+ 		free(nodes[i].cpus);
+ 	}
+
+ 	free(nodes);
+ 	return -1;
+ }
+
static int process_dir_format(struct feat_fd *ff,
void *_data __maybe_unused)
{
@@ -3002,8 +3153,9 @@ static int process_compressed(struct feat_fd *ff,
return 0;
}
-static int process_cpu_pmu_caps(struct feat_fd *ff,
- void *data __maybe_unused)
+static int process_per_cpu_pmu_caps(struct feat_fd *ff, int *nr_cpu_pmu_caps,
+ char **cpu_pmu_caps,
+ unsigned int *max_branches)
{
char *name, *value;
struct strbuf sb;
@@ -3017,7 +3169,7 @@ static int process_cpu_pmu_caps(struct feat_fd *ff,
return 0;
}
- ff->ph->env.nr_cpu_pmu_caps = nr_caps;
+ *nr_cpu_pmu_caps = nr_caps;
if (strbuf_init(&sb, 128) < 0)
return -1;
@@ -3039,12 +3191,12 @@ static int process_cpu_pmu_caps(struct feat_fd *ff,
goto free_value;
if (!strcmp(name, "branches"))
- ff->ph->env.max_branches = atoi(value);
+ *max_branches = atoi(value);
free(value);
free(name);
}
- ff->ph->env.cpu_pmu_caps = strbuf_detach(&sb, NULL);
+ *cpu_pmu_caps = strbuf_detach(&sb, NULL);
return 0;
free_value:
@@ -3056,6 +3208,63 @@ error:
return -1;
}
+/*
+ * Process the CPU_PMU_CAPS feature section by reading the capabilities
+ * straight into the session-wide fields of ff->ph->env (nr_cpu_pmu_caps,
+ * cpu_pmu_caps and max_branches). Returns whatever
+ * process_per_cpu_pmu_caps() returns.
+ */
+static int process_cpu_pmu_caps(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ return process_per_cpu_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps,
+ &ff->ph->env.cpu_pmu_caps,
+ &ff->ph->env.max_branches);
+}
+
+/*
+ * Read the HYBRID_CPU_PMU_CAPS feature section: a u32 PMU count followed, for
+ * each PMU, by its capabilities (read by process_per_cpu_pmu_caps()) and then
+ * its PMU name string.
+ *
+ * On success the node array is stored in ff->ph->env.hybrid_cpc_nodes (with
+ * its length in nr_hybrid_cpc_nodes) and 0 is returned. A count of zero is
+ * not an error: nothing is stored and 0 is returned. Returns -1 on a read
+ * failure, -ENOMEM if the node array cannot be allocated, or the error from
+ * process_per_cpu_pmu_caps().
+ */
+static int process_hybrid_cpu_pmu_caps(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ struct hybrid_cpc_node *nodes;
+ u32 nr_pmu, i;
+ int ret;
+
+ if (do_read_u32(ff, &nr_pmu))
+ return -1;
+
+ if (!nr_pmu) {
+ pr_debug("hybrid cpu pmu capabilities not available\n");
+ return 0;
+ }
+
+ nodes = zalloc(sizeof(*nodes) * nr_pmu);
+ if (!nodes)
+ return -ENOMEM;
+
+ for (i = 0; i < nr_pmu; i++) {
+ struct hybrid_cpc_node *n = &nodes[i];
+
+ ret = process_per_cpu_pmu_caps(ff, &n->nr_cpu_pmu_caps,
+ &n->cpu_pmu_caps,
+ &n->max_branches);
+ if (ret)
+ goto err;
+
+ n->pmu_name = do_read_string(ff);
+ if (!n->pmu_name) {
+ ret = -1;
+ goto err;
+ }
+ }
+
+ ff->ph->env.nr_hybrid_cpc_nodes = nr_pmu;
+ ff->ph->env.hybrid_cpc_nodes = nodes;
+ return 0;
+
+err:
+ /*
+ * Frees every slot, including ones never filled in; presumably zalloc()
+ * zeroed the array so those hold NULL - TODO confirm.
+ */
+ for (i = 0; i < nr_pmu; i++) {
+ free(nodes[i].cpu_pmu_caps);
+ free(nodes[i].pmu_name);
+ }
+
+ free(nodes);
+ return ret;
+}
+
#define FEAT_OPR(n, func, __full_only) \
[HEADER_##n] = { \
.name = __stringify(n), \
@@ -3117,6 +3326,8 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(COMPRESSED, compressed, false),
FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false),
FEAT_OPR(CLOCK_DATA, clock_data, false),
+ FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true),
+ FEAT_OPR(HYBRID_CPU_PMU_CAPS, hybrid_cpu_pmu_caps, false),
};
struct header_print_data {
@@ -3814,6 +4025,11 @@ int perf_session__read_header(struct perf_session *session)
if (perf_file_header__read(&f_header, header, fd) < 0)
return -EINVAL;
+ if (header->needs_swap && data->in_place_update) {
+ pr_err("In-place update not supported when byte-swapping is required\n");
+ return -EINVAL;
+ }
+
/*
* Sanity check that perf.data was written cleanly; data size is
* initialized to 0 and updated only if the on_exit function is run.
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 2aca71763ecf..ae6b1cf19a7d 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -45,6 +45,8 @@ enum {
HEADER_COMPRESSED,
HEADER_CPU_PMU_CAPS,
HEADER_CLOCK_DATA,
+ HEADER_HYBRID_TOPOLOGY,
+ HEADER_HYBRID_CPU_PMU_CAPS,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 8c59677bee13..cb2520abf261 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -35,6 +35,10 @@
#define BIT63 (((uint64_t)1 << 63))
+#define SEVEN_BYTES 0xffffffffffffffULL
+
+#define NO_VMCS 0xffffffffffULL
+
#define INTEL_PT_RETURN 1
/* Maximum number of loops with no packets consumed i.e. stuck in a loop */
@@ -51,6 +55,11 @@ struct intel_pt_stack {
int pos;
};
+/*
+ * Identifiers for messages that are printed at most once per decoder. Each
+ * value is used as a bit index into decoder->print_once by
+ * intel_pt_print_once().
+ */
+enum intel_pt_p_once {
+ INTEL_PT_PRT_ONCE_UNK_VMCS,
+ INTEL_PT_PRT_ONCE_ERANGE,
+};
+
enum intel_pt_pkt_state {
INTEL_PT_STATE_NO_PSB,
INTEL_PT_STATE_NO_IP,
@@ -64,6 +73,7 @@ enum intel_pt_pkt_state {
INTEL_PT_STATE_FUP_NO_TIP,
INTEL_PT_STATE_FUP_IN_PSB,
INTEL_PT_STATE_RESAMPLE,
+ INTEL_PT_STATE_VM_TIME_CORRELATION,
};
static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
@@ -75,6 +85,7 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
case INTEL_PT_STATE_IN_SYNC:
case INTEL_PT_STATE_TNT_CONT:
case INTEL_PT_STATE_RESAMPLE:
+ case INTEL_PT_STATE_VM_TIME_CORRELATION:
return true;
case INTEL_PT_STATE_TNT:
case INTEL_PT_STATE_TIP:
@@ -107,6 +118,7 @@ struct intel_pt_decoder {
uint64_t max_insn_cnt, void *data);
bool (*pgd_ip)(uint64_t ip, void *data);
int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
+ struct intel_pt_vmcs_info *(*findnew_vmcs_info)(void *data, uint64_t vmcs);
void *data;
struct intel_pt_state state;
const unsigned char *buf;
@@ -122,6 +134,11 @@ struct intel_pt_decoder {
bool in_psb;
bool hop;
bool leap;
+ bool vm_time_correlation;
+ bool vm_tm_corr_dry_run;
+ bool vm_tm_corr_reliable;
+ bool vm_tm_corr_same_buf;
+ bool vm_tm_corr_continuous;
bool nr;
bool next_nr;
enum intel_pt_param_flags flags;
@@ -139,6 +156,11 @@ struct intel_pt_decoder {
uint64_t ctc_delta;
uint64_t cycle_cnt;
uint64_t cyc_ref_timestamp;
+ uint64_t first_timestamp;
+ uint64_t last_reliable_timestamp;
+ uint64_t vmcs;
+ uint64_t print_once;
+ uint64_t last_ctc;
uint32_t last_mtc;
uint32_t tsc_ctc_ratio_n;
uint32_t tsc_ctc_ratio_d;
@@ -217,6 +239,31 @@ static uint64_t intel_pt_lower_power_of_2(uint64_t x)
return x << i;
}
+/*
+ * Format a printf-style message and emit it, followed by a newline, both to
+ * stderr and to the Intel PT log. The message is formatted into a 512-byte
+ * buffer with vsnprintf(), so longer messages are truncated.
+ */
+__printf(1, 2)
+static void p_log(const char *fmt, ...)
+{
+ char buf[512];
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ fprintf(stderr, "%s\n", buf);
+ intel_pt_log("%s\n", buf);
+}
+
+/*
+ * Test-and-set the "already printed" bit for message @id in
+ * decoder->print_once. Returns true the first time it is called for a given
+ * @id on this decoder, and false on every later call.
+ */
+static bool intel_pt_print_once(struct intel_pt_decoder *decoder,
+ enum intel_pt_p_once id)
+{
+ uint64_t bit = 1ULL << id;
+
+ if (decoder->print_once & bit)
+ return false;
+ decoder->print_once |= bit;
+ return true;
+}
+
static uint64_t intel_pt_cyc_threshold(uint64_t ctl)
{
if (!(ctl & INTEL_PT_CYC_ENABLE))
@@ -258,11 +305,16 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
decoder->walk_insn = params->walk_insn;
decoder->pgd_ip = params->pgd_ip;
decoder->lookahead = params->lookahead;
+ decoder->findnew_vmcs_info = params->findnew_vmcs_info;
decoder->data = params->data;
decoder->return_compression = params->return_compression;
decoder->branch_enable = params->branch_enable;
decoder->hop = params->quick >= 1;
decoder->leap = params->quick >= 2;
+ decoder->vm_time_correlation = params->vm_time_correlation;
+ decoder->vm_tm_corr_dry_run = params->vm_tm_corr_dry_run;
+ decoder->first_timestamp = params->first_timestamp;
+ decoder->last_reliable_timestamp = params->first_timestamp;
decoder->flags = params->flags;
@@ -312,6 +364,12 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
return decoder;
}
+/*
+ * Set decoder->first_timestamp. Only that field is written;
+ * last_reliable_timestamp is left unchanged.
+ */
+void intel_pt_set_first_timestamp(struct intel_pt_decoder *decoder,
+ uint64_t first_timestamp)
+{
+ decoder->first_timestamp = first_timestamp;
+}
+
static void intel_pt_pop_blk(struct intel_pt_stack *stack)
{
struct intel_pt_blk *blk = stack->blk;
@@ -577,6 +635,7 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition)
intel_pt_reposition(decoder);
decoder->ref_timestamp = buffer.ref_timestamp;
decoder->state.trace_nr = buffer.trace_nr;
+ decoder->vm_tm_corr_same_buf = false;
intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
decoder->ref_timestamp);
return -ENOLINK;
@@ -1146,6 +1205,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
decoder->set_fup_tx_flags = false;
decoder->tx_flags = decoder->fup_tx_flags;
decoder->state.type = INTEL_PT_TRANSACTION;
+ if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
+ decoder->state.type |= INTEL_PT_BRANCH;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->state.flags = decoder->fup_tx_flags;
@@ -1220,8 +1281,10 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
return 0;
if (err == -EAGAIN ||
intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+ bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
+
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
- if (intel_pt_fup_event(decoder))
+ if (intel_pt_fup_event(decoder) && no_tip)
return 0;
return -EAGAIN;
}
@@ -1465,9 +1528,24 @@ static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp)
return timestamp;
}
+/*
+ * For use only when decoder->vm_time_correlation is true.
+ *
+ * Return true if @timestamp lies in the half-open range
+ * [last_reliable_timestamp, max_timestamp), where max_timestamp is the
+ * current buffer timestamp or, when that is zero, the last reliable timestamp
+ * plus 0x400000000 ticks.
+ */
+static bool intel_pt_time_in_range(struct intel_pt_decoder *decoder,
+ uint64_t timestamp)
+{
+ uint64_t max_timestamp = decoder->buf_timestamp;
+
+ if (!max_timestamp) {
+ max_timestamp = decoder->last_reliable_timestamp +
+ 0x400000000ULL;
+ }
+ /*
+ * Compare against max_timestamp, not decoder->buf_timestamp: with a zero
+ * buffer timestamp the latter would make every timestamp out of range.
+ */
+ return timestamp >= decoder->last_reliable_timestamp &&
+ timestamp < max_timestamp;
+}
+
static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
{
uint64_t timestamp;
+ bool bad = false;
decoder->have_tma = false;
@@ -1489,10 +1567,21 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
timestamp = decoder->timestamp;
}
if (timestamp < decoder->timestamp) {
- intel_pt_log_to("Wraparound timestamp", timestamp);
- timestamp += (1ULL << 56);
- decoder->tsc_timestamp = timestamp;
+ if (!decoder->buf_timestamp ||
+ (timestamp + (1ULL << 56) < decoder->buf_timestamp)) {
+ intel_pt_log_to("Wraparound timestamp", timestamp);
+ timestamp += (1ULL << 56);
+ decoder->tsc_timestamp = timestamp;
+ } else {
+ intel_pt_log_to("Suppressing bad timestamp", timestamp);
+ timestamp = decoder->timestamp;
+ bad = true;
+ }
}
+ if (decoder->vm_time_correlation &&
+ (bad || !intel_pt_time_in_range(decoder, timestamp)) &&
+ intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_ERANGE))
+ p_log("Timestamp out of range");
decoder->timestamp = timestamp;
decoder->timestamp_insn_cnt = 0;
}
@@ -1569,6 +1658,7 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
intel_pt_mtc_cyc_cnt_upd(decoder);
decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+ decoder->last_ctc = ctc - ctc_rem;
decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
if (decoder->tsc_ctc_mult) {
decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
@@ -1953,6 +2043,613 @@ static int intel_pt_resample(struct intel_pt_decoder *decoder)
return 0;
}
+/*
+ * Scratch data gathered by the packet lookahead callbacks for VM time
+ * correlation.
+ */
+struct intel_pt_vm_tsc_info {
+ /* Copies of the packets seen during lookahead, valid when the flag below is set */
+ struct intel_pt_pkt pip_packet;
+ struct intel_pt_pkt vmcs_packet;
+ struct intel_pt_pkt tma_packet;
+ /* Which packet types were seen during lookahead */
+ bool tsc, pip, vmcs, tma, psbend;
+ /* Snapshots of decoder->ctc_delta and decoder->last_ctc taken before lookahead */
+ uint64_t ctc_delta;
+ uint64_t last_ctc;
+ /* Remaining packet budget, counted down by intel_pt_tma_lookahead_cb() */
+ int max_lookahead;
+};
+
+/*
+ * Lookahead and get the PIP, VMCS and TMA packets from PSB+.
+ *
+ * pkt_info->data points to a struct intel_pt_vm_tsc_info. PIP, VMCS and TMA
+ * packets are copied into it and the matching flag is set; a TSC packet only
+ * sets the tsc flag. Returns 0 for packet types that are skipped or recorded,
+ * and 1 on PSBEND (after setting the psbend flag) or on any other packet
+ * type - presumably a non-zero return ends the lookahead.
+ */
+static int intel_pt_vm_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_vm_tsc_info *data = pkt_info->data;
+
+ switch (pkt_info->packet.type) {
+ case INTEL_PT_PAD:
+ case INTEL_PT_MNT:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MTC:
+ case INTEL_PT_FUP:
+ case INTEL_PT_CYC:
+ case INTEL_PT_CBR:
+ break;
+
+ case INTEL_PT_TSC:
+ data->tsc = true;
+ break;
+
+ case INTEL_PT_TMA:
+ data->tma_packet = pkt_info->packet;
+ data->tma = true;
+ break;
+
+ case INTEL_PT_PIP:
+ data->pip_packet = pkt_info->packet;
+ data->pip = true;
+ break;
+
+ case INTEL_PT_VMCS:
+ data->vmcs_packet = pkt_info->packet;
+ data->vmcs = true;
+ break;
+
+ case INTEL_PT_PSBEND:
+ data->psbend = true;
+ return 1;
+
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_OVF:
+ case INTEL_PT_BAD:
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_TRACESTOP:
+ default:
+ /* Stop without setting psbend */
+ return 1;
+ }
+
+ return 0;
+}
+
+/* State for the lookahead that checks whether a FUP packet follows an OVF */
+struct intel_pt_ovf_fup_info {
+ /* Remaining budget of CYC/MTC/TSC packets that may be skipped */
+ int max_lookahead;
+ /* Set to true if the first non-skipped packet is a FUP */
+ bool found;
+};
+
+/*
+ * Lookahead to detect a FUP packet after OVF.
+ *
+ * CYC, MTC and TSC packets are skipped, each one consuming one unit of
+ * data->max_lookahead; the callback returns 1 once that budget reaches zero
+ * and 0 otherwise. For any other packet type, data->found records whether it
+ * is a FUP and 1 is returned.
+ */
+static int intel_pt_ovf_fup_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_ovf_fup_info *data = pkt_info->data;
+
+ if (pkt_info->packet.type == INTEL_PT_CYC ||
+ pkt_info->packet.type == INTEL_PT_MTC ||
+ pkt_info->packet.type == INTEL_PT_TSC)
+ return !--(data->max_lookahead);
+ data->found = pkt_info->packet.type == INTEL_PT_FUP;
+ return 1;
+}
+
+/*
+ * Look ahead from the current position, skipping up to 16 CYC/MTC/TSC
+ * packets, and return true if the next other packet is a FUP.
+ */
+static bool intel_pt_ovf_fup_lookahead(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_ovf_fup_info data = {
+ .max_lookahead = 16,
+ .found = false,
+ };
+
+ intel_pt_pkt_lookahead(decoder, intel_pt_ovf_fup_lookahead_cb, &data);
+ return data.found;
+}
+
+/*
+ * Lookahead and get the TMA packet after TSC.
+ *
+ * pkt_info->data points to a struct intel_pt_vm_tsc_info. CYC and MTC packets
+ * are skipped, each one consuming one unit of data->max_lookahead; the
+ * callback returns 1 once that budget reaches zero and 0 otherwise. Any other
+ * packet type returns 1, after recording the packet and setting data->tma if
+ * it is a TMA.
+ */
+static int intel_pt_tma_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_vm_tsc_info *data = pkt_info->data;
+
+ if (pkt_info->packet.type == INTEL_PT_CYC ||
+ pkt_info->packet.type == INTEL_PT_MTC)
+ return !--(data->max_lookahead);
+
+ if (pkt_info->packet.type == INTEL_PT_TMA) {
+ data->tma_packet = pkt_info->packet;
+ data->tma = true;
+ }
+ return 1;
+}
+
+/*
+ * Convert a number of CTC ticks to TSC ticks. Uses the integer multiplier
+ * decoder->tsc_ctc_mult when it is non-zero, otherwise multdiv() with the
+ * ratio tsc_ctc_ratio_n / tsc_ctc_ratio_d.
+ */
+static uint64_t intel_pt_ctc_to_tsc(struct intel_pt_decoder *decoder, uint64_t ctc)
+{
+ if (decoder->tsc_ctc_mult)
+ return ctc * decoder->tsc_ctc_mult;
+ else
+ return multdiv(ctc, decoder->tsc_ctc_ratio_n, decoder->tsc_ctc_ratio_d);
+}
+
+/*
+ * Calculate the TSC value expected at a TMA packet.
+ *
+ * @ctc: CTC value taken from the TMA packet
+ * @fc: fast counter value taken from the TMA packet
+ * @last_ctc_timestamp: TSC timestamp used as the starting point
+ * @ctc_delta: CTC ticks accumulated since @last_ctc_timestamp
+ * @last_ctc: CTC value corresponding to @last_ctc_timestamp
+ *
+ * Returns @last_ctc_timestamp plus the CTC ticks elapsed (converted to TSC
+ * ticks) plus @fc. All intermediate values are logged when Intel PT logging
+ * is enabled.
+ */
+static uint64_t intel_pt_calc_expected_tsc(struct intel_pt_decoder *decoder,
+ uint32_t ctc,
+ uint32_t fc,
+ uint64_t last_ctc_timestamp,
+ uint64_t ctc_delta,
+ uint32_t last_ctc)
+{
+ /* Number of CTC ticks from last_ctc_timestamp to last_mtc */
+ uint64_t last_mtc_ctc = last_ctc + ctc_delta;
+ /*
+ * Number of CTC ticks from there until current TMA packet. We would
+ * expect last_mtc_ctc to be before ctc, but the TSC packet can slip
+ * past an MTC, so a sign-extended value is used.
+ */
+ uint64_t delta = (int16_t)((uint16_t)ctc - (uint16_t)last_mtc_ctc);
+ /* Total CTC ticks from last_ctc_timestamp to current TMA packet */
+ uint64_t new_ctc_delta = ctc_delta + delta;
+ uint64_t expected_tsc;
+
+ /*
+ * Convert CTC ticks to TSC ticks, add the starting point
+ * (last_ctc_timestamp) and the fast counter from the TMA packet.
+ */
+ expected_tsc = last_ctc_timestamp + intel_pt_ctc_to_tsc(decoder, new_ctc_delta) + fc;
+
+ if (intel_pt_enable_logging) {
+ intel_pt_log_x64(last_mtc_ctc);
+ intel_pt_log_x32(last_ctc);
+ intel_pt_log_x64(ctc_delta);
+ intel_pt_log_x64(delta);
+ intel_pt_log_x32(ctc);
+ intel_pt_log_x64(new_ctc_delta);
+ intel_pt_log_x64(last_ctc_timestamp);
+ intel_pt_log_x32(fc);
+ intel_pt_log_x64(intel_pt_ctc_to_tsc(decoder, new_ctc_delta));
+ intel_pt_log_x64(expected_tsc);
+ }
+
+ return expected_tsc;
+}
+
+/*
+ * Calculate the expected TSC for the TMA packet held in @data, taking the CTC
+ * from the packet payload, the fast counter from the packet count, and
+ * decoder->ctc_timestamp as the starting point.
+ */
+static uint64_t intel_pt_expected_tsc(struct intel_pt_decoder *decoder,
+ struct intel_pt_vm_tsc_info *data)
+{
+ uint32_t ctc = data->tma_packet.payload;
+ uint32_t fc = data->tma_packet.count;
+
+ return intel_pt_calc_expected_tsc(decoder, ctc, fc,
+ decoder->ctc_timestamp,
+ data->ctc_delta, data->last_ctc);
+}
+
+/*
+ * Translate the current TSC packet from guest to host time by subtracting
+ * vmcs_info->tsc_offset from decoder->packet.payload (kept to 7 bytes).
+ * Unless this is a dry run, the 7 payload bytes are also written back into
+ * the trace buffer at decoder->buf + 1. The translation is logged.
+ */
+static void intel_pt_translate_vm_tsc(struct intel_pt_decoder *decoder,
+ struct intel_pt_vmcs_info *vmcs_info)
+{
+ /* Untranslated value, kept only for the log message below */
+ uint64_t payload = decoder->packet.payload;
+
+ /* VMX adds the TSC Offset, so subtract to get host TSC */
+ decoder->packet.payload -= vmcs_info->tsc_offset;
+ /* TSC packet has only 7 bytes */
+ decoder->packet.payload &= SEVEN_BYTES;
+
+ /*
+ * The buffer is mmapped from the data file, so this also updates the
+ * data file.
+ */
+ if (!decoder->vm_tm_corr_dry_run)
+ memcpy((void *)decoder->buf + 1, &decoder->packet.payload, 7);
+
+ intel_pt_log("Translated VM TSC %#" PRIx64 " -> %#" PRIx64
+ " VMCS %#" PRIx64 " TSC Offset %#" PRIx64 "\n",
+ payload, decoder->packet.payload, vmcs_info->vmcs,
+ vmcs_info->tsc_offset);
+}
+
+/*
+ * Translate the current TSC packet using an explicit @tsc_offset when no
+ * usable VMCS info is available. A temporary VMCS info with vmcs set to
+ * NO_VMCS is passed to intel_pt_translate_vm_tsc().
+ */
+static void intel_pt_translate_vm_tsc_offset(struct intel_pt_decoder *decoder,
+ uint64_t tsc_offset)
+{
+ struct intel_pt_vmcs_info vmcs_info = {
+ .vmcs = NO_VMCS,
+ .tsc_offset = tsc_offset
+ };
+
+ intel_pt_translate_vm_tsc(decoder, &vmcs_info);
+}
+
+/* Return true if bit 0 of a PIP packet payload is set, which callers treat as Guest */
+static inline bool in_vm(uint64_t pip_payload)
+{
+ return pip_payload & 1;
+}
+
+/* Return true if bit 0 of the PIP packet's payload is set, which callers treat as Guest */
+static inline bool pip_in_vm(struct intel_pt_pkt *pip_packet)
+{
+ return pip_packet->payload & 1;
+}
+
+/* Print (via p_log) the VMCS value and TSC Offset recorded in @vmcs_info */
+static void intel_pt_print_vmcs_info(struct intel_pt_vmcs_info *vmcs_info)
+{
+ p_log("VMCS: %#" PRIx64 " TSC Offset %#" PRIx64,
+ vmcs_info->vmcs, vmcs_info->tsc_offset);
+}
+
+/*
+ * Start of a PSB: reset @data, snapshot the decoder's ctc_delta and last_ctc
+ * into it, then look ahead to collect the PIP, VMCS and TMA packets.
+ * decoder->in_psb is set only if a PSBEND was found; an error is printed if
+ * a TSC was seen without one.
+ */
+static void intel_pt_vm_tm_corr_psb(struct intel_pt_decoder *decoder,
+ struct intel_pt_vm_tsc_info *data)
+{
+ memset(data, 0, sizeof(*data));
+ data->ctc_delta = decoder->ctc_delta;
+ data->last_ctc = decoder->last_ctc;
+ intel_pt_pkt_lookahead(decoder, intel_pt_vm_psb_lookahead_cb, data);
+ if (data->tsc && !data->psbend)
+ p_log("ERROR: PSB without PSBEND");
+ decoder->in_psb = data->psbend;
+}
+
+/*
+ * Handle a TSC packet seen before decoder->ctc_timestamp is set, so no
+ * expected TSC can be calculated.
+ *
+ * With a PIP packet, Guest/Host is known: a Guest TSC is translated if a
+ * non-zero TSC Offset is available in @vmcs_info. Without a PIP, Host is
+ * assumed if @host_tsc is within the expected time range, otherwise Guest,
+ * and the result is marked unreliable.
+ */
+static void intel_pt_vm_tm_corr_first_tsc(struct intel_pt_decoder *decoder,
+ struct intel_pt_vm_tsc_info *data,
+ struct intel_pt_vmcs_info *vmcs_info,
+ uint64_t host_tsc)
+{
+ if (!decoder->in_psb) {
+ /* Can't happen */
+ p_log("ERROR: First TSC is not in PSB+");
+ }
+
+ if (data->pip) {
+ if (pip_in_vm(&data->pip_packet)) { /* Guest */
+ if (vmcs_info && vmcs_info->tsc_offset) {
+ intel_pt_translate_vm_tsc(decoder, vmcs_info);
+ decoder->vm_tm_corr_reliable = true;
+ } else {
+ p_log("ERROR: First TSC, unknown TSC Offset");
+ }
+ } else { /* Host */
+ decoder->vm_tm_corr_reliable = true;
+ }
+ } else { /* Host or Guest */
+ decoder->vm_tm_corr_reliable = false;
+ if (intel_pt_time_in_range(decoder, host_tsc)) {
+ /* Assume Host */
+ } else {
+ /* Assume Guest */
+ if (vmcs_info && vmcs_info->tsc_offset)
+ intel_pt_translate_vm_tsc(decoder, vmcs_info);
+ else
+ p_log("ERROR: First TSC, no PIP, unknown TSC Offset");
+ }
+ }
+}
+
+/*
+ * Process a TSC packet for VM time correlation.
+ *
+ * Decides whether the TSC came from the Host or a Guest, using the PIP packet
+ * when one is available and otherwise a comparison against the TSC expected
+ * from CTC-based timing. A Host TSC is left untouched. A Guest TSC is
+ * translated using the TSC Offset recorded for the VMCS or, failing that,
+ * the offset calculated here. The calculated offset may also be recorded in
+ * the VMCS info. decoder->vm_tm_corr_reliable is updated to say whether the
+ * result can be trusted.
+ *
+ * @data holds the PSB+ lookahead results when decoder->in_psb is set;
+ * otherwise it is refilled here.
+ */
+static void intel_pt_vm_tm_corr_tsc(struct intel_pt_decoder *decoder,
+ struct intel_pt_vm_tsc_info *data)
+{
+ struct intel_pt_vmcs_info *vmcs_info;
+ uint64_t tsc_offset = 0;
+ uint64_t vmcs;
+ bool reliable = true;
+ uint64_t expected_tsc;
+ uint64_t host_tsc;
+ uint64_t ref_timestamp;
+
+ bool assign = false;
+ bool assign_reliable = false;
+
+ /* Already have 'data' for the in_psb case */
+ if (!decoder->in_psb) {
+ memset(data, 0, sizeof(*data));
+ data->ctc_delta = decoder->ctc_delta;
+ data->last_ctc = decoder->last_ctc;
+ data->max_lookahead = 16;
+ intel_pt_pkt_lookahead(decoder, intel_pt_tma_lookahead_cb, data);
+ /* Outside PSB+, use the last PIP payload seen, but only if pge is set */
+ if (decoder->pge) {
+ data->pip = true;
+ data->pip_packet.payload = decoder->pip_payload;
+ }
+ }
+
+ /* Calculations depend on having TMA packets */
+ if (!data->tma) {
+ p_log("ERROR: TSC without TMA");
+ return;
+ }
+
+ /* Prefer the VMCS packet from the lookahead over the last one seen */
+ vmcs = data->vmcs ? data->vmcs_packet.payload : decoder->vmcs;
+ if (vmcs == NO_VMCS)
+ vmcs = 0;
+
+ /* May be NULL */
+ vmcs_info = decoder->findnew_vmcs_info(decoder->data, vmcs);
+
+ ref_timestamp = decoder->timestamp ? decoder->timestamp : decoder->buf_timestamp;
+ host_tsc = intel_pt_8b_tsc(decoder->packet.payload, ref_timestamp);
+
+ if (!decoder->ctc_timestamp) {
+ intel_pt_vm_tm_corr_first_tsc(decoder, data, vmcs_info, host_tsc);
+ return;
+ }
+
+ expected_tsc = intel_pt_expected_tsc(decoder, data);
+
+ tsc_offset = host_tsc - expected_tsc;
+
+ /* Determine if TSC is from Host or Guest */
+ if (data->pip) {
+ if (pip_in_vm(&data->pip_packet)) { /* Guest */
+ if (!vmcs_info) {
+ /* PIP NR=1 without VMCS cannot happen */
+ p_log("ERROR: Missing VMCS");
+ intel_pt_translate_vm_tsc_offset(decoder, tsc_offset);
+ decoder->vm_tm_corr_reliable = false;
+ return;
+ }
+ } else { /* Host */
+ decoder->last_reliable_timestamp = host_tsc;
+ decoder->vm_tm_corr_reliable = true;
+ return;
+ }
+ } else { /* Host or Guest */
+ reliable = false; /* Host/Guest is a guess, so not reliable */
+ if (decoder->in_psb) {
+ if (!tsc_offset)
+ return; /* Zero TSC Offset, assume Host */
+ /*
+ * TSC packet has only 7 bytes of TSC. We have no
+ * information about the Guest's 8th byte, but it
+ * doesn't matter because we only need 7 bytes.
+ * Here, since the 8th byte is unreliable and
+ * irrelevant, compare only 7 bytes.
+ */
+ if (vmcs_info &&
+ (tsc_offset & SEVEN_BYTES) ==
+ (vmcs_info->tsc_offset & SEVEN_BYTES)) {
+ /* Same TSC Offset as last VMCS, assume Guest */
+ goto guest;
+ }
+ }
+ /*
+ * Check if the host_tsc is within the expected range.
+ * Note, we could narrow the range more by looking ahead for
+ * the next host TSC in the same buffer, but we don't bother to
+ * do that because this is probably good enough.
+ */
+ if (host_tsc >= expected_tsc && intel_pt_time_in_range(decoder, host_tsc)) {
+ /* Within expected range for Host TSC, assume Host */
+ decoder->vm_tm_corr_reliable = false;
+ return;
+ }
+ }
+
+guest: /* Assuming Guest */
+
+ /* Determine whether to assign TSC Offset */
+ if (vmcs_info && vmcs_info->vmcs) {
+ if (vmcs_info->tsc_offset && vmcs_info->reliable) {
+ assign = false;
+ } else if (decoder->in_psb && data->pip && decoder->vm_tm_corr_reliable &&
+ decoder->vm_tm_corr_continuous && decoder->vm_tm_corr_same_buf) {
+ /* Continuous tracing, TSC in a PSB is not a time loss */
+ assign = true;
+ assign_reliable = true;
+ } else if (decoder->in_psb && data->pip && decoder->vm_tm_corr_same_buf) {
+ /*
+ * Unlikely to be a time loss TSC in a PSB which is not
+ * at the start of a buffer.
+ */
+ assign = true;
+ assign_reliable = false;
+ }
+ }
+
+ /* Record VMCS TSC Offset */
+ if (assign && (vmcs_info->tsc_offset != tsc_offset ||
+ vmcs_info->reliable != assign_reliable)) {
+ /* Only print when the offset itself changes, not just its reliability */
+ bool print = vmcs_info->tsc_offset != tsc_offset;
+
+ vmcs_info->tsc_offset = tsc_offset;
+ vmcs_info->reliable = assign_reliable;
+ if (print)
+ intel_pt_print_vmcs_info(vmcs_info);
+ }
+
+ /* Determine what TSC Offset to use */
+ if (vmcs_info && vmcs_info->tsc_offset) {
+ if (!vmcs_info->reliable)
+ reliable = false;
+ intel_pt_translate_vm_tsc(decoder, vmcs_info);
+ } else {
+ reliable = false;
+ if (vmcs_info) {
+ if (!vmcs_info->error_printed) {
+ p_log("ERROR: Unknown TSC Offset for VMCS %#" PRIx64,
+ vmcs_info->vmcs);
+ vmcs_info->error_printed = true;
+ }
+ } else {
+ if (intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_UNK_VMCS))
+ p_log("ERROR: Unknown VMCS");
+ }
+ /* Fall back to the TSC Offset calculated above */
+ intel_pt_translate_vm_tsc_offset(decoder, tsc_offset);
+ }
+
+ decoder->vm_tm_corr_reliable = reliable;
+}
+
+/*
+ * Translate the TSC carried in the current packet's payload (the caller
+ * invokes this for a BIP packet in a PEBS basic block) from Guest to Host
+ * time.
+ *
+ * Nothing is changed if the TSC is judged to be from the Host: either pge is
+ * set and the last PIP payload says Host, or pge is clear and the value lies
+ * within the expected time range. Otherwise the payload is replaced by the
+ * translated value, or by decoder->timestamp when no VMCS info is available.
+ * Unless this is a dry run, 8 bytes are written back into the trace buffer
+ * at decoder->buf + 1.
+ */
+static void intel_pt_vm_tm_corr_pebs_tsc(struct intel_pt_decoder *decoder)
+{
+ uint64_t host_tsc = decoder->packet.payload;
+ uint64_t guest_tsc = decoder->packet.payload;
+ struct intel_pt_vmcs_info *vmcs_info;
+ uint64_t vmcs;
+
+ vmcs = decoder->vmcs;
+ if (vmcs == NO_VMCS)
+ vmcs = 0;
+
+ /* May be NULL */
+ vmcs_info = decoder->findnew_vmcs_info(decoder->data, vmcs);
+
+ if (decoder->pge) {
+ if (in_vm(decoder->pip_payload)) { /* Guest */
+ if (!vmcs_info) {
+ /* PIP NR=1 without VMCS cannot happen */
+ p_log("ERROR: Missing VMCS");
+ }
+ } else { /* Host */
+ return;
+ }
+ } else { /* Host or Guest */
+ if (intel_pt_time_in_range(decoder, host_tsc)) {
+ /* Within expected range for Host TSC, assume Host */
+ return;
+ }
+ }
+
+ if (vmcs_info) {
+ /* Translate Guest TSC to Host TSC */
+ host_tsc = ((guest_tsc & SEVEN_BYTES) - vmcs_info->tsc_offset) & SEVEN_BYTES;
+ host_tsc = intel_pt_8b_tsc(host_tsc, decoder->timestamp);
+ intel_pt_log("Translated VM TSC %#" PRIx64 " -> %#" PRIx64
+ " VMCS %#" PRIx64 " TSC Offset %#" PRIx64 "\n",
+ guest_tsc, host_tsc, vmcs_info->vmcs,
+ vmcs_info->tsc_offset);
+ if (!intel_pt_time_in_range(decoder, host_tsc) &&
+ intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_ERANGE))
+ p_log("Timestamp out of range");
+ } else {
+ if (intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_UNK_VMCS))
+ p_log("ERROR: Unknown VMCS");
+ /* No offset available, so substitute the decoder's current timestamp */
+ host_tsc = decoder->timestamp;
+ }
+
+ decoder->packet.payload = host_tsc;
+
+ if (!decoder->vm_tm_corr_dry_run)
+ memcpy((void *)decoder->buf + 1, &host_tsc, 8);
+}
+
+/*
+ * Packet loop for the INTEL_PT_STATE_VM_TIME_CORRELATION decoder state.
+ *
+ * Reads packets until intel_pt_get_next_packet() fails with anything other
+ * than -ENOLINK, and returns that error. Along the way it tracks the state
+ * needed to correlate Guest and Host time (pge, in_psb, the last PIP payload
+ * and VMCS, and the timing packets) and passes TSC packets, and PEBS block
+ * items that carry a TSC, to the translation helpers.
+ */
+static int intel_pt_vm_time_correlation(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_vm_tsc_info data = { .psbend = false };
+ bool pge;
+ int err;
+
+ /* If already positioned inside a PSB+, gather its packets before the loop */
+ if (decoder->in_psb)
+ intel_pt_vm_tm_corr_psb(decoder, &data);
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err == -ENOLINK)
+ continue;
+ if (err)
+ break;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TIP_PGD:
+ decoder->pge = false;
+ decoder->vm_tm_corr_continuous = false;
+ break;
+
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP:
+ case INTEL_PT_TIP_PGE:
+ decoder->pge = true;
+ break;
+
+ case INTEL_PT_OVF:
+ decoder->in_psb = false;
+ pge = decoder->pge;
+ /* After an overflow, pge is taken from whether a FUP follows */
+ decoder->pge = intel_pt_ovf_fup_lookahead(decoder);
+ if (pge != decoder->pge)
+ intel_pt_log("Surprising PGE change in OVF!");
+ if (!decoder->pge)
+ decoder->vm_tm_corr_continuous = false;
+ break;
+
+ case INTEL_PT_FUP:
+ if (decoder->in_psb)
+ decoder->pge = true;
+ break;
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->vm_tm_corr_continuous = false;
+ decoder->have_tma = false;
+ break;
+
+ case INTEL_PT_PSB:
+ intel_pt_vm_tm_corr_psb(decoder, &data);
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->pip_payload = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TSC:
+ /* Translate the packet first so the timestamp calculation sees the updated payload */
+ intel_pt_vm_tm_corr_tsc(decoder, &data);
+ intel_pt_calc_tsc_timestamp(decoder);
+ decoder->vm_tm_corr_same_buf = true;
+ decoder->vm_tm_corr_continuous = decoder->pge;
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_PSBEND:
+ decoder->in_psb = false;
+ data.psbend = false;
+ break;
+
+ case INTEL_PT_VMCS:
+ /* A NO_VMCS payload does not replace the last known VMCS */
+ if (decoder->packet.payload != NO_VMCS)
+ decoder->vmcs = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_BBP:
+ decoder->blk_type = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_BIP:
+ /* NOTE(review): item 2 of a PEBS basic block is presumably the TSC - confirm */
+ if (decoder->blk_type == INTEL_PT_PEBS_BASIC &&
+ decoder->packet.count == 2)
+ intel_pt_vm_tm_corr_pebs_tsc(decoder);
+ break;
+
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ decoder->blk_type = 0;
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BAD: /* Does not happen */
+ default:
+ break;
+ }
+ }
+
+ return err;
+}
+
#define HOP_PROCESS 0
#define HOP_IGNORE 1
#define HOP_RETURN 2
@@ -2894,6 +3591,15 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
if (err)
return err;
+ if (decoder->vm_time_correlation) {
+ decoder->in_psb = true;
+ if (!decoder->timestamp)
+ decoder->timestamp = 1;
+ decoder->state.type = 0;
+ decoder->pkt_state = INTEL_PT_STATE_VM_TIME_CORRELATION;
+ return 0;
+ }
+
decoder->have_last_ip = true;
decoder->pkt_state = INTEL_PT_STATE_NO_IP;
@@ -2981,6 +3687,9 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
case INTEL_PT_STATE_RESAMPLE:
err = intel_pt_resample(decoder);
break;
+ case INTEL_PT_STATE_VM_TIME_CORRELATION:
+ err = intel_pt_vm_time_correlation(decoder);
+ break;
default:
err = intel_pt_bug(decoder);
break;
@@ -3227,6 +3936,7 @@ static unsigned char *adj_for_padding(unsigned char *buf_b,
* @len_b: size of second buffer
* @consecutive: returns true if there is data in buf_b that is consecutive
* to buf_a
+ * @ooo_tsc: out-of-order TSC due to VM TSC offset / scaling
*
* If the trace contains TSC we can look at the last TSC of @buf_a and the
* first TSC of @buf_b in order to determine if the buffers overlap, and then
@@ -3239,7 +3949,8 @@ static unsigned char *adj_for_padding(unsigned char *buf_b,
static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
size_t len_a,
unsigned char *buf_b,
- size_t len_b, bool *consecutive)
+ size_t len_b, bool *consecutive,
+ bool ooo_tsc)
{
uint64_t tsc_a, tsc_b;
unsigned char *p;
@@ -3274,7 +3985,7 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
start = buf_b + len_b - (rem_b - rem_a);
return adj_for_padding(start, buf_a, len_a);
}
- if (cmp < 0)
+ if (cmp < 0 && !ooo_tsc)
return buf_b; /* tsc_a < tsc_b => no overlap */
}
@@ -3292,6 +4003,7 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
* @have_tsc: can use TSC packets to detect overlap
* @consecutive: returns true if there is data in buf_b that is consecutive
* to buf_a
+ * @ooo_tsc: out-of-order TSC due to VM TSC offset / scaling
*
* When trace samples or snapshots are recorded there is the possibility that
* the data overlaps. Note that, for the purposes of decoding, data is only
@@ -3302,7 +4014,8 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
*/
unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
unsigned char *buf_b, size_t len_b,
- bool have_tsc, bool *consecutive)
+ bool have_tsc, bool *consecutive,
+ bool ooo_tsc)
{
unsigned char *found;
@@ -3315,7 +4028,7 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
if (have_tsc) {
found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b,
- consecutive);
+ consecutive, ooo_tsc);
if (found)
return found;
}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index d9e62a7f6f0e..714c475808c0 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -11,6 +11,8 @@
#include <stddef.h>
#include <stdbool.h>
+#include <linux/rbtree.h>
+
#include "intel-pt-insn-decoder.h"
#define INTEL_PT_IN_TX (1 << 0)
@@ -199,6 +201,14 @@ struct intel_pt_blk_items {
bool is_32_bit;
};
+/* Per-VMCS record used for VM time correlation, kept in an rbtree keyed by vmcs */
+struct intel_pt_vmcs_info {
+ struct rb_node rb_node;
+ /* VMCS value taken from the VMCS packet payload (lookup key) */
+ uint64_t vmcs;
+ /* TSC Offset subtracted from Guest TSC values; zero is treated as unknown */
+ uint64_t tsc_offset;
+ /* Whether tsc_offset is considered reliable */
+ bool reliable;
+ /* Set once the "Unknown TSC Offset" error has been printed for this VMCS */
+ bool error_printed;
+};
+
struct intel_pt_state {
enum intel_pt_sample_type type;
bool from_nr;
@@ -244,9 +254,13 @@ struct intel_pt_params {
uint64_t max_insn_cnt, void *data);
bool (*pgd_ip)(uint64_t ip, void *data);
int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
+ struct intel_pt_vmcs_info *(*findnew_vmcs_info)(void *data, uint64_t vmcs);
void *data;
bool return_compression;
bool branch_enable;
+ bool vm_time_correlation;
+ bool vm_tm_corr_dry_run;
+ uint64_t first_timestamp;
uint64_t ctl;
uint64_t period;
enum intel_pt_period_type period_type;
@@ -269,8 +283,12 @@ int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp);
unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
unsigned char *buf_b, size_t len_b,
- bool have_tsc, bool *consecutive);
+ bool have_tsc, bool *consecutive,
+ bool ooo_tsc);
int intel_pt__strerror(int code, char *buf, size_t buflen);
+void intel_pt_set_first_timestamp(struct intel_pt_decoder *decoder,
+ uint64_t first_timestamp);
+
#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
index 388661f89c44..d900aab24b21 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -67,4 +67,9 @@ static inline void intel_pt_log_to(const char *msg, uint64_t u)
intel_pt_log("%s to " x64_fmt "\n", msg, u);
}
+/*
+ * Log a variable as "<function>: <variable name> <value>", formatting the
+ * value with the printf conversion given in fmt.
+ */
+#define intel_pt_log_var(var, fmt) intel_pt_log("%s: " #var " " fmt "\n", __func__, var)
+
+/* Convenience forms for hexadecimal output with "%#x" and "%#" PRIx64 */
+#define intel_pt_log_x32(var) intel_pt_log_var(var, "%#x")
+#define intel_pt_log_x64(var) intel_pt_log_var(var, "%#" PRIx64)
+
#endif
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 8658d42ce57a..154a1077f22e 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -78,6 +78,7 @@ struct intel_pt {
u64 kernel_start;
u64 switch_ip;
u64 ptss_ip;
+ u64 first_timestamp;
struct perf_tsc_conversion tc;
bool cap_user_time_zero;
@@ -133,6 +134,9 @@ struct intel_pt {
struct ip_callchain *chain;
struct branch_stack *br_stack;
+
+ u64 dflt_tsc_offset;
+ struct rb_root vmcs_info;
};
enum switch_state {
@@ -271,6 +275,65 @@ static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
return !n || !perf_time__ranges_skip_sample(range, n, tm);
}
+/*
+ * Find the VMCS info for @vmcs in the rbtree @rb_root, creating and inserting
+ * a new entry if there is none. A new entry starts with @dflt_tsc_offset as
+ * its TSC Offset and is marked reliable only if that offset is non-zero.
+ *
+ * Returns the entry, or NULL if a new entry could not be allocated.
+ */
+static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs(struct rb_root *rb_root,
+ u64 vmcs,
+ u64 dflt_tsc_offset)
+{
+ struct rb_node **p = &rb_root->rb_node;
+ struct rb_node *parent = NULL;
+ struct intel_pt_vmcs_info *v;
+
+ /* Descend the tree; on a miss, p and parent identify the insertion point */
+ while (*p) {
+ parent = *p;
+ v = rb_entry(parent, struct intel_pt_vmcs_info, rb_node);
+
+ if (v->vmcs == vmcs)
+ return v;
+
+ if (vmcs < v->vmcs)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ v = zalloc(sizeof(*v));
+ if (v) {
+ v->vmcs = vmcs;
+ v->tsc_offset = dflt_tsc_offset;
+ v->reliable = dflt_tsc_offset;
+
+ rb_link_node(&v->rb_node, parent, p);
+ rb_insert_color(&v->rb_node, rb_root);
+ }
+
+ return v;
+}
+
+/*
+ * Decoder callback to find or create the VMCS info for @vmcs. @data is the
+ * struct intel_pt_queue. Returns NULL when @vmcs is zero and no default TSC
+ * Offset is set, or when a new entry cannot be allocated.
+ */
+static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs_info(void *data, uint64_t vmcs)
+{
+ struct intel_pt_queue *ptq = data;
+ struct intel_pt *pt = ptq->pt;
+
+ if (!vmcs && !pt->dflt_tsc_offset)
+ return NULL;
+
+ return intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, pt->dflt_tsc_offset);
+}
+
+/* Remove and free every entry in the pt->vmcs_info rbtree */
+static void intel_pt_free_vmcs_info(struct intel_pt *pt)
+{
+ struct intel_pt_vmcs_info *v;
+ struct rb_node *n;
+
+ n = rb_first(&pt->vmcs_info);
+ while (n) {
+ v = rb_entry(n, struct intel_pt_vmcs_info, rb_node);
+ /* Fetch the successor before the current node is erased and freed */
+ n = rb_next(n);
+ rb_erase(&v->rb_node, &pt->vmcs_info);
+ free(v);
+ }
+}
+
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
struct auxtrace_buffer *b)
{
@@ -278,9 +341,17 @@ static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *
void *start;
start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
- pt->have_tsc, &consecutive);
+ pt->have_tsc, &consecutive,
+ pt->synth_opts.vm_time_correlation);
if (!start)
return -EINVAL;
+ /*
+ * In the case of vm_time_correlation, the overlap might contain TSC
+ * packets that will not be fixed, and that will then no longer work for
+ * overlap detection. Avoid that by zeroing out the overlap.
+ */
+ if (pt->synth_opts.vm_time_correlation)
+ memset(b->data, 0, start - b->data);
b->use_size = b->data + b->size - start;
b->use_data = start;
if (b->use_size && consecutive)
@@ -707,8 +778,10 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
*ip += intel_pt_insn->length;
- if (to_ip && *ip == to_ip)
+ if (to_ip && *ip == to_ip) {
+ intel_pt_insn->length = 0;
goto out_no_cache;
+ }
if (*ip >= al.map->end)
break;
@@ -899,7 +972,7 @@ static bool intel_pt_timeless_decoding(struct intel_pt *pt)
bool timeless_decoding = true;
u64 config;
- if (!pt->tsc_bit || !pt->cap_user_time_zero)
+ if (!pt->tsc_bit || !pt->cap_user_time_zero || pt->synth_opts.timeless_decoding)
return true;
evlist__for_each_entry(pt->session->evlist, evsel) {
@@ -947,6 +1020,19 @@ static bool intel_pt_have_tsc(struct intel_pt *pt)
return have_tsc;
}
+static bool intel_pt_have_mtc(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+ (config & pt->mtc_bit))
+ return true;
+ }
+ return false;
+}
+
static bool intel_pt_sampling_mode(struct intel_pt *pt)
{
struct evsel *evsel;
@@ -1101,6 +1187,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.get_trace = intel_pt_get_trace;
params.walk_insn = intel_pt_walk_next_insn;
params.lookahead = intel_pt_lookahead;
+ params.findnew_vmcs_info = intel_pt_findnew_vmcs_info;
params.data = ptq;
params.return_compression = intel_pt_return_compression(pt);
params.branch_enable = intel_pt_branch_enable(pt);
@@ -1110,6 +1197,9 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
params.quick = pt->synth_opts.quick;
+ params.vm_time_correlation = pt->synth_opts.vm_time_correlation;
+ params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run;
+ params.first_timestamp = pt->first_timestamp;
if (pt->filts.cnt > 0)
params.pgd_ip = intel_pt_pgd_ip;
@@ -1174,6 +1264,21 @@ static void intel_pt_free_queue(void *priv)
free(ptq);
}
+static void intel_pt_first_timestamp(struct intel_pt *pt, u64 timestamp)
+{
+ unsigned int i;
+
+ pt->first_timestamp = timestamp;
+
+ for (i = 0; i < pt->queues.nr_queues; i++) {
+ struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (ptq && ptq->decoder)
+ intel_pt_set_first_timestamp(ptq->decoder, timestamp);
+ }
+}
+
static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
struct auxtrace_queue *queue)
{
@@ -1198,6 +1303,7 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{
+ ptq->insn_len = 0;
if (ptq->state->flags & INTEL_PT_ABORT_TX) {
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
} else if (ptq->state->flags & INTEL_PT_ASYNC) {
@@ -1211,7 +1317,6 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
PERF_IP_FLAG_ASYNC |
PERF_IP_FLAG_INTERRUPT;
- ptq->insn_len = 0;
} else {
if (ptq->state->from_ip)
ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
@@ -2377,7 +2482,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
if (pt->per_cpu_mmaps &&
(pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
!pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
- !pt->sampling_mode) {
+ !pt->sampling_mode && !pt->synth_opts.vm_time_correlation) {
pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
if (pt->switch_ip) {
intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
@@ -2876,6 +2981,8 @@ static int intel_pt_process_event(struct perf_session *session,
sample->time);
}
} else if (timestamp) {
+ if (!pt->first_timestamp)
+ intel_pt_first_timestamp(pt, timestamp);
err = intel_pt_process_queues(pt, timestamp);
}
if (err)
@@ -2962,6 +3069,7 @@ static void intel_pt_free(struct perf_session *session)
auxtrace_heap__free(&pt->heap);
intel_pt_free_events(session);
session->auxtrace = NULL;
+ intel_pt_free_vmcs_info(pt);
thread__put(pt->unknown_thread);
addr_filters__exit(&pt->filts);
zfree(&pt->chain);
@@ -3405,6 +3513,65 @@ static int intel_pt_setup_time_ranges(struct intel_pt *pt,
return 0;
}
+static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args)
+{
+ struct intel_pt_vmcs_info *vmcs_info;
+ u64 tsc_offset, vmcs;
+ char *p = *args;
+
+ errno = 0;
+
+ p = skip_spaces(p);
+ if (!*p)
+ return 1;
+
+ tsc_offset = strtoull(p, &p, 0);
+ if (errno)
+ return -errno;
+ p = skip_spaces(p);
+ if (*p != ':') {
+ pt->dflt_tsc_offset = tsc_offset;
+ *args = p;
+ return 0;
+ }
+ while (1) {
+ vmcs = strtoull(p, &p, 0);
+ if (errno)
+ return -errno;
+ if (!vmcs)
+ return -EINVAL;
+ vmcs_info = intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, tsc_offset);
+ if (!vmcs_info)
+ return -ENOMEM;
+ p = skip_spaces(p);
+ if (*p != ',')
+ break;
+ p += 1;
+ }
+ *args = p;
+ return 0;
+}
+
+static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt)
+{
+ char *args = pt->synth_opts.vm_tm_corr_args;
+ int ret;
+
+ if (!args)
+ return 0;
+
+ do {
+ ret = intel_pt_parse_vm_tm_corr_arg(pt, &args);
+ } while (!ret);
+
+ if (ret < 0) {
+ pr_err("Failed to parse VM Time Correlation options\n");
+ return ret;
+ }
+
+ return 0;
+}
+
static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
[INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
@@ -3467,6 +3634,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
if (!pt)
return -ENOMEM;
+ pt->vmcs_info = RB_ROOT;
+
addr_filters__init(&pt->filts);
err = perf_config(intel_pt_perf_config, pt);
@@ -3479,6 +3648,20 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
intel_pt_log_set_name(INTEL_PT_PMU_NAME);
+ if (session->itrace_synth_opts->set) {
+ pt->synth_opts = *session->itrace_synth_opts;
+ } else {
+ struct itrace_synth_opts *opts = session->itrace_synth_opts;
+
+ itrace_synth_opts__set_default(&pt->synth_opts, opts->default_no_sample);
+ if (!opts->default_no_sample && !opts->inject) {
+ pt->synth_opts.branches = false;
+ pt->synth_opts.callchain = true;
+ pt->synth_opts.add_callchain = true;
+ }
+ pt->synth_opts.thread_stack = opts->thread_stack;
+ }
+
pt->session = session;
pt->machine = &session->machines.host; /* No kvm support */
pt->auxtrace_type = auxtrace_info->type;
@@ -3560,6 +3743,28 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->sampling_mode = intel_pt_sampling_mode(pt);
pt->est_tsc = !pt->timeless_decoding;
+ if (pt->synth_opts.vm_time_correlation) {
+ if (pt->timeless_decoding) {
+ pr_err("Intel PT has no time information for VM Time Correlation\n");
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ if (session->itrace_synth_opts->ptime_range) {
+ pr_err("Time ranges cannot be specified with VM Time Correlation\n");
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ /* Currently TSC Offset is calculated using MTC packets */
+ if (!intel_pt_have_mtc(pt)) {
+ pr_err("MTC packets must have been enabled for VM Time Correlation\n");
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ err = intel_pt_parse_vm_tm_corr_args(pt);
+ if (err)
+ goto err_free_queues;
+ }
+
pt->unknown_thread = thread__new(999999999, 999999999);
if (!pt->unknown_thread) {
err = -ENOMEM;
@@ -3609,21 +3814,6 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
goto err_delete_thread;
}
- if (session->itrace_synth_opts->set) {
- pt->synth_opts = *session->itrace_synth_opts;
- } else {
- itrace_synth_opts__set_default(&pt->synth_opts,
- session->itrace_synth_opts->default_no_sample);
- if (!session->itrace_synth_opts->default_no_sample &&
- !session->itrace_synth_opts->inject) {
- pt->synth_opts.branches = false;
- pt->synth_opts.callchain = true;
- pt->synth_opts.add_callchain = true;
- }
- pt->synth_opts.thread_stack =
- session->itrace_synth_opts->thread_stack;
- }
-
if (pt->synth_opts.log)
intel_pt_log_enable();
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 3ceaf7ef3301..cbd9b268f168 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -504,6 +504,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
goto errout;
}
+ err = -ENOMEM;
if (asprintf(&pipe_template, "%s -emit-llvm | %s -march=bpf %s -filetype=obj -o -",
template, llc_path, opts) < 0) {
pr_err("ERROR:\tnot enough memory to setup command line\n");
@@ -524,6 +525,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
pr_debug("llvm compiling command template: %s\n", template);
+ err = -ENOMEM;
if (asprintf(&command_echo, "echo -n \"%s\"", template) < 0)
goto errout;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 3ff4936a15a4..da19be7da284 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -776,10 +776,10 @@ static int machine__process_ksymbol_register(struct machine *machine,
if (dso) {
dso->kernel = DSO_SPACE__KERNEL;
map = map__new2(0, dso);
+ dso__put(dso);
}
if (!dso || !map) {
- dso__put(dso);
return -ENOMEM;
}
@@ -792,6 +792,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
map->start = event->ksymbol.addr;
map->end = map->start + event->ksymbol.len;
maps__insert(&machine->kmaps, map);
+ map__put(map);
dso__set_loaded(dso);
if (is_bpf_image(event->ksymbol.name)) {
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index f93a852ad838..f0e75df72b80 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -12,6 +12,8 @@
#include "mem-events.h"
#include "debug.h"
#include "symbol.h"
+#include "pmu.h"
+#include "pmu-hybrid.h"
unsigned int perf_mem_events__loads_ldlat = 30;
@@ -24,8 +26,6 @@ static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
};
#undef E
-#undef E
-
static char mem_loads_name[100];
static bool mem_loads_name__init;
@@ -37,7 +37,7 @@ struct perf_mem_event * __weak perf_mem_events__ptr(int i)
return &perf_mem_events[i];
}
-char * __weak perf_mem_events__name(int i)
+char * __weak perf_mem_events__name(int i, char *pmu_name __maybe_unused)
{
struct perf_mem_event *e = perf_mem_events__ptr(i);
@@ -100,6 +100,15 @@ int perf_mem_events__parse(const char *str)
return -1;
}
+static bool perf_mem_event__supported(const char *mnt, char *sysfs_name)
+{
+ char path[PATH_MAX];
+ struct stat st;
+
+ scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
+ return !stat(path, &st);
+}
+
int perf_mem_events__init(void)
{
const char *mnt = sysfs__mount();
@@ -110,9 +119,9 @@ int perf_mem_events__init(void)
return -ENOENT;
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
- char path[PATH_MAX];
struct perf_mem_event *e = perf_mem_events__ptr(j);
- struct stat st;
+ struct perf_pmu *pmu;
+ char sysfs_name[100];
/*
* If the event entry isn't valid, skip initialization
@@ -121,11 +130,20 @@ int perf_mem_events__init(void)
if (!e->tag)
continue;
- scnprintf(path, PATH_MAX, "%s/devices/%s",
- mnt, e->sysfs_name);
+ if (!perf_pmu__has_hybrid()) {
+ scnprintf(sysfs_name, sizeof(sysfs_name),
+ e->sysfs_name, "cpu");
+ e->supported = perf_mem_event__supported(mnt, sysfs_name);
+ } else {
+ perf_pmu__for_each_hybrid_pmu(pmu) {
+ scnprintf(sysfs_name, sizeof(sysfs_name),
+ e->sysfs_name, pmu->name);
+ e->supported |= perf_mem_event__supported(mnt, sysfs_name);
+ }
+ }
- if (!stat(path, &st))
- e->supported = found = true;
+ if (e->supported)
+ found = true;
}
return found ? 0 : -ENOENT;
@@ -141,11 +159,76 @@ void perf_mem_events__list(void)
fprintf(stderr, "%-13s%-*s%s\n",
e->tag ?: "",
verbose > 0 ? 25 : 0,
- verbose > 0 ? perf_mem_events__name(j) : "",
+ verbose > 0 ? perf_mem_events__name(j, NULL) : "",
e->supported ? ": available" : "");
}
}
+static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
+ int idx)
+{
+ const char *mnt = sysfs__mount();
+ char sysfs_name[100];
+ struct perf_pmu *pmu;
+
+ perf_pmu__for_each_hybrid_pmu(pmu) {
+ scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
+ pmu->name);
+ if (!perf_mem_event__supported(mnt, sysfs_name)) {
+ pr_err("failed: event '%s' not supported\n",
+ perf_mem_events__name(idx, pmu->name));
+ }
+ }
+}
+
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
+ char **rec_tmp, int *tmp_nr)
+{
+ int i = *argv_nr, k = 0;
+ struct perf_mem_event *e;
+ struct perf_pmu *pmu;
+ char *s;
+
+ for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ e = perf_mem_events__ptr(j);
+ if (!e->record)
+ continue;
+
+ if (!perf_pmu__has_hybrid()) {
+ if (!e->supported) {
+ pr_err("failed: event '%s' not supported\n",
+ perf_mem_events__name(j, NULL));
+ return -1;
+ }
+
+ rec_argv[i++] = "-e";
+ rec_argv[i++] = perf_mem_events__name(j, NULL);
+ } else {
+ if (!e->supported) {
+ perf_mem_events__print_unsupport_hybrid(e, j);
+ return -1;
+ }
+
+ perf_pmu__for_each_hybrid_pmu(pmu) {
+ rec_argv[i++] = "-e";
+ s = perf_mem_events__name(j, pmu->name);
+ if (s) {
+ s = strdup(s);
+ if (!s)
+ return -1;
+
+ rec_argv[i++] = s;
+ rec_tmp[k++] = s;
+ }
+ }
+ }
+ }
+
+ *argv_nr = i;
+ *tmp_nr = k;
+ return 0;
+}
+
static const char * const tlb_access[] = {
"N/A",
"HIT",
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index cacdebd65b8a..916242f8020a 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -38,11 +38,13 @@ extern unsigned int perf_mem_events__loads_ldlat;
int perf_mem_events__parse(const char *str);
int perf_mem_events__init(void);
-char *perf_mem_events__name(int i);
+char *perf_mem_events__name(int i, char *pmu_name);
struct perf_mem_event *perf_mem_events__ptr(int i);
bool is_mem_loads_aux_event(struct evsel *leader);
void perf_mem_events__list(void);
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
+ char **rec_tmp, int *tmp_nr);
int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 8336dd8e8098..d3cf2dee36c8 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -162,10 +162,10 @@ static bool contains_event(struct evsel **metric_events, int num_events,
return false;
}
-static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2)
+static bool evsel_same_pmu_or_none(struct evsel *ev1, struct evsel *ev2)
{
if (!ev1->pmu_name || !ev2->pmu_name)
- return false;
+ return true;
return !strcmp(ev1->pmu_name, ev2->pmu_name);
}
@@ -288,7 +288,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
*/
if (!has_constraint &&
ev->leader != metric_events[i]->leader &&
- evsel_same_pmu(ev->leader, metric_events[i]->leader))
+ evsel_same_pmu_or_none(ev->leader, metric_events[i]->leader))
break;
if (!strcmp(metric_events[i]->name, ev->name)) {
set_bit(ev->idx, evlist_used);
@@ -1073,16 +1073,18 @@ static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe,
ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids);
if (ret)
- return ret;
+ goto out;
ret = resolve_metric(d->metric_no_group,
d->metric_list, NULL, d->ids);
if (ret)
- return ret;
+ goto out;
*(d->has_match) = true;
- return *d->ret;
+out:
+ *(d->ret) = ret;
+ return ret;
}
static int metricgroup__add_metric(const char *metric, bool metric_no_group,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4dad14265b81..84108c17f48d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -150,6 +150,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
.symbol = "bpf-output",
.alias = "",
},
+ [PERF_COUNT_SW_CGROUP_SWITCHES] = {
+ .symbol = "cgroup-switches",
+ .alias = "",
+ },
};
#define __PERF_EVENT_FIELD(config, name) \
@@ -2928,9 +2932,14 @@ restart:
}
for (i = 0; i < max; i++, syms++) {
+ /*
+ * New attr.config still not supported here, the latest
+ * example was PERF_COUNT_SW_CGROUP_SWITCHES
+ */
+ if (syms->symbol == NULL)
+ continue;
- if (event_glob != NULL && syms->symbol != NULL &&
- !(strglobmatch(syms->symbol, event_glob) ||
+ if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) ||
(syms->alias && strglobmatch(syms->alias, event_glob))))
continue;
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index fb8646cc3e83..923849024b15 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -347,6 +347,7 @@ emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM
dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
+cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
/*
* We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 829af17a0867..020411682a3c 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -103,6 +103,11 @@ static void perf_probe_build_id(struct evsel *evsel)
evsel->core.attr.build_id = 1;
}
+static void perf_probe_cgroup(struct evsel *evsel)
+{
+ evsel->core.attr.cgroup = 1;
+}
+
bool perf_can_sample_identifier(void)
{
return perf_probe_api(perf_probe_sample_identifier);
@@ -182,3 +187,8 @@ bool perf_can_record_build_id(void)
{
return perf_probe_api(perf_probe_build_id);
}
+
+bool perf_can_record_cgroup(void)
+{
+ return perf_probe_api(perf_probe_cgroup);
+}
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
index f12ca55f509a..b104168efb15 100644
--- a/tools/perf/util/perf_api_probe.h
+++ b/tools/perf/util/perf_api_probe.h
@@ -12,5 +12,6 @@ bool perf_can_record_switch_events(void);
bool perf_can_record_text_poke_events(void);
bool perf_can_sample_identifier(void);
bool perf_can_record_build_id(void);
+bool perf_can_record_cgroup(void);
#endif // __PERF_API_PROBE_H
diff --git a/tools/perf/util/perf_dlfilter.h b/tools/perf/util/perf_dlfilter.h
new file mode 100644
index 000000000000..3eef03d661b4
--- /dev/null
+++ b/tools/perf/util/perf_dlfilter.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * perf_dlfilter.h: API for perf --dlfilter shared object
+ * Copyright (c) 2021, Intel Corporation.
+ */
+#ifndef _LINUX_PERF_DLFILTER_H
+#define _LINUX_PERF_DLFILTER_H
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+/* Definitions for perf_dlfilter_sample flags */
+enum {
+ PERF_DLFILTER_FLAG_BRANCH = 1ULL << 0,
+ PERF_DLFILTER_FLAG_CALL = 1ULL << 1,
+ PERF_DLFILTER_FLAG_RETURN = 1ULL << 2,
+ PERF_DLFILTER_FLAG_CONDITIONAL = 1ULL << 3,
+ PERF_DLFILTER_FLAG_SYSCALLRET = 1ULL << 4,
+ PERF_DLFILTER_FLAG_ASYNC = 1ULL << 5,
+ PERF_DLFILTER_FLAG_INTERRUPT = 1ULL << 6,
+ PERF_DLFILTER_FLAG_TX_ABORT = 1ULL << 7,
+ PERF_DLFILTER_FLAG_TRACE_BEGIN = 1ULL << 8,
+ PERF_DLFILTER_FLAG_TRACE_END = 1ULL << 9,
+ PERF_DLFILTER_FLAG_IN_TX = 1ULL << 10,
+ PERF_DLFILTER_FLAG_VMENTRY = 1ULL << 11,
+ PERF_DLFILTER_FLAG_VMEXIT = 1ULL << 12,
+};
+
+/*
+ * perf sample event information (as per perf script and <linux/perf_event.h>)
+ */
+struct perf_dlfilter_sample {
+ __u32 size; /* Size of this structure (for compatibility checking) */
+ __u16 ins_lat; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+ __u16 p_stage_cyc; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+ __u64 ip;
+ __s32 pid;
+ __s32 tid;
+ __u64 time;
+ __u64 addr;
+ __u64 id;
+ __u64 stream_id;
+ __u64 period;
+ __u64 weight; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+ __u64 transaction; /* Refer PERF_SAMPLE_TRANSACTION in <linux/perf_event.h> */
+ __u64 insn_cnt; /* For instructions-per-cycle (IPC) */
+ __u64 cyc_cnt; /* For instructions-per-cycle (IPC) */
+ __s32 cpu;
+ __u32 flags; /* Refer PERF_DLFILTER_FLAG_* above */
+ __u64 data_src; /* Refer PERF_SAMPLE_DATA_SRC in <linux/perf_event.h> */
+ __u64 phys_addr; /* Refer PERF_SAMPLE_PHYS_ADDR in <linux/perf_event.h> */
+ __u64 data_page_size; /* Refer PERF_SAMPLE_DATA_PAGE_SIZE in <linux/perf_event.h> */
+ __u64 code_page_size; /* Refer PERF_SAMPLE_CODE_PAGE_SIZE in <linux/perf_event.h> */
+ __u64 cgroup; /* Refer PERF_SAMPLE_CGROUP in <linux/perf_event.h> */
+ __u8 cpumode; /* Refer CPUMODE_MASK etc in <linux/perf_event.h> */
+ __u8 addr_correlates_sym; /* True => resolve_addr() can be called */
+ __u16 misc; /* Refer perf_event_header in <linux/perf_event.h> */
+ __u32 raw_size; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
+ const void *raw_data; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
+ __u64 brstack_nr; /* Number of brstack entries */
+ const struct perf_branch_entry *brstack; /* Refer <linux/perf_event.h> */
+ __u64 raw_callchain_nr; /* Number of raw_callchain entries */
+ const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */
+ const char *event;
+};
+
+/*
+ * Address location (as per perf script)
+ */
+struct perf_dlfilter_al {
+ __u32 size; /* Size of this structure (for compatibility checking) */
+ __u32 symoff;
+ const char *sym;
+ __u64 addr; /* Mapped address (from dso) */
+ __u64 sym_start;
+ __u64 sym_end;
+ const char *dso;
+ __u8 sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer <elf.h> */
+ __u8 is_64_bit; /* Only valid if dso is not NULL */
+ __u8 is_kernel_ip; /* True if in kernel space */
+ __u32 buildid_size;
+ __u8 *buildid;
+ /* Below members are only populated by resolve_ip() */
+ __u8 filtered; /* True if this sample event will be filtered out */
+ const char *comm;
+};
+
+struct perf_dlfilter_fns {
+ /* Return information about ip */
+ const struct perf_dlfilter_al *(*resolve_ip)(void *ctx);
+ /* Return information about addr (if addr_correlates_sym) */
+ const struct perf_dlfilter_al *(*resolve_addr)(void *ctx);
+ /* Return arguments from --dlarg option */
+ char **(*args)(void *ctx, int *dlargc);
+ /*
+ * Return information about address (al->size must be set before
+ * calling). Returns 0 on success, -1 otherwise.
+ */
+ __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al);
+ /* Return instruction bytes and length */
+ const __u8 *(*insn)(void *ctx, __u32 *length);
+ /* Return source file name and line number */
+ const char *(*srcline)(void *ctx, __u32 *line_number);
+ /* Return perf_event_attr, refer <linux/perf_event.h> */
+ struct perf_event_attr *(*attr)(void *ctx);
+ /* Read object code, return number of bytes read */
+ __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
+ /* Reserved */
+ void *(*reserved[120])(void *);
+};
+
+/*
+ * If implemented, 'start' will be called at the beginning,
+ * before any calls to 'filter_event'. Return 0 to indicate success,
+ * or return a negative error code. '*data' can be assigned for use
+ * by other functions. 'ctx' is needed for calls to perf_dlfilter_fns,
+ * but most perf_dlfilter_fns are not valid when called from 'start'.
+ */
+int start(void **data, void *ctx);
+
+/*
+ * If implemented, 'stop' will be called at the end,
+ * after any calls to 'filter_event'. Return 0 to indicate success, or
+ * return a negative error code. 'data' is set by start(). 'ctx' is
+ * needed for calls to perf_dlfilter_fns, but most perf_dlfilter_fns
+ * are not valid when called from 'stop'.
+ */
+int stop(void *data, void *ctx);
+
+/*
+ * If implemented, 'filter_event' will be called for each sample
+ * event. Return 0 to keep the sample event, 1 to filter it out, or
+ * return a negative error code. 'data' is set by start(). 'ctx' is
+ * needed for calls to perf_dlfilter_fns.
+ */
+int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx);
+
+/*
+ * The same as 'filter_event' except it is called before internal
+ * filtering.
+ */
+int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx);
+
+/*
+ * If implemented, return a one-line description of the filter, and optionally
+ * a longer description.
+ */
+const char *filter_description(const char **long_description);
+
+#endif
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
index d735acb6c29c..6eef6dfeaa57 100644
--- a/tools/perf/util/pfm.c
+++ b/tools/perf/util/pfm.c
@@ -62,8 +62,16 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
}
/* no event */
- if (*q == '\0')
+ if (*q == '\0') {
+ if (*sep == '}') {
+ if (grp_evt < 0) {
+ ui__error("cannot close a non-existing event group\n");
+ goto error;
+ }
+ grp_evt--;
+ }
continue;
+ }
memset(&attr, 0, sizeof(attr));
event_attr_init(&attr);
@@ -107,6 +115,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
grp_evt = -1;
}
}
+ free(p_orig);
return 0;
error:
free(p_orig);
diff --git a/tools/perf/util/pmu-hybrid.h b/tools/perf/util/pmu-hybrid.h
index d0fa7bc50a76..2b186c26a43e 100644
--- a/tools/perf/util/pmu-hybrid.h
+++ b/tools/perf/util/pmu-hybrid.h
@@ -19,4 +19,15 @@ struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name);
bool perf_pmu__is_hybrid(const char *name);
char *perf_pmu__hybrid_type_to_pmu(const char *type);
+static inline int perf_pmu__hybrid_pmu_num(void)
+{
+ struct perf_pmu *pmu;
+ int num = 0;
+
+ perf_pmu__for_each_hybrid_pmu(pmu)
+ num++;
+
+ return num;
+}
+
#endif /* __PMU_HYBRID_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index a78c8d59a555..c14e1d228e56 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -108,7 +108,6 @@ void exit_probe_symbol_maps(void)
static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap)
{
- /* kmap->ref_reloc_sym should be set if host_machine is initialized */
struct kmap *kmap;
struct map *map = machine__kernel_map(host_machine);
@@ -683,8 +682,13 @@ static int post_process_probe_trace_point(struct probe_trace_point *tp,
u64 addr = tp->address - offs;
sym = map__find_symbol(map, addr);
- if (!sym)
- return -ENOENT;
+ if (!sym) {
+ /*
+ * If the address is in the inittext section, map can not
+ * find it. Ignore it if we are probing offline kernel.
+ */
+ return (symbol_conf.ignore_vmlinux_buildid) ? 0 : -ENOENT;
+ }
if (strcmp(sym->name, tp->symbol)) {
/* If we have no realname, use symbol for it */
@@ -819,7 +823,10 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
reloc_sym = kernel_get_ref_reloc_sym(&map);
if (!reloc_sym) {
- pr_warning("Relocated base symbol is not found!\n");
+ pr_warning("Relocated base symbol is not found! "
+ "Check /proc/sys/kernel/kptr_restrict\n"
+ "and /proc/sys/kernel/perf_event_paranoid. "
+ "Or run as privileged perf user.\n\n");
return -EINVAL;
}
@@ -2120,19 +2127,55 @@ static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
}
static int
-synthesize_uprobe_trace_def(struct probe_trace_event *tev, struct strbuf *buf)
+synthesize_probe_trace_args(struct probe_trace_event *tev, struct strbuf *buf)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < tev->nargs && ret >= 0; i++)
+ ret = synthesize_probe_trace_arg(&tev->args[i], buf);
+
+ return ret;
+}
+
+static int
+synthesize_uprobe_trace_def(struct probe_trace_point *tp, struct strbuf *buf)
{
- struct probe_trace_point *tp = &tev->point;
int err;
+ /* Uprobes must have tp->module */
+ if (!tp->module)
+ return -EINVAL;
+ /*
+ * If tp->address == 0, then this point must be an
+ * absolute address uprobe.
+ * try_to_find_absolute_address() should have made
+ * tp->symbol to "0x0".
+ */
+ if (!tp->address && (!tp->symbol || strcmp(tp->symbol, "0x0")))
+ return -EINVAL;
+
+ /* Use the tp->address for uprobes */
err = strbuf_addf(buf, "%s:0x%lx", tp->module, tp->address);
if (err >= 0 && tp->ref_ctr_offset) {
if (!uprobe_ref_ctr_is_supported())
- return -1;
+ return -EINVAL;
err = strbuf_addf(buf, "(0x%lx)", tp->ref_ctr_offset);
}
- return err >= 0 ? 0 : -1;
+ return err >= 0 ? 0 : err;
+}
+
+static int
+synthesize_kprobe_trace_def(struct probe_trace_point *tp, struct strbuf *buf)
+{
+ if (!strncmp(tp->symbol, "0x", 2)) {
+ /* Absolute address. See try_to_find_absolute_address() */
+ return strbuf_addf(buf, "%s%s0x%lx", tp->module ?: "",
+ tp->module ? ":" : "", tp->address);
+ } else {
+ return strbuf_addf(buf, "%s%s%s+%lu", tp->module ?: "",
+ tp->module ? ":" : "", tp->symbol, tp->offset);
+ }
}
char *synthesize_probe_trace_command(struct probe_trace_event *tev)
@@ -2140,11 +2183,7 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
struct probe_trace_point *tp = &tev->point;
struct strbuf buf;
char *ret = NULL;
- int i, err;
-
- /* Uprobes must have tp->module */
- if (tev->uprobes && !tp->module)
- return NULL;
+ int err;
if (strbuf_init(&buf, 32) < 0)
return NULL;
@@ -2152,37 +2191,17 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
if (strbuf_addf(&buf, "%c:%s/%s ", tp->retprobe ? 'r' : 'p',
tev->group, tev->event) < 0)
goto error;
- /*
- * If tp->address == 0, then this point must be a
- * absolute address uprobe.
- * try_to_find_absolute_address() should have made
- * tp->symbol to "0x0".
- */
- if (tev->uprobes && !tp->address) {
- if (!tp->symbol || strcmp(tp->symbol, "0x0"))
- goto error;
- }
- /* Use the tp->address for uprobes */
- if (tev->uprobes) {
- err = synthesize_uprobe_trace_def(tev, &buf);
- } else if (!strncmp(tp->symbol, "0x", 2)) {
- /* Absolute address. See try_to_find_absolute_address() */
- err = strbuf_addf(&buf, "%s%s0x%lx", tp->module ?: "",
- tp->module ? ":" : "", tp->address);
- } else {
- err = strbuf_addf(&buf, "%s%s%s+%lu", tp->module ?: "",
- tp->module ? ":" : "", tp->symbol, tp->offset);
- }
-
- if (err)
- goto error;
+ if (tev->uprobes)
+ err = synthesize_uprobe_trace_def(tp, &buf);
+ else
+ err = synthesize_kprobe_trace_def(tp, &buf);
- for (i = 0; i < tev->nargs; i++)
- if (synthesize_probe_trace_arg(&tev->args[i], &buf) < 0)
- goto error;
+ if (err >= 0)
+ err = synthesize_probe_trace_args(tev, &buf);
- ret = strbuf_detach(&buf, NULL);
+ if (err >= 0)
+ ret = strbuf_detach(&buf, NULL);
error:
strbuf_release(&buf);
return ret;
@@ -2934,7 +2953,7 @@ static int find_probe_functions(struct map *map, char *name,
bool cut_version = true;
if (map__load(map) < 0)
- return 0;
+ return -EACCES; /* Possible permission error to load symbols */
/* If user gives a version, don't cut off the version from symbols */
if (strchr(name, '@'))
@@ -2973,6 +2992,17 @@ void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
struct map *map __maybe_unused,
struct symbol *sym __maybe_unused) { }
+
+static void pr_kallsyms_access_error(void)
+{
+ pr_err("Please ensure you can read the /proc/kallsyms symbol addresses.\n"
+ "If /proc/sys/kernel/kptr_restrict is '2', you can not read\n"
+ "kernel symbol addresses even if you are a superuser. Please change\n"
+ "it to '1'. If kptr_restrict is '1', the superuser can read the\n"
+ "symbol addresses.\n"
+ "In that case, please run this command again with sudo.\n");
+}
+
/*
* Find probe function addresses from map.
* Return an error or the number of found probe_trace_event
@@ -3009,8 +3039,16 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
*/
num_matched_functions = find_probe_functions(map, pp->function, syms);
if (num_matched_functions <= 0) {
- pr_err("Failed to find symbol %s in %s\n", pp->function,
- pev->target ? : "kernel");
+ if (num_matched_functions == -EACCES) {
+ pr_err("Failed to load symbols from %s\n",
+ pev->target ?: "/proc/kallsyms");
+ if (pev->target)
+ pr_err("Please ensure the file is not stripped.\n");
+ else
+ pr_kallsyms_access_error();
+ } else
+ pr_err("Failed to find symbol %s in %s\n", pp->function,
+ pev->target ? : "kernel");
ret = -ENOENT;
goto out;
} else if (num_matched_functions > probe_conf.max_probes) {
@@ -3025,7 +3063,10 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
(!pp->retprobe || kretprobe_offset_is_supported())) {
reloc_sym = kernel_get_ref_reloc_sym(NULL);
if (!reloc_sym) {
- pr_warning("Relocated base symbol is not found!\n");
+ pr_warning("Relocated base symbol is not found! "
+ "Check /proc/sys/kernel/kptr_restrict\n"
+ "and /proc/sys/kernel/perf_event_paranoid. "
+ "Or run as privileged perf user.\n\n");
ret = -EINVAL;
goto out;
}
@@ -3523,6 +3564,78 @@ int show_probe_trace_events(struct perf_probe_event *pevs, int npevs)
return ret;
}
+static int show_bootconfig_event(struct probe_trace_event *tev)
+{
+ struct probe_trace_point *tp = &tev->point;
+ struct strbuf buf;
+ char *ret = NULL;
+ int err;
+
+ if (strbuf_init(&buf, 32) < 0)
+ return -ENOMEM;
+
+ err = synthesize_kprobe_trace_def(tp, &buf);
+ if (err >= 0)
+ err = synthesize_probe_trace_args(tev, &buf);
+ if (err >= 0)
+ ret = strbuf_detach(&buf, NULL);
+ strbuf_release(&buf);
+
+ if (ret) {
+ printf("'%s'", ret);
+ free(ret);
+ }
+
+ return err;
+}
+
+int show_bootconfig_events(struct perf_probe_event *pevs, int npevs)
+{
+ struct strlist *namelist = strlist__new(NULL, NULL);
+ struct probe_trace_event *tev;
+ struct perf_probe_event *pev;
+ char *cur_name = NULL;
+ int i, j, ret = 0;
+
+ if (!namelist)
+ return -ENOMEM;
+
+ for (j = 0; j < npevs && !ret; j++) {
+ pev = &pevs[j];
+ if (pev->group && strcmp(pev->group, "probe"))
+ pr_warning("WARN: Group name %s is ignored\n", pev->group);
+ if (pev->uprobes) {
+ pr_warning("ERROR: Bootconfig doesn't support uprobes\n");
+ ret = -EINVAL;
+ break;
+ }
+ for (i = 0; i < pev->ntevs && !ret; i++) {
+ tev = &pev->tevs[i];
+ /* Skip if the symbol is out of .text or blacklisted */
+ if (!tev->point.symbol && !pev->uprobes)
+ continue;
+
+ /* Set new name for tev (and update namelist) */
+ ret = probe_trace_event__set_name(tev, pev,
+ namelist, true);
+ if (ret)
+ break;
+
+ if (!cur_name || strcmp(cur_name, tev->event)) {
+ printf("%sftrace.event.kprobes.%s.probe = ",
+ cur_name ? "\n" : "", tev->event);
+ cur_name = tev->event;
+ } else
+ printf(", ");
+ ret = show_bootconfig_event(tev);
+ }
+ }
+ printf("\n");
+ strlist__delete(namelist);
+
+ return ret;
+}
+
int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs)
{
int i, ret = 0;
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 4f0eb3a20c36..65769d7949a3 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -15,6 +15,7 @@ struct probe_conf {
bool force_add;
bool no_inlines;
bool cache;
+ bool bootconfig;
int max_probes;
unsigned long magic_num;
};
@@ -163,6 +164,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
int show_probe_trace_events(struct perf_probe_event *pevs, int npevs);
+int show_bootconfig_events(struct perf_probe_event *pevs, int npevs);
void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
struct strfilter;
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 52273542e6ef..f9a6cbcd6415 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -22,6 +22,7 @@
#include "symbol.h"
#include "strbuf.h"
#include <api/fs/tracing_path.h>
+#include <api/fs/fs.h>
#include "probe-event.h"
#include "probe-file.h"
#include "session.h"
@@ -31,44 +32,78 @@
/* 4096 - 2 ('\n' + '\0') */
#define MAX_CMDLEN 4094
-static void print_open_warning(int err, bool uprobe)
+static bool print_common_warning(int err, bool readwrite)
{
- char sbuf[STRERR_BUFSIZE];
+ if (err == -EACCES)
+ pr_warning("No permission to %s tracefs.\nPlease %s\n",
+ readwrite ? "write" : "read",
+ readwrite ? "run this command again with sudo." :
+ "try 'sudo mount -o remount,mode=755 /sys/kernel/tracing/'");
+ else
+ return false;
- if (err == -ENOENT) {
- const char *config;
+ return true;
+}
- if (uprobe)
- config = "CONFIG_UPROBE_EVENTS";
- else
- config = "CONFIG_KPROBE_EVENTS";
+static bool print_configure_probe_event(int kerr, int uerr)
+{
+ const char *config, *file;
+
+ if (kerr == -ENOENT && uerr == -ENOENT) {
+ file = "{k,u}probe_events";
+ config = "CONFIG_KPROBE_EVENTS=y and CONFIG_UPROBE_EVENTS=y";
+ } else if (kerr == -ENOENT) {
+ file = "kprobe_events";
+ config = "CONFIG_KPROBE_EVENTS=y";
+ } else if (uerr == -ENOENT) {
+ file = "uprobe_events";
+ config = "CONFIG_UPROBE_EVENTS=y";
+ } else
+ return false;
- pr_warning("%cprobe_events file does not exist"
- " - please rebuild kernel with %s.\n",
- uprobe ? 'u' : 'k', config);
- } else if (err == -ENOTSUP)
- pr_warning("Tracefs or debugfs is not mounted.\n");
+ if (!debugfs__configured() && !tracefs__configured())
+ pr_warning("Debugfs or tracefs is not mounted\n"
+ "Please try 'sudo mount -t tracefs nodev /sys/kernel/tracing/'\n");
else
- pr_warning("Failed to open %cprobe_events: %s\n",
- uprobe ? 'u' : 'k',
- str_error_r(-err, sbuf, sizeof(sbuf)));
+ pr_warning("%s/%s does not exist.\nPlease rebuild kernel with %s.\n",
+ tracing_path_mount(), file, config);
+
+ return true;
+}
+
+static void print_open_warning(int err, bool uprobe, bool readwrite)
+{
+ char sbuf[STRERR_BUFSIZE];
+
+ if (print_common_warning(err, readwrite))
+ return;
+
+ if (print_configure_probe_event(uprobe ? 0 : err, uprobe ? err : 0))
+ return;
+
+ pr_warning("Failed to open %s/%cprobe_events: %s\n",
+ tracing_path_mount(), uprobe ? 'u' : 'k',
+ str_error_r(-err, sbuf, sizeof(sbuf)));
}
-static void print_both_open_warning(int kerr, int uerr)
+static void print_both_open_warning(int kerr, int uerr, bool readwrite)
{
- /* Both kprobes and uprobes are disabled, warn it. */
- if (kerr == -ENOTSUP && uerr == -ENOTSUP)
- pr_warning("Tracefs or debugfs is not mounted.\n");
- else if (kerr == -ENOENT && uerr == -ENOENT)
- pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS "
- "or/and CONFIG_UPROBE_EVENTS.\n");
- else {
- char sbuf[STRERR_BUFSIZE];
- pr_warning("Failed to open kprobe events: %s.\n",
+ char sbuf[STRERR_BUFSIZE];
+
+ if (kerr == uerr && print_common_warning(kerr, readwrite))
+ return;
+
+ if (print_configure_probe_event(kerr, uerr))
+ return;
+
+ if (kerr < 0)
+ pr_warning("Failed to open %s/kprobe_events: %s.\n",
+ tracing_path_mount(),
str_error_r(-kerr, sbuf, sizeof(sbuf)));
- pr_warning("Failed to open uprobe events: %s.\n",
+ if (uerr < 0)
+ pr_warning("Failed to open %s/uprobe_events: %s.\n",
+ tracing_path_mount(),
str_error_r(-uerr, sbuf, sizeof(sbuf)));
- }
}
int open_trace_file(const char *trace_file, bool readwrite)
@@ -109,7 +144,7 @@ int probe_file__open(int flag)
else
fd = open_kprobe_events(flag & PF_FL_RW);
if (fd < 0)
- print_open_warning(fd, flag & PF_FL_UPROBE);
+ print_open_warning(fd, flag & PF_FL_UPROBE, flag & PF_FL_RW);
return fd;
}
@@ -122,7 +157,7 @@ int probe_file__open_both(int *kfd, int *ufd, int flag)
*kfd = open_kprobe_events(flag & PF_FL_RW);
*ufd = open_uprobe_events(flag & PF_FL_RW);
if (*kfd < 0 && *ufd < 0) {
- print_both_open_warning(*kfd, *ufd);
+ print_both_open_warning(*kfd, *ufd, flag & PF_FL_RW);
return *kfd;
}
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 866f2d514d72..b029c29ce227 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -190,6 +190,9 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
immediate_value_is_supported()) {
Dwarf_Sword snum;
+ if (!tvar)
+ return 0;
+
dwarf_formsdata(&attr, &snum);
ret = asprintf(&tvar->value, "\\%ld", (long)snum);
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 0e608a5ef599..32a721b3e9a5 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -371,9 +371,6 @@ static void perl_process_tracepoint(struct perf_sample *sample,
s = nsecs / NSEC_PER_SEC;
ns = nsecs - s * NSEC_PER_SEC;
- scripting_context->event_data = data;
- scripting_context->pevent = evsel->tp_format->tep;
-
ENTER;
SAVETMPS;
PUSHMARK(SP);
@@ -456,8 +453,10 @@ static void perl_process_event_generic(union perf_event *event,
static void perl_process_event(union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
- struct addr_location *al)
+ struct addr_location *al,
+ struct addr_location *addr_al)
{
+ scripting_context__update(scripting_context, event, sample, evsel, al, addr_al);
perl_process_tracepoint(sample, evsel, al);
perl_process_event_generic(event, sample, evsel);
}
@@ -474,11 +473,14 @@ static void run_start_sub(void)
/*
* Start trace script
*/
-static int perl_start_script(const char *script, int argc, const char **argv)
+static int perl_start_script(const char *script, int argc, const char **argv,
+ struct perf_session *session)
{
const char **command_line;
int i, err = 0;
+ scripting_context->session = session;
+
command_line = malloc((argc + 2) * sizeof(const char *));
command_line[0] = "";
command_line[1] = script;
@@ -750,6 +752,7 @@ sub print_backtrace\n\
struct scripting_ops perl_scripting_ops = {
.name = "Perl",
+ .dirname = "perl",
.start_script = perl_start_script,
.flush_script = perl_flush_script,
.stop_script = perl_stop_script,
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 4e4aa4c97ac5..164d2f45028c 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -726,9 +726,49 @@ static void set_regs_in_dict(PyObject *dict,
_PyUnicode_FromString(bf));
}
+static void set_sym_in_dict(PyObject *dict, struct addr_location *al,
+ const char *dso_field, const char *sym_field,
+ const char *symoff_field)
+{
+ if (al->map) {
+ pydict_set_item_string_decref(dict, dso_field,
+ _PyUnicode_FromString(al->map->dso->name));
+ }
+ if (al->sym) {
+ pydict_set_item_string_decref(dict, sym_field,
+ _PyUnicode_FromString(al->sym->name));
+ pydict_set_item_string_decref(dict, symoff_field,
+ PyLong_FromUnsignedLong(get_offset(al->sym, al)));
+ }
+}
+
+static void set_sample_flags(PyObject *dict, u32 flags)
+{
+ const char *ch = PERF_IP_FLAG_CHARS;
+ char *p, str[33];
+
+ for (p = str; *ch; ch++, flags >>= 1) {
+ if (flags & 1)
+ *p++ = *ch;
+ }
+ *p = 0;
+ pydict_set_item_string_decref(dict, "flags", _PyUnicode_FromString(str));
+}
+
+static void python_process_sample_flags(struct perf_sample *sample, PyObject *dict_sample)
+{
+ char flags_disp[SAMPLE_FLAGS_BUF_SIZE];
+
+ set_sample_flags(dict_sample, sample->flags);
+ perf_sample__sprintf_flags(sample->flags, flags_disp, sizeof(flags_disp));
+ pydict_set_item_string_decref(dict_sample, "flags_disp",
+ _PyUnicode_FromString(flags_disp));
+}
+
static PyObject *get_perf_sample_dict(struct perf_sample *sample,
struct evsel *evsel,
struct addr_location *al,
+ struct addr_location *addr_al,
PyObject *callchain)
{
PyObject *dict, *dict_sample, *brstack, *brstacksym;
@@ -772,14 +812,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
(const char *)sample->raw_data, sample->raw_size));
pydict_set_item_string_decref(dict, "comm",
_PyUnicode_FromString(thread__comm_str(al->thread)));
- if (al->map) {
- pydict_set_item_string_decref(dict, "dso",
- _PyUnicode_FromString(al->map->dso->name));
- }
- if (al->sym) {
- pydict_set_item_string_decref(dict, "symbol",
- _PyUnicode_FromString(al->sym->name));
- }
+ set_sym_in_dict(dict, al, "dso", "symbol", "symoff");
pydict_set_item_string_decref(dict, "callchain", callchain);
@@ -789,6 +822,26 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
brstacksym = python_process_brstacksym(sample, al->thread);
pydict_set_item_string_decref(dict, "brstacksym", brstacksym);
+ pydict_set_item_string_decref(dict_sample, "cpumode",
+ _PyLong_FromLong((unsigned long)sample->cpumode));
+
+ if (addr_al) {
+ pydict_set_item_string_decref(dict_sample, "addr_correlates_sym",
+ PyBool_FromLong(1));
+ set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_symbol", "addr_symoff");
+ }
+
+ if (sample->flags)
+ python_process_sample_flags(sample, dict_sample);
+
+ /* Instructions per cycle (IPC) */
+ if (sample->insn_cnt && sample->cyc_cnt) {
+ pydict_set_item_string_decref(dict_sample, "insn_cnt",
+ PyLong_FromUnsignedLongLong(sample->insn_cnt));
+ pydict_set_item_string_decref(dict_sample, "cyc_cnt",
+ PyLong_FromUnsignedLongLong(sample->cyc_cnt));
+ }
+
set_regs_in_dict(dict, sample, evsel);
return dict;
@@ -796,7 +849,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
static void python_process_tracepoint(struct perf_sample *sample,
struct evsel *evsel,
- struct addr_location *al)
+ struct addr_location *al,
+ struct addr_location *addr_al)
{
struct tep_event *event = evsel->tp_format;
PyObject *handler, *context, *t, *obj = NULL, *callchain;
@@ -843,9 +897,6 @@ static void python_process_tracepoint(struct perf_sample *sample,
s = nsecs / NSEC_PER_SEC;
ns = nsecs - s * NSEC_PER_SEC;
- scripting_context->event_data = data;
- scripting_context->pevent = evsel->tp_format->tep;
-
context = _PyCapsule_New(scripting_context, NULL, NULL);
PyTuple_SetItem(t, n++, _PyUnicode_FromString(handler_name));
@@ -906,7 +957,7 @@ static void python_process_tracepoint(struct perf_sample *sample,
PyTuple_SetItem(t, n++, dict);
if (get_argument_count(handler) == (int) n + 1) {
- all_entries_dict = get_perf_sample_dict(sample, evsel, al,
+ all_entries_dict = get_perf_sample_dict(sample, evsel, al, addr_al,
callchain);
PyTuple_SetItem(t, n++, all_entries_dict);
} else {
@@ -934,7 +985,7 @@ static PyObject *tuple_new(unsigned int sz)
return t;
}
-static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val)
+static int tuple_set_s64(PyObject *t, unsigned int pos, s64 val)
{
#if BITS_PER_LONG == 64
return PyTuple_SetItem(t, pos, _PyLong_FromLong(val));
@@ -944,11 +995,37 @@ static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val)
#endif
}
+/*
+ * Databases support only signed 64-bit numbers, so even though we are
+ * exporting a u64, it must be as s64.
+ */
+#define tuple_set_d64 tuple_set_s64
+
+static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val)
+{
+#if BITS_PER_LONG == 64
+ return PyTuple_SetItem(t, pos, PyLong_FromUnsignedLong(val));
+#endif
+#if BITS_PER_LONG == 32
+ return PyTuple_SetItem(t, pos, PyLong_FromUnsignedLongLong(val));
+#endif
+}
+
+static int tuple_set_u32(PyObject *t, unsigned int pos, u32 val)
+{
+ return PyTuple_SetItem(t, pos, PyLong_FromUnsignedLong(val));
+}
+
static int tuple_set_s32(PyObject *t, unsigned int pos, s32 val)
{
return PyTuple_SetItem(t, pos, _PyLong_FromLong(val));
}
+static int tuple_set_bool(PyObject *t, unsigned int pos, bool val)
+{
+ return PyTuple_SetItem(t, pos, PyBool_FromLong(val));
+}
+
static int tuple_set_string(PyObject *t, unsigned int pos, const char *s)
{
return PyTuple_SetItem(t, pos, _PyUnicode_FromString(s));
@@ -967,7 +1044,7 @@ static int python_export_evsel(struct db_export *dbe, struct evsel *evsel)
t = tuple_new(2);
- tuple_set_u64(t, 0, evsel->db_id);
+ tuple_set_d64(t, 0, evsel->db_id);
tuple_set_string(t, 1, evsel__name(evsel));
call_object(tables->evsel_handler, t, "evsel_table");
@@ -985,7 +1062,7 @@ static int python_export_machine(struct db_export *dbe,
t = tuple_new(3);
- tuple_set_u64(t, 0, machine->db_id);
+ tuple_set_d64(t, 0, machine->db_id);
tuple_set_s32(t, 1, machine->pid);
tuple_set_string(t, 2, machine->root_dir ? machine->root_dir : "");
@@ -1004,9 +1081,9 @@ static int python_export_thread(struct db_export *dbe, struct thread *thread,
t = tuple_new(5);
- tuple_set_u64(t, 0, thread->db_id);
- tuple_set_u64(t, 1, machine->db_id);
- tuple_set_u64(t, 2, main_thread_db_id);
+ tuple_set_d64(t, 0, thread->db_id);
+ tuple_set_d64(t, 1, machine->db_id);
+ tuple_set_d64(t, 2, main_thread_db_id);
tuple_set_s32(t, 3, thread->pid_);
tuple_set_s32(t, 4, thread->tid);
@@ -1025,10 +1102,10 @@ static int python_export_comm(struct db_export *dbe, struct comm *comm,
t = tuple_new(5);
- tuple_set_u64(t, 0, comm->db_id);
+ tuple_set_d64(t, 0, comm->db_id);
tuple_set_string(t, 1, comm__str(comm));
- tuple_set_u64(t, 2, thread->db_id);
- tuple_set_u64(t, 3, comm->start);
+ tuple_set_d64(t, 2, thread->db_id);
+ tuple_set_d64(t, 3, comm->start);
tuple_set_s32(t, 4, comm->exec);
call_object(tables->comm_handler, t, "comm_table");
@@ -1046,9 +1123,9 @@ static int python_export_comm_thread(struct db_export *dbe, u64 db_id,
t = tuple_new(3);
- tuple_set_u64(t, 0, db_id);
- tuple_set_u64(t, 1, comm->db_id);
- tuple_set_u64(t, 2, thread->db_id);
+ tuple_set_d64(t, 0, db_id);
+ tuple_set_d64(t, 1, comm->db_id);
+ tuple_set_d64(t, 2, thread->db_id);
call_object(tables->comm_thread_handler, t, "comm_thread_table");
@@ -1068,8 +1145,8 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso,
t = tuple_new(5);
- tuple_set_u64(t, 0, dso->db_id);
- tuple_set_u64(t, 1, machine->db_id);
+ tuple_set_d64(t, 0, dso->db_id);
+ tuple_set_d64(t, 1, machine->db_id);
tuple_set_string(t, 2, dso->short_name);
tuple_set_string(t, 3, dso->long_name);
tuple_set_string(t, 4, sbuild_id);
@@ -1090,10 +1167,10 @@ static int python_export_symbol(struct db_export *dbe, struct symbol *sym,
t = tuple_new(6);
- tuple_set_u64(t, 0, *sym_db_id);
- tuple_set_u64(t, 1, dso->db_id);
- tuple_set_u64(t, 2, sym->start);
- tuple_set_u64(t, 3, sym->end);
+ tuple_set_d64(t, 0, *sym_db_id);
+ tuple_set_d64(t, 1, dso->db_id);
+ tuple_set_d64(t, 2, sym->start);
+ tuple_set_d64(t, 3, sym->end);
tuple_set_s32(t, 4, sym->binding);
tuple_set_string(t, 5, sym->name);
@@ -1130,30 +1207,30 @@ static void python_export_sample_table(struct db_export *dbe,
t = tuple_new(24);
- tuple_set_u64(t, 0, es->db_id);
- tuple_set_u64(t, 1, es->evsel->db_id);
- tuple_set_u64(t, 2, es->al->maps->machine->db_id);
- tuple_set_u64(t, 3, es->al->thread->db_id);
- tuple_set_u64(t, 4, es->comm_db_id);
- tuple_set_u64(t, 5, es->dso_db_id);
- tuple_set_u64(t, 6, es->sym_db_id);
- tuple_set_u64(t, 7, es->offset);
- tuple_set_u64(t, 8, es->sample->ip);
- tuple_set_u64(t, 9, es->sample->time);
+ tuple_set_d64(t, 0, es->db_id);
+ tuple_set_d64(t, 1, es->evsel->db_id);
+ tuple_set_d64(t, 2, es->al->maps->machine->db_id);
+ tuple_set_d64(t, 3, es->al->thread->db_id);
+ tuple_set_d64(t, 4, es->comm_db_id);
+ tuple_set_d64(t, 5, es->dso_db_id);
+ tuple_set_d64(t, 6, es->sym_db_id);
+ tuple_set_d64(t, 7, es->offset);
+ tuple_set_d64(t, 8, es->sample->ip);
+ tuple_set_d64(t, 9, es->sample->time);
tuple_set_s32(t, 10, es->sample->cpu);
- tuple_set_u64(t, 11, es->addr_dso_db_id);
- tuple_set_u64(t, 12, es->addr_sym_db_id);
- tuple_set_u64(t, 13, es->addr_offset);
- tuple_set_u64(t, 14, es->sample->addr);
- tuple_set_u64(t, 15, es->sample->period);
- tuple_set_u64(t, 16, es->sample->weight);
- tuple_set_u64(t, 17, es->sample->transaction);
- tuple_set_u64(t, 18, es->sample->data_src);
+ tuple_set_d64(t, 11, es->addr_dso_db_id);
+ tuple_set_d64(t, 12, es->addr_sym_db_id);
+ tuple_set_d64(t, 13, es->addr_offset);
+ tuple_set_d64(t, 14, es->sample->addr);
+ tuple_set_d64(t, 15, es->sample->period);
+ tuple_set_d64(t, 16, es->sample->weight);
+ tuple_set_d64(t, 17, es->sample->transaction);
+ tuple_set_d64(t, 18, es->sample->data_src);
tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
- tuple_set_u64(t, 21, es->call_path_id);
- tuple_set_u64(t, 22, es->sample->insn_cnt);
- tuple_set_u64(t, 23, es->sample->cyc_cnt);
+ tuple_set_d64(t, 21, es->call_path_id);
+ tuple_set_d64(t, 22, es->sample->insn_cnt);
+ tuple_set_d64(t, 23, es->sample->cyc_cnt);
call_object(tables->sample_handler, t, "sample_table");
@@ -1167,8 +1244,8 @@ static void python_export_synth(struct db_export *dbe, struct export_sample *es)
t = tuple_new(3);
- tuple_set_u64(t, 0, es->db_id);
- tuple_set_u64(t, 1, es->evsel->core.attr.config);
+ tuple_set_d64(t, 0, es->db_id);
+ tuple_set_d64(t, 1, es->evsel->core.attr.config);
tuple_set_bytes(t, 2, es->sample->raw_data, es->sample->raw_size);
call_object(tables->synth_handler, t, "synth_data");
@@ -1200,10 +1277,10 @@ static int python_export_call_path(struct db_export *dbe, struct call_path *cp)
t = tuple_new(4);
- tuple_set_u64(t, 0, cp->db_id);
- tuple_set_u64(t, 1, parent_db_id);
- tuple_set_u64(t, 2, sym_db_id);
- tuple_set_u64(t, 3, cp->ip);
+ tuple_set_d64(t, 0, cp->db_id);
+ tuple_set_d64(t, 1, parent_db_id);
+ tuple_set_d64(t, 2, sym_db_id);
+ tuple_set_d64(t, 3, cp->ip);
call_object(tables->call_path_handler, t, "call_path_table");
@@ -1221,20 +1298,20 @@ static int python_export_call_return(struct db_export *dbe,
t = tuple_new(14);
- tuple_set_u64(t, 0, cr->db_id);
- tuple_set_u64(t, 1, cr->thread->db_id);
- tuple_set_u64(t, 2, comm_db_id);
- tuple_set_u64(t, 3, cr->cp->db_id);
- tuple_set_u64(t, 4, cr->call_time);
- tuple_set_u64(t, 5, cr->return_time);
- tuple_set_u64(t, 6, cr->branch_count);
- tuple_set_u64(t, 7, cr->call_ref);
- tuple_set_u64(t, 8, cr->return_ref);
- tuple_set_u64(t, 9, cr->cp->parent->db_id);
+ tuple_set_d64(t, 0, cr->db_id);
+ tuple_set_d64(t, 1, cr->thread->db_id);
+ tuple_set_d64(t, 2, comm_db_id);
+ tuple_set_d64(t, 3, cr->cp->db_id);
+ tuple_set_d64(t, 4, cr->call_time);
+ tuple_set_d64(t, 5, cr->return_time);
+ tuple_set_d64(t, 6, cr->branch_count);
+ tuple_set_d64(t, 7, cr->call_ref);
+ tuple_set_d64(t, 8, cr->return_ref);
+ tuple_set_d64(t, 9, cr->cp->parent->db_id);
tuple_set_s32(t, 10, cr->flags);
- tuple_set_u64(t, 11, cr->parent_db_id);
- tuple_set_u64(t, 12, cr->insn_count);
- tuple_set_u64(t, 13, cr->cyc_count);
+ tuple_set_d64(t, 11, cr->parent_db_id);
+ tuple_set_d64(t, 12, cr->insn_count);
+ tuple_set_d64(t, 13, cr->cyc_count);
call_object(tables->call_return_handler, t, "call_return_table");
@@ -1254,14 +1331,14 @@ static int python_export_context_switch(struct db_export *dbe, u64 db_id,
t = tuple_new(9);
- tuple_set_u64(t, 0, db_id);
- tuple_set_u64(t, 1, machine->db_id);
- tuple_set_u64(t, 2, sample->time);
+ tuple_set_d64(t, 0, db_id);
+ tuple_set_d64(t, 1, machine->db_id);
+ tuple_set_d64(t, 2, sample->time);
tuple_set_s32(t, 3, sample->cpu);
- tuple_set_u64(t, 4, th_out_id);
- tuple_set_u64(t, 5, comm_out_id);
- tuple_set_u64(t, 6, th_in_id);
- tuple_set_u64(t, 7, comm_in_id);
+ tuple_set_d64(t, 4, th_out_id);
+ tuple_set_d64(t, 5, comm_out_id);
+ tuple_set_d64(t, 6, th_in_id);
+ tuple_set_d64(t, 7, comm_in_id);
tuple_set_s32(t, 8, flags);
call_object(tables->context_switch_handler, t, "context_switch");
@@ -1281,7 +1358,8 @@ static int python_process_call_return(struct call_return *cr, u64 *parent_db_id,
static void python_process_general_event(struct perf_sample *sample,
struct evsel *evsel,
- struct addr_location *al)
+ struct addr_location *al,
+ struct addr_location *addr_al)
{
PyObject *handler, *t, *dict, *callchain;
static char handler_name[64];
@@ -1303,7 +1381,7 @@ static void python_process_general_event(struct perf_sample *sample,
/* ip unwinding */
callchain = python_process_callchain(sample, evsel, al);
- dict = get_perf_sample_dict(sample, evsel, al, callchain);
+ dict = get_perf_sample_dict(sample, evsel, al, addr_al, callchain);
PyTuple_SetItem(t, n++, dict);
if (_PyTuple_Resize(&t, n) == -1)
@@ -1317,23 +1395,64 @@ static void python_process_general_event(struct perf_sample *sample,
static void python_process_event(union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
- struct addr_location *al)
+ struct addr_location *al,
+ struct addr_location *addr_al)
{
struct tables *tables = &tables_global;
+ scripting_context__update(scripting_context, event, sample, evsel, al, addr_al);
+
switch (evsel->core.attr.type) {
case PERF_TYPE_TRACEPOINT:
- python_process_tracepoint(sample, evsel, al);
+ python_process_tracepoint(sample, evsel, al, addr_al);
break;
/* Reserve for future process_hw/sw/raw APIs */
default:
if (tables->db_export_mode)
- db_export__sample(&tables->dbe, event, sample, evsel, al);
+ db_export__sample(&tables->dbe, event, sample, evsel, al, addr_al);
else
- python_process_general_event(sample, evsel, al);
+ python_process_general_event(sample, evsel, al, addr_al);
}
}
+static void python_do_process_switch(union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ const char *handler_name = "context_switch";
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+ bool out_preempt = out && (event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT);
+ pid_t np_pid = -1, np_tid = -1;
+ PyObject *handler, *t;
+
+ handler = get_handler(handler_name);
+ if (!handler)
+ return;
+
+ if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+ np_pid = event->context_switch.next_prev_pid;
+ np_tid = event->context_switch.next_prev_tid;
+ }
+
+ t = tuple_new(9);
+ if (!t)
+ return;
+
+ tuple_set_u64(t, 0, sample->time);
+ tuple_set_s32(t, 1, sample->cpu);
+ tuple_set_s32(t, 2, sample->pid);
+ tuple_set_s32(t, 3, sample->tid);
+ tuple_set_s32(t, 4, np_pid);
+ tuple_set_s32(t, 5, np_tid);
+ tuple_set_s32(t, 6, machine->pid);
+ tuple_set_bool(t, 7, out);
+ tuple_set_bool(t, 8, out_preempt);
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
static void python_process_switch(union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -1342,6 +1461,44 @@ static void python_process_switch(union perf_event *event,
if (tables->db_export_mode)
db_export__switch(&tables->dbe, event, sample, machine);
+ else
+ python_do_process_switch(event, sample, machine);
+}
+
+static void python_process_auxtrace_error(struct perf_session *session __maybe_unused,
+ union perf_event *event)
+{
+ struct perf_record_auxtrace_error *e = &event->auxtrace_error;
+ u8 cpumode = e->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ const char *handler_name = "auxtrace_error";
+ unsigned long long tm = e->time;
+ const char *msg = e->msg;
+ PyObject *handler, *t;
+
+ handler = get_handler(handler_name);
+ if (!handler)
+ return;
+
+ if (!e->fmt) {
+ tm = 0;
+ msg = (const char *)&e->time;
+ }
+
+ t = tuple_new(9);
+
+ tuple_set_u32(t, 0, e->type);
+ tuple_set_u32(t, 1, e->code);
+ tuple_set_s32(t, 2, e->cpu);
+ tuple_set_s32(t, 3, e->pid);
+ tuple_set_s32(t, 4, e->tid);
+ tuple_set_u64(t, 5, e->ip);
+ tuple_set_u64(t, 6, tm);
+ tuple_set_string(t, 7, msg);
+ tuple_set_u32(t, 8, cpumode);
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
}
static void get_handler_name(char *str, size_t size,
@@ -1442,6 +1599,31 @@ static void python_process_stat_interval(u64 tstamp)
Py_DECREF(t);
}
+static int perf_script_context_init(void)
+{
+ PyObject *perf_script_context;
+ PyObject *perf_trace_context;
+ PyObject *dict;
+ int ret;
+
+ perf_trace_context = PyImport_AddModule("perf_trace_context");
+ if (!perf_trace_context)
+ return -1;
+ dict = PyModule_GetDict(perf_trace_context);
+ if (!dict)
+ return -1;
+
+ perf_script_context = _PyCapsule_New(scripting_context, NULL, NULL);
+ if (!perf_script_context)
+ return -1;
+
+ ret = PyDict_SetItemString(dict, "perf_script_context", perf_script_context);
+ if (!ret)
+ ret = PyDict_SetItemString(main_dict, "perf_script_context", perf_script_context);
+ Py_DECREF(perf_script_context);
+ return ret;
+}
+
static int run_start_sub(void)
{
main_module = PyImport_AddModule("__main__");
@@ -1454,6 +1636,9 @@ static int run_start_sub(void)
goto error;
Py_INCREF(main_dict);
+ if (perf_script_context_init())
+ goto error;
+
try_call_object("trace_begin", NULL);
return 0;
@@ -1589,7 +1774,8 @@ static void _free_command_line(wchar_t **command_line, int num)
/*
* Start trace script
*/
-static int python_start_script(const char *script, int argc, const char **argv)
+static int python_start_script(const char *script, int argc, const char **argv,
+ struct perf_session *session)
{
struct tables *tables = &tables_global;
#if PY_MAJOR_VERSION < 3
@@ -1605,6 +1791,7 @@ static int python_start_script(const char *script, int argc, const char **argv)
int i, err = 0;
FILE *fp;
+ scripting_context->session = session;
#if PY_MAJOR_VERSION < 3
command_line = malloc((argc + 1) * sizeof(const char *));
command_line[0] = script;
@@ -1876,11 +2063,13 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile
struct scripting_ops python_scripting_ops = {
.name = "Python",
+ .dirname = "python",
.start_script = python_start_script,
.flush_script = python_flush_script,
.stop_script = python_stop_script,
.process_event = python_process_event,
.process_switch = python_process_switch,
+ .process_auxtrace_error = python_process_auxtrace_error,
.process_stat = python_process_stat,
.process_stat_interval = python_process_stat_interval,
.generate_script = python_generate_script,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 106b3d60881a..e9c929a39973 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -301,8 +301,11 @@ void perf_session__delete(struct perf_session *session)
perf_session__release_decomp_events(session);
perf_env__exit(&session->header.env);
machines__exit(&session->machines);
- if (session->data)
+ if (session->data) {
+ if (perf_data__is_read(session->data))
+ evlist__delete(session->evlist);
perf_data__close(session->data);
+ }
free(session);
}
@@ -1723,6 +1726,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
if (event->header.size < hdr_sz || event->header.size > buf_sz)
return -1;
+ buf += hdr_sz;
rest = event->header.size - hdr_sz;
if (readn(fd, buf, rest) != (ssize_t)rest)
@@ -2155,6 +2159,7 @@ struct reader {
u64 data_size;
u64 data_offset;
reader_cb_t process;
+ bool in_place_update;
};
static int
@@ -2188,7 +2193,9 @@ reader__process_events(struct reader *rd, struct perf_session *session,
mmap_prot = PROT_READ;
mmap_flags = MAP_SHARED;
- if (session->header.needs_swap) {
+ if (rd->in_place_update) {
+ mmap_prot |= PROT_WRITE;
+ } else if (session->header.needs_swap) {
mmap_prot |= PROT_WRITE;
mmap_flags = MAP_PRIVATE;
}
@@ -2274,6 +2281,7 @@ static int __perf_session__process_events(struct perf_session *session)
.data_size = session->header.data_size,
.data_offset = session->header.data_offset,
.process = process_simple,
+ .in_place_update = session->data->in_place_update,
};
struct ordered_events *oe = &session->ordered_events;
struct perf_tool *tool = session->tool;
diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c
index c29edaaca863..476e99896d5e 100644
--- a/tools/perf/util/srccode.c
+++ b/tools/perf/util/srccode.c
@@ -97,8 +97,7 @@ static struct srcfile *find_srcfile(char *fn)
hlist_for_each_entry (h, &srcfile_htab[hval], hash_nd) {
if (!strcmp(fn, h->fn)) {
/* Move to front */
- list_del(&h->nd);
- list_add(&h->nd, &srcfile_list);
+ list_move(&h->nd, &srcfile_list);
return h;
}
}
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index a76fff5e7d83..c588a6b7a8db 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -541,7 +541,7 @@ static void uniquify_event_name(struct evsel *counter)
char *config;
int ret = 0;
- if (counter->uniquified_name ||
+ if (counter->uniquified_name || counter->use_config_name ||
!counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
strlen(counter->pmu_name)))
return;
@@ -555,10 +555,8 @@ static void uniquify_event_name(struct evsel *counter)
}
} else {
if (perf_pmu__has_hybrid()) {
- if (!counter->use_config_name) {
- ret = asprintf(&new_name, "%s/%s/",
- counter->pmu_name, counter->name);
- }
+ ret = asprintf(&new_name, "%s/%s/",
+ counter->pmu_name, counter->name);
} else {
ret = asprintf(&new_name, "%s [%s]",
counter->name, counter->pmu_name);
@@ -827,11 +825,11 @@ static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused,
bool first __maybe_unused)
{
struct caggr_data *cd = data;
- struct perf_stat_evsel *ps = counter->stats;
+ struct perf_counts_values *aggr = &counter->counts->aggr;
- cd->avg += avg_stats(&ps->res_stats[0]);
- cd->avg_enabled += avg_stats(&ps->res_stats[1]);
- cd->avg_running += avg_stats(&ps->res_stats[2]);
+ cd->avg += aggr->val;
+ cd->avg_enabled += aggr->ena;
+ cd->avg_running += aggr->run;
}
/*
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 2db46b9bebd0..d3ec2624e036 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -437,18 +437,6 @@ int perf_stat_process_counter(struct perf_stat_config *config,
aggr->val = aggr->ena = aggr->run = 0;
- /*
- * We calculate counter's data every interval,
- * and the display code shows ps->res_stats
- * avg value. We need to zero the stats for
- * interval mode, otherwise overall avg running
- * averages will be shown for each interval.
- */
- if (config->interval || config->summary) {
- for (i = 0; i < 3; i++)
- init_stats(&ps->res_stats[i]);
- }
-
if (counter->per_pkg)
evsel__zero_per_pkg(counter);
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 4c56aa837434..a73345730ba9 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -2412,6 +2412,7 @@ int cleanup_sdt_note_list(struct list_head *sdt_notes)
list_for_each_entry_safe(pos, tmp, sdt_notes, note_list) {
list_del_init(&pos->note_list);
+ zfree(&pos->args);
zfree(&pos->name);
zfree(&pos->provider);
free(pos);
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 714581b0de65..7172ca05265f 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -12,10 +12,31 @@
#include "debug.h"
#include "trace-event.h"
+#include "event.h"
+#include "evsel.h"
#include <linux/zalloc.h>
struct scripting_context *scripting_context;
+void scripting_context__update(struct scripting_context *c,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al,
+ struct addr_location *addr_al)
+{
+ c->event_data = sample->raw_data;
+ if (evsel->tp_format)
+ c->pevent = evsel->tp_format->tep;
+ else
+ c->pevent = NULL;
+ c->event = event;
+ c->sample = sample;
+ c->evsel = evsel;
+ c->al = al;
+ c->addr_al = addr_al;
+}
+
static int flush_script_unsupported(void)
{
return 0;
@@ -29,7 +50,8 @@ static int stop_script_unsupported(void)
static void process_event_unsupported(union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
struct evsel *evsel __maybe_unused,
- struct addr_location *al __maybe_unused)
+ struct addr_location *al __maybe_unused,
+ struct addr_location *addr_al __maybe_unused)
{
}
@@ -44,7 +66,8 @@ static void print_python_unsupported_msg(void)
static int python_start_script_unsupported(const char *script __maybe_unused,
int argc __maybe_unused,
- const char **argv __maybe_unused)
+ const char **argv __maybe_unused,
+ struct perf_session *session __maybe_unused)
{
print_python_unsupported_msg();
@@ -63,6 +86,7 @@ static int python_generate_script_unsupported(struct tep_handle *pevent
struct scripting_ops python_scripting_unsupported_ops = {
.name = "Python",
+ .dirname = "python",
.start_script = python_start_script_unsupported,
.flush_script = flush_script_unsupported,
.stop_script = stop_script_unsupported,
@@ -108,7 +132,8 @@ static void print_perl_unsupported_msg(void)
static int perl_start_script_unsupported(const char *script __maybe_unused,
int argc __maybe_unused,
- const char **argv __maybe_unused)
+ const char **argv __maybe_unused,
+ struct perf_session *session __maybe_unused)
{
print_perl_unsupported_msg();
@@ -126,6 +151,7 @@ static int perl_generate_script_unsupported(struct tep_handle *pevent
struct scripting_ops perl_scripting_unsupported_ops = {
.name = "Perl",
+ .dirname = "perl",
.start_script = perl_start_script_unsupported,
.flush_script = flush_script_unsupported,
.stop_script = stop_script_unsupported,
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 72fdf2a3577c..54aadeedf28c 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -11,6 +11,7 @@ union perf_event;
struct perf_tool;
struct thread;
struct tep_plugin_list;
+struct evsel;
struct trace_event {
struct tep_handle *pevent;
@@ -71,16 +72,21 @@ struct perf_stat_config;
struct scripting_ops {
const char *name;
- int (*start_script) (const char *script, int argc, const char **argv);
+ const char *dirname; /* For script path .../scripts/<dirname>/... */
+ int (*start_script)(const char *script, int argc, const char **argv,
+ struct perf_session *session);
int (*flush_script) (void);
int (*stop_script) (void);
void (*process_event) (union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
- struct addr_location *al);
+ struct addr_location *al,
+ struct addr_location *addr_al);
void (*process_switch)(union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+ void (*process_auxtrace_error)(struct perf_session *session,
+ union perf_event *event);
void (*process_stat)(struct perf_stat_config *config,
struct evsel *evsel, u64 tstamp);
void (*process_stat_interval)(u64 tstamp);
@@ -91,16 +97,35 @@ extern unsigned int scripting_max_stack;
int script_spec_register(const char *spec, struct scripting_ops *ops);
+void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
+ struct machine *machine);
+
void setup_perl_scripting(void);
void setup_python_scripting(void);
struct scripting_context {
struct tep_handle *pevent;
void *event_data;
+ union perf_event *event;
+ struct perf_sample *sample;
+ struct evsel *evsel;
+ struct addr_location *al;
+ struct addr_location *addr_al;
+ struct perf_session *session;
};
+void scripting_context__update(struct scripting_context *scripting_context,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al,
+ struct addr_location *addr_al);
+
int common_pc(struct scripting_context *context);
int common_flags(struct scripting_context *context);
int common_lock_depth(struct scripting_context *context);
+#define SAMPLE_FLAGS_BUF_SIZE 64
+int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz);
+
#endif /* _PERF_UTIL_TRACE_EVENT_H */
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index ab940c508ef0..bf9fd3549a1d 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.9";
+static const char *version_str = "v1.10";
static const int supported_api_ver = 1;
static struct isst_if_platform_info isst_platform_info;
static char *progname;
@@ -106,6 +106,22 @@ int is_skx_based_platform(void)
return 0;
}
+int is_spr_platform(void)
+{
+ if (cpu_model == 0x8F)
+ return 1;
+
+ return 0;
+}
+
+int is_icx_platform(void)
+{
+ if (cpu_model == 0x6A || cpu_model == 0x6C)
+ return 1;
+
+ return 0;
+}
+
static int update_cpu_model(void)
{
unsigned int ebx, ecx, edx;
diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index 6a26d5769984..4431c8a0d40a 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -201,6 +201,7 @@ void isst_get_uncore_mem_freq(int cpu, int config_index,
{
unsigned int resp;
int ret;
+
ret = isst_send_mbox_command(cpu, CONFIG_TDP, CONFIG_TDP_GET_MEM_FREQ,
0, config_index, &resp);
if (ret) {
@@ -209,6 +210,20 @@ void isst_get_uncore_mem_freq(int cpu, int config_index,
}
ctdp_level->mem_freq = resp & GENMASK(7, 0);
+ if (is_spr_platform()) {
+ ctdp_level->mem_freq *= 200;
+ } else if (is_icx_platform()) {
+ if (ctdp_level->mem_freq < 7) {
+ ctdp_level->mem_freq = (12 - ctdp_level->mem_freq) * 133.33 * 2 * 10;
+ ctdp_level->mem_freq /= 10;
+ if (ctdp_level->mem_freq % 10 > 5)
+ ctdp_level->mem_freq++;
+ } else {
+ ctdp_level->mem_freq = 0;
+ }
+ } else {
+ ctdp_level->mem_freq = 0;
+ }
debug_printf(
"cpu:%d ctdp:%d CONFIG_TDP_GET_MEM_FREQ resp:%x uncore mem_freq:%d\n",
cpu, config_index, resp, ctdp_level->mem_freq);
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 3bf1820c0da1..f97d8859ada7 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -446,7 +446,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
if (ctdp_level->mem_freq) {
snprintf(header, sizeof(header), "mem-frequency(MHz)");
snprintf(value, sizeof(value), "%d",
- ctdp_level->mem_freq * DISP_FREQ_MULTIPLIER);
+ ctdp_level->mem_freq);
format_and_print(outf, level + 2, header, value);
}
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 0cac6c54be87..1aa15d5ea57c 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -257,5 +257,7 @@ extern int get_cpufreq_base_freq(int cpu);
extern int isst_read_pm_config(int cpu, int *cp_state, int *cp_cap);
extern void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg);
extern int is_skx_based_platform(void);
+extern int is_spr_platform(void);
+extern int is_icx_platform(void);
extern void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl);
#endif
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index f9271f3ea912..071312f5eb92 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -131,29 +131,29 @@ QUIET_SUBDIR1 =
ifneq ($(silent),1)
ifneq ($(V),1)
- QUIET_CC = @echo ' CC '$@;
- QUIET_CC_FPIC = @echo ' CC FPIC '$@;
- QUIET_CLANG = @echo ' CLANG '$@;
- QUIET_AR = @echo ' AR '$@;
- QUIET_LINK = @echo ' LINK '$@;
- QUIET_MKDIR = @echo ' MKDIR '$@;
- QUIET_GEN = @echo ' GEN '$@;
+ QUIET_CC = @echo ' CC '$@;
+ QUIET_CC_FPIC = @echo ' CC FPIC '$@;
+ QUIET_CLANG = @echo ' CLANG '$@;
+ QUIET_AR = @echo ' AR '$@;
+ QUIET_LINK = @echo ' LINK '$@;
+ QUIET_MKDIR = @echo ' MKDIR '$@;
+ QUIET_GEN = @echo ' GEN '$@;
QUIET_SUBDIR0 = +@subdir=
QUIET_SUBDIR1 = ;$(NO_SUBDIR) \
- echo ' SUBDIR '$$subdir; \
+ echo ' SUBDIR '$$subdir; \
$(MAKE) $(PRINT_DIR) -C $$subdir
- QUIET_FLEX = @echo ' FLEX '$@;
- QUIET_BISON = @echo ' BISON '$@;
- QUIET_GENSKEL = @echo ' GEN-SKEL '$@;
+ QUIET_FLEX = @echo ' FLEX '$@;
+ QUIET_BISON = @echo ' BISON '$@;
+ QUIET_GENSKEL = @echo ' GENSKEL '$@;
descend = \
- +@echo ' DESCEND '$(1); \
+ +@echo ' DESCEND '$(1); \
mkdir -p $(OUTPUT)$(1) && \
$(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2)
- QUIET_CLEAN = @printf ' CLEAN %s\n' $1;
- QUIET_INSTALL = @printf ' INSTALL %s\n' $1;
- QUIET_UNINST = @printf ' UNINST %s\n' $1;
+ QUIET_CLEAN = @printf ' CLEAN %s\n' $1;
+ QUIET_INSTALL = @printf ' INSTALL %s\n' $1;
+ QUIET_UNINST = @printf ' UNINST %s\n' $1;
endif
endif
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index bc3299a20338..fb010a35d61a 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -49,6 +49,7 @@ TARGETS += proc
TARGETS += pstore
TARGETS += ptrace
TARGETS += openat2
+TARGETS += rlimits
TARGETS += rseq
TARGETS += rtc
TARGETS += seccomp
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
index b29cbc642c57..76e138525d55 100644
--- a/tools/testing/selftests/arm64/fp/sve-probe-vls.c
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -25,7 +25,7 @@ int main(int argc, char **argv)
ksft_set_plan(2);
if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
- ksft_exit_skip("SVE not available");
+ ksft_exit_skip("SVE not available\n");
/*
* Enumerate up to SVE_VQ_MAX vector lengths
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 4866f6a21901..addcfd8b615e 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -10,6 +10,7 @@ FEATURE-DUMP.libbpf
fixdep
test_dev_cgroup
/test_progs*
+!test_progs.h
test_verifier_log
feature
test_sock
@@ -30,10 +31,13 @@ test_sysctl
xdping
test_cpp
*.skel.h
+*.lskel.h
/no_alu32
/bpf_gcc
/tools
/runqslower
/bench
*.ko
+*.tmp
xdpxceiver
+xdp_redirect_multi
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 511259c2c6c5..f405b20c1e6c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -54,6 +54,7 @@ TEST_FILES = xsk_prereqs.sh \
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
test_xdp_redirect.sh \
+ test_xdp_redirect_multi.sh \
test_xdp_meta.sh \
test_xdp_veth.sh \
test_offload.py \
@@ -84,7 +85,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
- xdpxceiver
+ xdpxceiver xdp_redirect_multi
TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
@@ -312,6 +313,10 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
linked_vars.skel.h linked_maps.skel.h
+LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
+ test_ksyms_module.c test_ringbuf.c atomics.c trace_printk.c
+SKEL_BLACKLIST += $$(LSKELS)
+
test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
@@ -339,6 +344,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
$$(TRUNNER_BPF_SRCS)))
+TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS))
TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
@@ -380,6 +386,14 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
$(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
+$(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
+ $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
+
$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o))
$(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps))
@@ -409,6 +423,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_OBJS) \
$(TRUNNER_BPF_SKELS) \
+ $(TRUNNER_BPF_LSKELS) \
$(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
@@ -516,6 +531,6 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature \
- $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
+ $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h no_alu32 bpf_gcc bpf_testmod.ko)
.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
index ccf260021e83..eb6a4fea8c79 100644
--- a/tools/testing/selftests/bpf/Makefile.docs
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -52,7 +52,8 @@ $(OUTPUT)%.$2: $(OUTPUT)%.rst
ifndef RST2MAN_DEP
$$(error "rst2man not found, but required to generate man pages")
endif
- $$(QUIET_GEN)rst2man $$< > $$@
+ $$(QUIET_GEN)rst2man --exit-status=1 $$< > $$@.tmp
+ $$(QUIET_GEN)mv $$@.tmp $$@
docs-clean-$1:
$$(call QUIET_CLEAN, eBPF_$1-manpage)
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 3353778c30f8..8deec1ca9150 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -202,3 +202,22 @@ generate valid BTF information for weak variables. Please make sure you use
Clang that contains the fix.
__ https://reviews.llvm.org/D100362
+
+Clang relocation changes
+========================
+
+Clang 13 patch `clang reloc patch`_ made some changes on relocations such
+that existing relocation types are broken into more types and
+each new type corresponds to only one way to resolve relocation.
+See `kernel llvm reloc`_ for more explanation and some examples.
+Using clang 13 to compile old libbpf which has static linker support,
+there will be a compilation failure::
+
+ libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.o
+
+Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``.
+To fix this issue, use a newer libbpf.
+
+.. Links
+.. _clang reloc patch: https://reviews.llvm.org/D102712
+.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 332ed2f7b402..6ea15b93a2f8 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -43,6 +43,7 @@ void setup_libbpf()
{
int err;
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
err = bump_memlock_rlimit();
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
index a967674098ad..c7ec114eca56 100644
--- a/tools/testing/selftests/bpf/benchs/bench_rename.c
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -65,7 +65,7 @@ static void attach_bpf(struct bpf_program *prog)
struct bpf_link *link;
link = bpf_program__attach(prog);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index bde6c9d4cbd4..d167bffac679 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -181,7 +181,7 @@ static void ringbuf_libbpf_setup()
}
link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
@@ -271,7 +271,7 @@ static void ringbuf_custom_setup()
}
link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program\n");
exit(1);
}
@@ -430,7 +430,7 @@ static void perfbuf_libbpf_setup()
}
link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program\n");
exit(1);
}
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 2a0b6c9885a4..f41a491a8cc0 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -60,7 +60,7 @@ static void attach_bpf(struct bpf_program *prog)
struct bpf_link *link;
link = bpf_program__attach(prog);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 12ee40284da0..2060bc122c53 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -40,7 +40,7 @@ struct ipv6_packet pkt_v6 = {
.tcp.doff = 5,
};
-static int settimeo(int fd, int timeout_ms)
+int settimeo(int fd, int timeout_ms)
{
struct timeval timeout = { .tv_sec = 3 };
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 7205f8afdba1..5e0d51c07b63 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -33,6 +33,7 @@ struct ipv6_packet {
} __packed;
extern struct ipv6_packet pkt_v6;
+int settimeo(int fd, int timeout_ms);
int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
int connect_to_fd(int server_fd, int timeout_ms);
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 21efe7bbf10d..ba0e1efe5a45 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -2,19 +2,19 @@
#include <test_progs.h>
-#include "atomics.skel.h"
+#include "atomics.lskel.h"
static void test_add(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.add);
- if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__add__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(add)"))
return;
- prog_fd = bpf_program__fd(skel->progs.add);
+ prog_fd = skel->progs.add.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run add",
@@ -33,20 +33,20 @@ static void test_add(struct atomics *skel)
ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_sub(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.sub);
- if (CHECK(IS_ERR(link), "attach(sub)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__sub__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(sub)"))
return;
- prog_fd = bpf_program__fd(skel->progs.sub);
+ prog_fd = skel->progs.sub.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run sub",
@@ -66,20 +66,20 @@ static void test_sub(struct atomics *skel)
ASSERT_EQ(skel->data->sub_noreturn_value, -1, "sub_noreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_and(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.and);
- if (CHECK(IS_ERR(link), "attach(and)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__and__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(and)"))
return;
- prog_fd = bpf_program__fd(skel->progs.and);
+ prog_fd = skel->progs.and.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run and",
@@ -94,20 +94,20 @@ static void test_and(struct atomics *skel)
ASSERT_EQ(skel->data->and_noreturn_value, 0x010ull << 32, "and_noreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_or(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.or);
- if (CHECK(IS_ERR(link), "attach(or)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__or__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(or)"))
return;
- prog_fd = bpf_program__fd(skel->progs.or);
+ prog_fd = skel->progs.or.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run or",
@@ -123,20 +123,20 @@ static void test_or(struct atomics *skel)
ASSERT_EQ(skel->data->or_noreturn_value, 0x111ull << 32, "or_noreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_xor(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.xor);
- if (CHECK(IS_ERR(link), "attach(xor)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__xor__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(xor)"))
return;
- prog_fd = bpf_program__fd(skel->progs.xor);
+ prog_fd = skel->progs.xor.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run xor",
@@ -151,20 +151,20 @@ static void test_xor(struct atomics *skel)
ASSERT_EQ(skel->data->xor_noreturn_value, 0x101ull << 32, "xor_nxoreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_cmpxchg(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.cmpxchg);
- if (CHECK(IS_ERR(link), "attach(cmpxchg)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__cmpxchg__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)"))
return;
- prog_fd = bpf_program__fd(skel->progs.cmpxchg);
+ prog_fd = skel->progs.cmpxchg.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run add",
@@ -180,20 +180,20 @@ static void test_cmpxchg(struct atomics *skel)
ASSERT_EQ(skel->bss->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_xchg(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.xchg);
- if (CHECK(IS_ERR(link), "attach(xchg)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__xchg__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(xchg)"))
return;
- prog_fd = bpf_program__fd(skel->progs.xchg);
+ prog_fd = skel->progs.xchg.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run add",
@@ -207,7 +207,7 @@ static void test_xchg(struct atomics *skel)
ASSERT_EQ(skel->bss->xchg32_result, 1, "xchg32_result");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
void test_atomics(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 9dc4e3dfbcf3..ec11e20d2b92 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -85,16 +85,14 @@ void test_attach_probe(void)
kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
false /* retprobe */,
SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- "err %ld\n", PTR_ERR(kprobe_link)))
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
goto cleanup;
skel->links.handle_kprobe = kprobe_link;
kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
true /* retprobe */,
SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
- "err %ld\n", PTR_ERR(kretprobe_link)))
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe"))
goto cleanup;
skel->links.handle_kretprobe = kretprobe_link;
@@ -103,8 +101,7 @@ void test_attach_probe(void)
0 /* self pid */,
"/proc/self/exe",
uprobe_offset);
- if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
- "err %ld\n", PTR_ERR(uprobe_link)))
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
goto cleanup;
skel->links.handle_uprobe = uprobe_link;
@@ -113,8 +110,7 @@ void test_attach_probe(void)
-1 /* any pid */,
"/proc/self/exe",
uprobe_offset);
- if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
- "err %ld\n", PTR_ERR(uretprobe_link)))
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe"))
goto cleanup;
skel->links.handle_uretprobe = uretprobe_link;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 2d3590cfb5e1..1f1aade56504 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -47,7 +47,7 @@ static void do_dummy_read(struct bpf_program *prog)
int iter_fd, len;
link = bpf_program__attach_iter(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
return;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -201,7 +201,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
int ret = 0;
link = bpf_program__attach_iter(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
return ret;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -396,7 +396,7 @@ static void test_file_iter(void)
return;
link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
/* unlink this path if it exists. */
@@ -502,7 +502,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
skel->bss->map2_id = map_info.id;
link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto free_map2;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -607,14 +607,12 @@ static void test_bpf_hash_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(!IS_ERR(link), "attach_iter",
- "attach_iter for hashmap2 unexpected succeeded\n"))
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
goto out;
linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(!IS_ERR(link), "attach_iter",
- "attach_iter for hashmap3 unexpected succeeded\n"))
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
goto out;
/* hashmap1 should be good, update map values here */
@@ -636,7 +634,7 @@ static void test_bpf_hash_map(void)
linfo.map.map_fd = map_fd;
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -727,7 +725,7 @@ static void test_bpf_percpu_hash_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -798,7 +796,7 @@ static void test_bpf_array_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -894,7 +892,7 @@ static void test_bpf_percpu_array_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -957,7 +955,7 @@ static void test_bpf_sk_storage_delete(void)
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map,
&opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -1075,7 +1073,7 @@ static void test_bpf_sk_storage_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -1128,7 +1126,7 @@ static void test_rdonly_buf_out_of_bound(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
bpf_link__destroy(link);
bpf_iter_test_kern5__destroy(skel);
@@ -1186,8 +1184,7 @@ static void test_task_vma(void)
skel->links.proc_maps = bpf_program__attach_iter(
skel->progs.proc_maps, NULL);
- if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter",
- "attach iterator failed\n")) {
+ if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
skel->links.proc_maps = NULL;
goto out;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index e25917f04602..efe1e979affb 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -82,7 +82,7 @@ static void *server(void *arg)
bytes, total_bytes, nr_sent, errno);
done:
- if (fd != -1)
+ if (fd >= 0)
close(fd);
if (err) {
WRITE_ONCE(stop, 1);
@@ -191,8 +191,7 @@ static void test_cubic(void)
return;
link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
- if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
- PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
bpf_cubic__destroy(cubic_skel);
return;
}
@@ -213,8 +212,7 @@ static void test_dctcp(void)
return;
link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
- if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
- PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
bpf_dctcp__destroy(dctcp_skel);
return;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 0457ae32b270..857e3f26086f 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3811,7 +3811,7 @@ static void do_test_raw(unsigned int test_num)
always_log);
free(raw_btf);
- err = ((btf_fd == -1) != test->btf_load_err);
+ err = ((btf_fd < 0) != test->btf_load_err);
if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
btf_fd, test->btf_load_err) ||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -3820,7 +3820,7 @@ static void do_test_raw(unsigned int test_num)
goto done;
}
- if (err || btf_fd == -1)
+ if (err || btf_fd < 0)
goto done;
create_attr.name = test->map_name;
@@ -3834,16 +3834,16 @@ static void do_test_raw(unsigned int test_num)
map_fd = bpf_create_map_xattr(&create_attr);
- err = ((map_fd == -1) != test->map_create_err);
+ err = ((map_fd < 0) != test->map_create_err);
CHECK(err, "map_fd:%d test->map_create_err:%u",
map_fd, test->map_create_err);
done:
if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
}
@@ -3941,7 +3941,7 @@ static int test_big_btf_info(unsigned int test_num)
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
always_log);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -3987,7 +3987,7 @@ done:
free(raw_btf);
free(user_btf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
return err;
@@ -4029,7 +4029,7 @@ static int test_btf_id(unsigned int test_num)
btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
always_log);
- if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4043,7 +4043,7 @@ static int test_btf_id(unsigned int test_num)
}
btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
- if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[1] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4071,7 +4071,7 @@ static int test_btf_id(unsigned int test_num)
create_attr.btf_value_type_id = 2;
map_fd = bpf_create_map_xattr(&create_attr);
- if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ if (CHECK(map_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4094,7 +4094,7 @@ static int test_btf_id(unsigned int test_num)
/* Test BTF ID is removed from the kernel */
btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
- if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4105,7 +4105,7 @@ static int test_btf_id(unsigned int test_num)
close(map_fd);
map_fd = -1;
btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
- if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
+ if (CHECK(btf_fd[0] >= 0, "BTF lingers")) {
err = -1;
goto done;
}
@@ -4117,11 +4117,11 @@ done:
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
for (i = 0; i < 2; i++) {
free(user_btf[i]);
- if (btf_fd[i] != -1)
+ if (btf_fd[i] >= 0)
close(btf_fd[i]);
}
@@ -4166,7 +4166,7 @@ static void do_test_get_info(unsigned int test_num)
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
always_log);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd <= 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4212,7 +4212,7 @@ done:
free(raw_btf);
free(user_btf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
}
@@ -4249,8 +4249,9 @@ static void do_test_file(unsigned int test_num)
return;
btf = btf__parse_elf(test->file, &btf_ext);
- if (IS_ERR(btf)) {
- if (PTR_ERR(btf) == -ENOENT) {
+ err = libbpf_get_error(btf);
+ if (err) {
+ if (err == -ENOENT) {
printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC);
test__skip();
return;
@@ -4263,7 +4264,8 @@ static void do_test_file(unsigned int test_num)
btf_ext__free(btf_ext);
obj = bpf_object__open(test->file);
- if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
+ err = libbpf_get_error(obj);
+ if (CHECK(err, "obj: %d", err))
return;
prog = bpf_program__next(NULL, obj);
@@ -4298,7 +4300,7 @@ static void do_test_file(unsigned int test_num)
info_len = sizeof(struct bpf_prog_info);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -4330,7 +4332,7 @@ static void do_test_file(unsigned int test_num)
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -4886,7 +4888,7 @@ static void do_test_pprint(int test_num)
always_log);
free(raw_btf);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4901,7 +4903,7 @@ static void do_test_pprint(int test_num)
create_attr.btf_value_type_id = test->value_type_id;
map_fd = bpf_create_map_xattr(&create_attr);
- if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ if (CHECK(map_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4982,7 +4984,7 @@ static void do_test_pprint(int test_num)
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
- if (err == -1)
+ if (err < 0)
goto done;
}
@@ -4998,7 +5000,7 @@ static void do_test_pprint(int test_num)
cpu, cmapv);
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
- if (err == -1)
+ if (err < 0)
goto done;
cmapv = cmapv + rounded_value_size;
@@ -5036,9 +5038,9 @@ done:
fprintf(stderr, "OK");
if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
if (pin_file)
fclose(pin_file);
@@ -5950,7 +5952,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
/* get necessary lens */
info_len = sizeof(struct bpf_prog_info);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
return -1;
}
@@ -5980,7 +5982,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
info.func_info_rec_size = rec_size;
info.func_info = ptr_to_u64(func_info);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -6044,7 +6046,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
info_len = sizeof(struct bpf_prog_info);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "err:%d errno:%d", err, errno)) {
+ if (CHECK(err < 0, "err:%d errno:%d", err, errno)) {
err = -1;
goto done;
}
@@ -6123,7 +6125,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
* Only recheck the info.*line_info* fields.
* Other fields are not the concern of this test.
*/
- if (CHECK(err == -1 ||
+ if (CHECK(err < 0 ||
info.nr_line_info != cnt ||
(jited_cnt && !info.jited_line_info) ||
info.nr_jited_line_info != jited_cnt ||
@@ -6260,7 +6262,7 @@ static void do_test_info_raw(unsigned int test_num)
always_log);
free(raw_btf);
- if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) {
err = -1;
goto done;
}
@@ -6273,7 +6275,8 @@ static void do_test_info_raw(unsigned int test_num)
patched_linfo = patch_name_tbd(test->line_info,
test->str_sec, linfo_str_off,
test->str_sec_size, &linfo_size);
- if (IS_ERR(patched_linfo)) {
+ err = libbpf_get_error(patched_linfo);
+ if (err) {
fprintf(stderr, "error in creating raw bpf_line_info");
err = -1;
goto done;
@@ -6297,7 +6300,7 @@ static void do_test_info_raw(unsigned int test_num)
}
prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
- err = ((prog_fd == -1) != test->expected_prog_load_failure);
+ err = ((prog_fd < 0) != test->expected_prog_load_failure);
if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d",
prog_fd, test->expected_prog_load_failure, errno) ||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -6306,7 +6309,7 @@ static void do_test_info_raw(unsigned int test_num)
goto done;
}
- if (prog_fd == -1)
+ if (prog_fd < 0)
goto done;
err = test_get_finfo(test, prog_fd);
@@ -6323,12 +6326,12 @@ done:
if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (prog_fd != -1)
+ if (prog_fd >= 0)
close(prog_fd);
- if (!IS_ERR(patched_linfo))
+ if (!libbpf_get_error(patched_linfo))
free(patched_linfo);
}
@@ -6839,9 +6842,9 @@ static void do_test_dedup(unsigned int test_num)
return;
test_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+ err = libbpf_get_error(test_btf);
free(raw_btf);
- if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
- PTR_ERR(test_btf))) {
+ if (CHECK(err, "invalid test_btf errno:%d", err)) {
err = -1;
goto done;
}
@@ -6853,9 +6856,9 @@ static void do_test_dedup(unsigned int test_num)
if (!raw_btf)
return;
expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+ err = libbpf_get_error(expect_btf);
free(raw_btf);
- if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
- PTR_ERR(expect_btf))) {
+ if (CHECK(err, "invalid expect_btf errno:%d", err)) {
err = -1;
goto done;
}
@@ -6966,10 +6969,8 @@ static void do_test_dedup(unsigned int test_num)
}
done:
- if (!IS_ERR(test_btf))
- btf__free(test_btf);
- if (!IS_ERR(expect_btf))
- btf__free(expect_btf);
+ btf__free(test_btf);
+ btf__free(expect_btf);
}
void test_btf(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 5e129dc2073c..1b90e684ff13 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -32,8 +32,9 @@ static int btf_dump_all_types(const struct btf *btf,
int err = 0, id;
d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
- if (IS_ERR(d))
- return PTR_ERR(d);
+ err = libbpf_get_error(d);
+ if (err)
+ return err;
for (id = 1; id <= type_cnt; id++) {
err = btf_dump__dump_type(d, id);
@@ -56,8 +57,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
snprintf(test_file, sizeof(test_file), "%s.o", t->file);
btf = btf__parse_elf(test_file, NULL);
- if (CHECK(IS_ERR(btf), "btf_parse_elf",
- "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
+ if (!ASSERT_OK_PTR(btf, "btf_parse_elf")) {
err = -PTR_ERR(btf);
btf = NULL;
goto done;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index f36da15b134f..022c7d89d6f4 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -4,8 +4,6 @@
#include <bpf/btf.h>
#include "btf_helpers.h"
-static int duration = 0;
-
void test_btf_write() {
const struct btf_var_secinfo *vi;
const struct btf_type *t;
@@ -16,7 +14,7 @@ void test_btf_write() {
int id, err, str_off;
btf = btf__new_empty();
- if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf)))
+ if (!ASSERT_OK_PTR(btf, "new_empty"))
return;
str_off = btf__find_str(btf, "int");
diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
index 643dfa35419c..876be0ecb654 100644
--- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -102,8 +102,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
*/
parent_link = bpf_program__attach_cgroup(obj->progs.egress,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_link), "parent-cg-attach",
- "err %ld", PTR_ERR(parent_link)))
+ if (!ASSERT_OK_PTR(parent_link, "parent-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -126,8 +125,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
*/
child_link = bpf_program__attach_cgroup(obj->progs.egress,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_link), "child-cg-attach",
- "err %ld", PTR_ERR(child_link)))
+ if (!ASSERT_OK_PTR(child_link, "child-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -147,10 +145,8 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
goto close_bpf_object;
close_bpf_object:
- if (!IS_ERR(parent_link))
- bpf_link__destroy(parent_link);
- if (!IS_ERR(child_link))
- bpf_link__destroy(child_link);
+ bpf_link__destroy(parent_link);
+ bpf_link__destroy(child_link);
cg_storage_multi_egress_only__destroy(obj);
}
@@ -176,18 +172,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
*/
parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
- "err %ld", PTR_ERR(parent_egress1_link)))
+ if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
goto close_bpf_object;
parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
- "err %ld", PTR_ERR(parent_egress2_link)))
+ if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
goto close_bpf_object;
parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
- "err %ld", PTR_ERR(parent_ingress_link)))
+ if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -221,18 +214,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
*/
child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
- "err %ld", PTR_ERR(child_egress1_link)))
+ if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
goto close_bpf_object;
child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
- "err %ld", PTR_ERR(child_egress2_link)))
+ if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
goto close_bpf_object;
child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
- "err %ld", PTR_ERR(child_ingress_link)))
+ if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -264,18 +254,12 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
goto close_bpf_object;
close_bpf_object:
- if (!IS_ERR(parent_egress1_link))
- bpf_link__destroy(parent_egress1_link);
- if (!IS_ERR(parent_egress2_link))
- bpf_link__destroy(parent_egress2_link);
- if (!IS_ERR(parent_ingress_link))
- bpf_link__destroy(parent_ingress_link);
- if (!IS_ERR(child_egress1_link))
- bpf_link__destroy(child_egress1_link);
- if (!IS_ERR(child_egress2_link))
- bpf_link__destroy(child_egress2_link);
- if (!IS_ERR(child_ingress_link))
- bpf_link__destroy(child_ingress_link);
+ bpf_link__destroy(parent_egress1_link);
+ bpf_link__destroy(parent_egress2_link);
+ bpf_link__destroy(parent_ingress_link);
+ bpf_link__destroy(child_egress1_link);
+ bpf_link__destroy(child_egress2_link);
+ bpf_link__destroy(child_ingress_link);
cg_storage_multi_isolated__destroy(obj);
}
@@ -301,18 +285,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
*/
parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
- "err %ld", PTR_ERR(parent_egress1_link)))
+ if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
goto close_bpf_object;
parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
- "err %ld", PTR_ERR(parent_egress2_link)))
+ if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
goto close_bpf_object;
parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
- "err %ld", PTR_ERR(parent_ingress_link)))
+ if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -338,18 +319,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
*/
child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
- "err %ld", PTR_ERR(child_egress1_link)))
+ if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
goto close_bpf_object;
child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
- "err %ld", PTR_ERR(child_egress2_link)))
+ if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
goto close_bpf_object;
child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
- "err %ld", PTR_ERR(child_ingress_link)))
+ if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -375,18 +353,12 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
goto close_bpf_object;
close_bpf_object:
- if (!IS_ERR(parent_egress1_link))
- bpf_link__destroy(parent_egress1_link);
- if (!IS_ERR(parent_egress2_link))
- bpf_link__destroy(parent_egress2_link);
- if (!IS_ERR(parent_ingress_link))
- bpf_link__destroy(parent_ingress_link);
- if (!IS_ERR(child_egress1_link))
- bpf_link__destroy(child_egress1_link);
- if (!IS_ERR(child_egress2_link))
- bpf_link__destroy(child_egress2_link);
- if (!IS_ERR(child_ingress_link))
- bpf_link__destroy(child_ingress_link);
+ bpf_link__destroy(parent_egress1_link);
+ bpf_link__destroy(parent_egress2_link);
+ bpf_link__destroy(parent_ingress_link);
+ bpf_link__destroy(child_egress1_link);
+ bpf_link__destroy(child_egress2_link);
+ bpf_link__destroy(child_ingress_link);
cg_storage_multi_shared__destroy(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index 0a1fc9816cef..20bb8831dda6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -167,7 +167,7 @@ void test_cgroup_attach_multi(void)
prog_cnt = 2;
CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
BPF_F_QUERY_EFFECTIVE, &attach_flags,
- prog_ids, &prog_cnt) != -1);
+ prog_ids, &prog_cnt) >= 0);
CHECK_FAIL(errno != ENOSPC);
CHECK_FAIL(prog_cnt != 4);
/* check that prog_ids are returned even when buffer is too small */
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 736796e56ed1..9091524131d6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -65,8 +65,7 @@ void test_cgroup_link(void)
for (i = 0; i < cg_nr; i++) {
links[i] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[i].fd);
- if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
- i, PTR_ERR(links[i])))
+ if (!ASSERT_OK_PTR(links[i], "cg_attach"))
goto cleanup;
}
@@ -121,8 +120,7 @@ void test_cgroup_link(void)
links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
- PTR_ERR(links[last_cg])))
+ if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
goto cleanup;
ping_and_check(cg_nr + 1, 0);
@@ -147,7 +145,7 @@ void test_cgroup_link(void)
/* attempt to mix in with multi-attach bpf_link */
tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
+ if (!ASSERT_ERR_PTR(tmp_link, "cg_attach_fail")) {
bpf_link__destroy(tmp_link);
goto cleanup;
}
@@ -165,8 +163,7 @@ void test_cgroup_link(void)
/* attach back link-based one */
links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
- PTR_ERR(links[last_cg])))
+ if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
goto cleanup;
ping_and_check(cg_nr, 0);
@@ -249,8 +246,7 @@ cleanup:
BPF_CGROUP_INET_EGRESS);
for (i = 0; i < cg_nr; i++) {
- if (!IS_ERR(links[i]))
- bpf_link__destroy(links[i]);
+ bpf_link__destroy(links[i]);
}
test_cgroup_link__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
index 464edc1c1708..b9dc4ec655b5 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -60,7 +60,7 @@ static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
goto cleanup;
link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
- if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "cgroup_attach"))
goto cleanup;
run_lookup_test(&skel->bss->g_serv_port, out_sk);
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index b62a39315336..012068f33a0a 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -53,7 +53,7 @@ static void test_check_mtu_xdp_attach(void)
prog = skel->progs.xdp_use_helper_basic;
link = bpf_program__attach_xdp(prog, IFINDEX_LO);
- if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto out;
skel->links.xdp_use_helper_basic = link;
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 607710826dca..d02e064c535f 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -369,8 +369,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test)
const char *name;
int i;
- if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) ||
- CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) {
+ if (!ASSERT_OK_PTR(local_btf, "local_btf") || !ASSERT_OK_PTR(targ_btf, "targ_btf")) {
btf__free(local_btf);
btf__free(targ_btf);
return -EINVAL;
@@ -848,8 +847,7 @@ void test_core_reloc(void)
}
obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
- if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
- test_case->bpf_obj_file, PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
continue;
probe_name = "raw_tracepoint/sys_enter";
@@ -899,8 +897,7 @@ void test_core_reloc(void)
data->my_pid_tgid = my_pid_tgid;
link = bpf_program__attach_raw_tracepoint(prog, tp_name);
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto cleanup;
/* trigger test run */
@@ -941,10 +938,8 @@ cleanup:
CHECK_FAIL(munmap(mmap_data, mmap_sz));
mmap_data = NULL;
}
- if (!IS_ERR_OR_NULL(link)) {
- bpf_link__destroy(link);
- link = NULL;
- }
+ bpf_link__destroy(link);
+ link = NULL;
bpf_object__close(obj);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 109d0345a2be..91154c2ba256 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fentry_test.skel.h"
-#include "fexit_test.skel.h"
+#include "fentry_test.lskel.h"
+#include "fexit_test.lskel.h"
void test_fentry_fexit(void)
{
@@ -26,7 +26,7 @@ void test_fentry_fexit(void)
if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
goto close_prog;
- prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+ prog_fd = fexit_skel->progs.test1.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 7cb111b11995..174c89e7456e 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -1,13 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fentry_test.skel.h"
+#include "fentry_test.lskel.h"
static int fentry_test(struct fentry_test *fentry_skel)
{
int err, prog_fd, i;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
__u64 *result;
err = fentry_test__attach(fentry_skel);
@@ -15,11 +15,11 @@ static int fentry_test(struct fentry_test *fentry_skel)
return err;
/* Check that already linked program can't be attached again. */
- link = bpf_program__attach(fentry_skel->progs.test1);
- if (!ASSERT_ERR_PTR(link, "fentry_attach_link"))
+ link_fd = fentry_test__test1__attach(fentry_skel);
+ if (!ASSERT_LT(link_fd, 0, "fentry_attach_link"))
return -1;
- prog_fd = bpf_program__fd(fentry_skel->progs.test1);
+ prog_fd = fentry_skel->progs.test1.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
ASSERT_OK(err, "test_run");
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 63990842d20f..73b4c76e6b86 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -146,10 +146,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
close_prog:
for (i = 0; i < prog_cnt; i++)
- if (!IS_ERR_OR_NULL(link[i]))
- bpf_link__destroy(link[i]);
- if (!IS_ERR_OR_NULL(obj))
- bpf_object__close(obj);
+ bpf_link__destroy(link[i]);
+ bpf_object__close(obj);
bpf_object__close(tgt_obj);
free(link);
free(prog);
@@ -231,7 +229,7 @@ static int test_second_attach(struct bpf_object *obj)
return err;
link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
- if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd))
+ if (!ASSERT_OK_PTR(link, "second_link"))
goto out;
err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
@@ -283,9 +281,7 @@ static void test_fmod_ret_freplace(void)
opts.attach_prog_fd = pkt_fd;
freplace_obj = bpf_object__open_file(freplace_name, &opts);
- if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open",
- "failed to open %s: %ld\n", freplace_name,
- PTR_ERR(freplace_obj)))
+ if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open"))
goto out;
err = bpf_object__load(freplace_obj);
@@ -294,14 +290,12 @@ static void test_fmod_ret_freplace(void)
prog = bpf_program__next(NULL, freplace_obj);
freplace_link = bpf_program__attach_trace(prog);
- if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n"))
+ if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace"))
goto out;
opts.attach_prog_fd = bpf_program__fd(prog);
fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
- if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open",
- "failed to open %s: %ld\n", fmod_ret_name,
- PTR_ERR(fmod_obj)))
+ if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open"))
goto out;
err = bpf_object__load(fmod_obj);
@@ -350,9 +344,7 @@ static void test_obj_load_failure_common(const char *obj_file,
);
obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
- "failed to open %s: %ld\n", obj_file,
- PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
goto close_prog;
/* It should fail to load the program */
@@ -361,8 +353,7 @@ static void test_obj_load_failure_common(const char *obj_file,
goto close_prog;
close_prog:
- if (!IS_ERR_OR_NULL(obj))
- bpf_object__close(obj);
+ bpf_object__close(obj);
bpf_object__close(pkt_obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
index ccc7e8a34ab6..4e7f4b42ea29 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -6,7 +6,7 @@
#include <time.h>
#include <sys/mman.h>
#include <sys/syscall.h>
-#include "fexit_sleep.skel.h"
+#include "fexit_sleep.lskel.h"
static int do_sleep(void *skel)
{
@@ -58,8 +58,8 @@ void test_fexit_sleep(void)
* waiting for percpu_ref_kill to confirm). The other one
* will be freed quickly.
*/
- close(bpf_program__fd(fexit_skel->progs.nanosleep_fentry));
- close(bpf_program__fd(fexit_skel->progs.nanosleep_fexit));
+ close(fexit_skel->progs.nanosleep_fentry.prog_fd);
+ close(fexit_skel->progs.nanosleep_fexit.prog_fd);
fexit_sleep__detach(fexit_skel);
/* kill the thread to unwind sys_nanosleep stack through the trampoline */
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 6792e41f7f69..af3dba726701 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -1,13 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fexit_test.skel.h"
+#include "fexit_test.lskel.h"
static int fexit_test(struct fexit_test *fexit_skel)
{
int err, prog_fd, i;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
__u64 *result;
err = fexit_test__attach(fexit_skel);
@@ -15,11 +15,11 @@ static int fexit_test(struct fexit_test *fexit_skel)
return err;
/* Check that already linked program can't be attached again. */
- link = bpf_program__attach(fexit_skel->progs.test1);
- if (!ASSERT_ERR_PTR(link, "fexit_attach_link"))
+ link_fd = fexit_test__test1__attach(fexit_skel);
+ if (!ASSERT_LT(link_fd, 0, "fexit_attach_link"))
return -1;
- prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+ prog_fd = fexit_skel->progs.test1.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
ASSERT_OK(err, "test_run");
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index cd6dc80edf18..225714f71ac6 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -541,7 +541,7 @@ static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
return;
link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
- if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_netns"))
goto out_close;
run_tests_skb_less(tap_fd, skel->maps.last_dissection);
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 172c586b6996..3931ede5c534 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -134,9 +134,9 @@ static void test_link_create_link_create(int netns, int prog1, int prog2)
/* Expect failure creating link when another link exists */
errno = 0;
link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
- if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+ if (CHECK_FAIL(link2 >= 0 || errno != E2BIG))
perror("bpf_prog_attach(prog2) expected E2BIG");
- if (link2 != -1)
+ if (link2 >= 0)
close(link2);
CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
@@ -159,9 +159,9 @@ static void test_prog_attach_link_create(int netns, int prog1, int prog2)
/* Expect failure creating link when prog attached */
errno = 0;
link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
- if (CHECK_FAIL(link != -1 || errno != EEXIST))
+ if (CHECK_FAIL(link >= 0 || errno != EEXIST))
perror("bpf_link_create(prog2) expected EEXIST");
- if (link != -1)
+ if (link >= 0)
close(link);
CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
@@ -623,7 +623,7 @@ static void run_tests(int netns)
}
out_close:
for (i = 0; i < ARRAY_SIZE(progs); i++) {
- if (progs[i] != -1)
+ if (progs[i] >= 0)
CHECK_FAIL(close(progs[i]));
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index 925722217edf..522237aa4470 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -121,12 +121,12 @@ void test_get_stack_raw_tp(void)
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
pb_opts.sample_cb = get_stack_print_output;
pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto close_prog;
/* trigger some syscall action */
@@ -141,9 +141,7 @@ void test_get_stack_raw_tp(void)
}
close_prog:
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
- if (!IS_ERR_OR_NULL(pb))
- perf_buffer__free(pb);
+ bpf_link__destroy(link);
+ perf_buffer__free(pb);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
index d884b2ed5bc5..8d5a6023a1bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -48,8 +48,7 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain",
- "should have failed\n");
+ ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_no_callchain");
close(pmu_fd);
/* add PERF_SAMPLE_CALLCHAIN, attach should succeed */
@@ -65,8 +64,7 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain",
- "err: %ld\n", PTR_ERR(skel->links.oncpu));
+ ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain");
close(pmu_fd);
/* add exclude_callchain_kernel, attach should fail */
@@ -82,8 +80,7 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel",
- "should have failed\n");
+ ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_exclude_callchain_kernel");
close(pmu_fd);
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c
index 428d488830c6..4747ab18f97f 100644
--- a/tools/testing/selftests/bpf/prog_tests/hashmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c
@@ -48,8 +48,7 @@ static void test_hashmap_generic(void)
struct hashmap *map;
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
return;
for (i = 0; i < ELEM_CNT; i++) {
@@ -267,8 +266,7 @@ static void test_hashmap_multimap(void)
/* force collisions */
map = hashmap__new(collision_hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
return;
/* set up multimap:
@@ -339,8 +337,7 @@ static void test_hashmap_empty()
/* force collisions */
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
goto cleanup;
if (CHECK(hashmap__size(map) != 0, "hashmap__size",
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index d65107919998..ddfb6bf97152 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -97,15 +97,13 @@ void test_kfree_skb(void)
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
link_fentry = bpf_program__attach_trace(fentry);
- if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n",
- PTR_ERR(link_fentry)))
+ if (!ASSERT_OK_PTR(link_fentry, "attach fentry"))
goto close_prog;
link_fexit = bpf_program__attach_trace(fexit);
- if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n",
- PTR_ERR(link_fexit)))
+ if (!ASSERT_OK_PTR(link_fexit, "attach fexit"))
goto close_prog;
perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
@@ -116,7 +114,7 @@ void test_kfree_skb(void)
pb_opts.sample_cb = on_sample;
pb_opts.ctx = &passed;
pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto close_prog;
memcpy(skb.cb, &cb, sizeof(cb));
@@ -144,12 +142,9 @@ void test_kfree_skb(void)
CHECK_FAIL(!test_ok[0] || !test_ok[1]);
close_prog:
perf_buffer__free(pb);
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
- if (!IS_ERR_OR_NULL(link_fentry))
- bpf_link__destroy(link_fentry);
- if (!IS_ERR_OR_NULL(link_fexit))
- bpf_link__destroy(link_fexit);
+ bpf_link__destroy(link);
+ bpf_link__destroy(link_fentry);
+ bpf_link__destroy(link_fexit);
bpf_object__close(obj);
bpf_object__close(obj2);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 7fc0951ee75f..30a7b9b837bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2021 Facebook */
#include <test_progs.h>
#include <network_helpers.h>
-#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test.lskel.h"
#include "kfunc_call_test_subprog.skel.h"
static void test_main(void)
@@ -14,13 +14,13 @@ static void test_main(void)
if (!ASSERT_OK_PTR(skel, "skel"))
return;
- prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+ prog_fd = skel->progs.kfunc_call_test1.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, (__u32 *)&retval, NULL);
ASSERT_OK(err, "bpf_prog_test_run(test1)");
ASSERT_EQ(retval, 12, "test1-retval");
- prog_fd = bpf_program__fd(skel->progs.kfunc_call_test2);
+ prog_fd = skel->progs.kfunc_call_test2.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, (__u32 *)&retval, NULL);
ASSERT_OK(err, "bpf_prog_test_run(test2)");
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index b58b775d19f3..67bebd324147 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -87,8 +87,7 @@ void test_ksyms_btf(void)
struct btf *btf;
btf = libbpf_find_kernel_btf();
- if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
- PTR_ERR(btf)))
+ if (!ASSERT_OK_PTR(btf, "btf_exists"))
return;
percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
index 4c232b456479..2cd5cded543f 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
@@ -4,7 +4,7 @@
#include <test_progs.h>
#include <bpf/libbpf.h>
#include <bpf/btf.h>
-#include "test_ksyms_module.skel.h"
+#include "test_ksyms_module.lskel.h"
static int duration;
diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
index a743288cf384..6fc97c45f71e 100644
--- a/tools/testing/selftests/bpf/prog_tests/link_pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
@@ -17,7 +17,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
int err, i;
link = bpf_program__attach(prog);
- if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
bss->in = 1;
@@ -51,7 +51,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
/* re-open link from BPFFS */
link = bpf_link__open(link_pin_path);
- if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_open"))
goto cleanup;
CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2",
@@ -84,8 +84,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i);
cleanup:
- if (!IS_ERR(link))
- bpf_link__destroy(link);
+ bpf_link__destroy(link);
}
void test_link_pinning(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
new file mode 100644
index 000000000000..beebfa9730e1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "test_lookup_and_delete.skel.h"
+
+#define START_VALUE 1234
+#define NEW_VALUE 4321
+#define MAX_ENTRIES 2
+
+static int duration;
+static int nr_cpus;
+
+static int fill_values(int map_fd)
+{
+ __u64 key, value = START_VALUE;
+ int err;
+
+ for (key = 1; key < MAX_ENTRIES + 1; key++) {
+ err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int fill_values_percpu(int map_fd)
+{
+ __u64 key, value[nr_cpus];
+ int i, err;
+
+ for (i = 0; i < nr_cpus; i++)
+ value[i] = START_VALUE;
+
+ for (key = 1; key < MAX_ENTRIES + 1; key++) {
+ err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct test_lookup_and_delete *setup_prog(enum bpf_map_type map_type,
+ int *map_fd)
+{
+ struct test_lookup_and_delete *skel;
+ int err;
+
+ skel = test_lookup_and_delete__open();
+ if (!ASSERT_OK_PTR(skel, "test_lookup_and_delete__open"))
+ return NULL;
+
+ err = bpf_map__set_type(skel->maps.hash_map, map_type);
+ if (!ASSERT_OK(err, "bpf_map__set_type"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(skel->maps.hash_map, MAX_ENTRIES);
+ if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+ goto cleanup;
+
+ err = test_lookup_and_delete__load(skel);
+ if (!ASSERT_OK(err, "test_lookup_and_delete__load"))
+ goto cleanup;
+
+ *map_fd = bpf_map__fd(skel->maps.hash_map);
+ if (!ASSERT_GE(*map_fd, 0, "bpf_map__fd"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+ return NULL;
+}
+
+/* Triggers BPF program that updates map with given key and value */
+static int trigger_tp(struct test_lookup_and_delete *skel, __u64 key,
+ __u64 value)
+{
+ int err;
+
+ skel->bss->set_pid = getpid();
+ skel->bss->set_key = key;
+ skel->bss->set_value = value;
+
+ err = test_lookup_and_delete__attach(skel);
+ if (!ASSERT_OK(err, "test_lookup_and_delete__attach"))
+ return -1;
+
+ syscall(__NR_getpgid);
+
+ test_lookup_and_delete__detach(skel);
+
+ return 0;
+}
+
+static void test_lookup_and_delete_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, value;
+ int map_fd, err;
+
+ /* Setup program and fill the map. */
+ skel = setup_prog(BPF_MAP_TYPE_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values(map_fd);
+ if (!ASSERT_OK(err, "fill_values"))
+ goto cleanup;
+
+ /* Lookup and delete element. */
+ key = 1;
+ err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ /* Fetched value should match the initially set value. */
+ if (CHECK(value != START_VALUE, "bpf_map_lookup_and_delete_elem",
+ "unexpected value=%lld\n", value))
+ goto cleanup;
+
+ /* Check that the entry is non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_percpu_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, val, value[nr_cpus];
+ int map_fd, err, i;
+
+ /* Setup program and fill the map. */
+ skel = setup_prog(BPF_MAP_TYPE_PERCPU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values_percpu(map_fd);
+ if (!ASSERT_OK(err, "fill_values_percpu"))
+ goto cleanup;
+
+ /* Lookup and delete element. */
+ key = 1;
+ err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ for (i = 0; i < nr_cpus; i++) {
+ val = value[i];
+
+ /* Fetched value should match the initially set value. */
+ if (CHECK(val != START_VALUE, "map value",
+ "unexpected for cpu %d: %lld\n", i, val))
+ goto cleanup;
+ }
+
+ /* Check that the entry is non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, value;
+ int map_fd, err;
+
+ /* Setup program and fill the LRU map. */
+ skel = setup_prog(BPF_MAP_TYPE_LRU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values(map_fd);
+ if (!ASSERT_OK(err, "fill_values"))
+ goto cleanup;
+
+ /* Insert new element at key=3, should reuse LRU element. */
+ key = 3;
+ err = trigger_tp(skel, key, NEW_VALUE);
+ if (!ASSERT_OK(err, "trigger_tp"))
+ goto cleanup;
+
+ /* Lookup and delete element 3. */
+ err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ /* Value should match the new value. */
+ if (CHECK(value != NEW_VALUE, "bpf_map_lookup_and_delete_elem",
+ "unexpected value=%lld\n", value))
+ goto cleanup;
+
+ /* Check that entries 3 and 1 are non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ key = 1;
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_percpu_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, val, value[nr_cpus];
+ int map_fd, err, i, cpucnt = 0;
+
+ /* Setup program and fill the LRU map. */
+ skel = setup_prog(BPF_MAP_TYPE_LRU_PERCPU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values_percpu(map_fd);
+ if (!ASSERT_OK(err, "fill_values_percpu"))
+ goto cleanup;
+
+ /* Insert new element at key=3, should reuse LRU element 1. */
+ key = 3;
+ err = trigger_tp(skel, key, NEW_VALUE);
+ if (!ASSERT_OK(err, "trigger_tp"))
+ goto cleanup;
+
+ /* Clean value. */
+ for (i = 0; i < nr_cpus; i++)
+ value[i] = 0;
+
+ /* Lookup and delete element 3. */
+ err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) {
+ goto cleanup;
+ }
+
+ /* Check if only one CPU has set the value. */
+ for (i = 0; i < nr_cpus; i++) {
+ val = value[i];
+ if (val) {
+ if (CHECK(val != NEW_VALUE, "map value",
+ "unexpected for cpu %d: %lld\n", i, val))
+ goto cleanup;
+ cpucnt++;
+ }
+ }
+ if (CHECK(cpucnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+ cpucnt))
+ goto cleanup;
+
+ /* Check that entries 3 and 1 are non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ key = 1;
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+void test_lookup_and_delete(void)
+{
+ nr_cpus = bpf_num_possible_cpus();
+
+ if (test__start_subtest("lookup_and_delete"))
+ test_lookup_and_delete_hash();
+ if (test__start_subtest("lookup_and_delete_percpu"))
+ test_lookup_and_delete_percpu_hash();
+ if (test__start_subtest("lookup_and_delete_lru"))
+ test_lookup_and_delete_lru_hash();
+ if (test__start_subtest("lookup_and_delete_lru_percpu"))
+ test_lookup_and_delete_lru_percpu_hash();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
new file mode 100644
index 000000000000..59adb4715394
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ * 1. call listen() for 4 server sockets.
+ * 2. call connect() for 25 client sockets.
+ * 3. call listen() for 1 server socket. (migration target)
+ * 4. update a map to migrate all child sockets
+ * to the last server socket (migrate_map[cookie] = 4)
+ * 5. call shutdown() for first 4 server sockets
+ * and migrate the requests in the accept queue
+ * to the last server socket.
+ * 6. call listen() for the second server socket.
+ * 7. call shutdown() for the last server
+ * and migrate the requests in the accept queue
+ * to the second server socket.
+ * 8. call listen() for the last server.
+ * 9. call shutdown() for the second server
+ * and migrate the requests in the accept queue
+ * to the last server socket.
+ * 10. call accept() for the last server socket.
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_progs.h"
+#include "test_migrate_reuseport.skel.h"
+#include "network_helpers.h"
+
+#ifndef TCP_FASTOPEN_CONNECT
+#define TCP_FASTOPEN_CONNECT 30
+#endif
+
+#define IFINDEX_LO 1
+
+#define NR_SERVERS 5
+#define NR_CLIENTS (NR_SERVERS * 5)
+#define MIGRATED_TO (NR_SERVERS - 1)
+
+/* fastopenq->max_qlen and sk->sk_max_ack_backlog */
+#define QLEN (NR_CLIENTS * 5)
+
+#define MSG "Hello World\0"
+#define MSGLEN 12
+
+static struct migrate_reuseport_test_case {
+ const char *name;
+ __s64 servers[NR_SERVERS];
+ __s64 clients[NR_CLIENTS];
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+ int family;
+ int state;
+ bool drop_ack;
+ bool expire_synack_timer;
+ bool fastopen;
+ struct bpf_link *link;
+} test_cases[] = {
+ {
+ .name = "IPv4 TCP_ESTABLISHED inet_csk_listen_stop",
+ .family = AF_INET,
+ .state = BPF_TCP_ESTABLISHED,
+ .drop_ack = false,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv4 TCP_SYN_RECV inet_csk_listen_stop",
+ .family = AF_INET,
+ .state = BPF_TCP_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = true,
+ },
+ {
+ .name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
+ .family = AF_INET,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = true,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+ .family = AF_INET,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_ESTABLISHED inet_csk_listen_stop",
+ .family = AF_INET6,
+ .state = BPF_TCP_ESTABLISHED,
+ .drop_ack = false,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_SYN_RECV inet_csk_listen_stop",
+ .family = AF_INET6,
+ .state = BPF_TCP_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = true,
+ },
+ {
+ .name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
+ .family = AF_INET6,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = true,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+ .family = AF_INET6,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ }
+};
+
+static void init_fds(__s64 fds[], int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ fds[i] = -1;
+}
+
+static void close_fds(__s64 fds[], int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++) {
+ if (fds[i] != -1) {
+ close(fds[i]);
+ fds[i] = -1;
+ }
+ }
+}
+
+static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
+{
+ int err = 0, fd, len;
+
+ fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+ if (!ASSERT_NEQ(fd, -1, "open"))
+ return -1;
+
+ if (restore) {
+ len = write(fd, buf, *saved_len);
+ if (!ASSERT_EQ(len, *saved_len, "write - restore"))
+ err = -1;
+ } else {
+ *saved_len = read(fd, buf, size);
+ if (!ASSERT_GE(*saved_len, 1, "read")) {
+ err = -1;
+ goto close;
+ }
+
+ err = lseek(fd, 0, SEEK_SET);
+ if (!ASSERT_OK(err, "lseek"))
+ goto close;
+
+ /* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
+ * TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
+ */
+ len = write(fd, "519", 3);
+ if (!ASSERT_EQ(len, 3, "write - setup"))
+ err = -1;
+ }
+
+close:
+ close(fd);
+
+ return err;
+}
+
+static int drop_ack(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ if (test_case->family == AF_INET)
+ skel->bss->server_port = ((struct sockaddr_in *)
+ &test_case->addr)->sin_port;
+ else
+ skel->bss->server_port = ((struct sockaddr_in6 *)
+ &test_case->addr)->sin6_port;
+
+ test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
+ IFINDEX_LO);
+ if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
+ return -1;
+
+ return 0;
+}
+
+/* Remove the XDP drop_ack program installed by drop_ack() so the final ACK
+ * of the 3-way handshake is delivered again.
+ *
+ * bpf_link__destroy() is used instead of bpf_link__detach(): a bare detach
+ * leaves the struct bpf_link and its FD allocated, and the only pointer to
+ * it is dropped below, so each drop_ack test case would leak one link.
+ *
+ * Returns 0 on success, -1 if destroying the link reported an error.
+ */
+static int pass_ack(struct migrate_reuseport_test_case *test_case)
+{
+	int err;
+
+	err = bpf_link__destroy(test_case->link);
+
+	/* The link object is released even when an error is reported, so
+	 * never leave the stale pointer behind: run_test() checks
+	 * test_case->link again on its cleanup path.
+	 */
+	test_case->link = NULL;
+
+	if (!ASSERT_OK(err, "bpf_link__destroy"))
+		return -1;
+
+	return 0;
+}
+
+static int start_servers(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int i, err, prog_fd, reuseport = 1, qlen = QLEN;
+
+ prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
+
+ make_sockaddr(test_case->family,
+ test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
+ &test_case->addr, &test_case->addrlen);
+
+ for (i = 0; i < NR_SERVERS; i++) {
+ test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
+ IPPROTO_TCP);
+ if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
+ return -1;
+
+ err = setsockopt(test_case->servers[i], SOL_SOCKET,
+ SO_REUSEPORT, &reuseport, sizeof(reuseport));
+ if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
+ return -1;
+
+ err = bind(test_case->servers[i],
+ (struct sockaddr *)&test_case->addr,
+ test_case->addrlen);
+ if (!ASSERT_OK(err, "bind"))
+ return -1;
+
+ if (i == 0) {
+ err = setsockopt(test_case->servers[i], SOL_SOCKET,
+ SO_ATTACH_REUSEPORT_EBPF,
+ &prog_fd, sizeof(prog_fd));
+ if (!ASSERT_OK(err,
+ "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
+ return -1;
+
+ err = getsockname(test_case->servers[i],
+ (struct sockaddr *)&test_case->addr,
+ &test_case->addrlen);
+ if (!ASSERT_OK(err, "getsockname"))
+ return -1;
+ }
+
+ if (test_case->fastopen) {
+ err = setsockopt(test_case->servers[i],
+ SOL_TCP, TCP_FASTOPEN,
+ &qlen, sizeof(qlen));
+ if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
+ return -1;
+ }
+
+ /* All requests will be tied to the first four listeners */
+ if (i != MIGRATED_TO) {
+ err = listen(test_case->servers[i], qlen);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int start_clients(struct migrate_reuseport_test_case *test_case)
+{
+ char buf[MSGLEN] = MSG;
+ int i, err;
+
+ for (i = 0; i < NR_CLIENTS; i++) {
+ test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
+ IPPROTO_TCP);
+ if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
+ return -1;
+
+ /* The attached XDP program drops only the final ACK, so
+ * clients will transition to TCP_ESTABLISHED immediately.
+ */
+ err = settimeo(test_case->clients[i], 100);
+ if (!ASSERT_OK(err, "settimeo"))
+ return -1;
+
+ if (test_case->fastopen) {
+ int fastopen = 1;
+
+ err = setsockopt(test_case->clients[i], IPPROTO_TCP,
+ TCP_FASTOPEN_CONNECT, &fastopen,
+ sizeof(fastopen));
+ if (!ASSERT_OK(err,
+ "setsockopt - TCP_FASTOPEN_CONNECT"))
+ return -1;
+ }
+
+ err = connect(test_case->clients[i],
+ (struct sockaddr *)&test_case->addr,
+ test_case->addrlen);
+ if (!ASSERT_OK(err, "connect"))
+ return -1;
+
+ err = write(test_case->clients[i], buf, MSGLEN);
+ if (!ASSERT_EQ(err, MSGLEN, "write"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int update_maps(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int i, err, migrated_to = MIGRATED_TO;
+ int reuseport_map_fd, migrate_map_fd;
+ __u64 value;
+
+ reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
+ migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
+
+ for (i = 0; i < NR_SERVERS; i++) {
+ value = (__u64)test_case->servers[i];
+ err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
+ BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
+ return -1;
+
+ err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
+ return -1;
+
+ err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
+ BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int migrate_dance(struct migrate_reuseport_test_case *test_case)
+{
+ int i, err;
+
+ /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
+ * to the last listener based on eBPF.
+ */
+ for (i = 0; i < MIGRATED_TO; i++) {
+ err = shutdown(test_case->servers[i], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+ }
+
+ /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
+ if (test_case->state == BPF_TCP_NEW_SYN_RECV)
+ return 0;
+
+ /* Note that we use the second listener instead of the
+ * first one here.
+ *
+ * The first listener is bind()ed with port 0 and,
+ * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so
+ * calling listen() again will bind() the first listener
+ * on a new ephemeral port and detach it from the existing
+ * reuseport group. (See: __inet_bind(), tcp_set_state())
+ *
+ * OTOH, the second one is bind()ed with a specific port,
+ * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
+ * resurrect the listener on the existing reuseport group.
+ */
+ err = listen(test_case->servers[1], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+
+ /* Migrate from the last listener to the second one.
+ *
+ * All listeners were detached out of the reuseport_map,
+ * so migration will be done by kernel random pick from here.
+ */
+ err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+
+ /* Back to the existing reuseport group */
+ err = listen(test_case->servers[MIGRATED_TO], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+
+ /* Migrate back to the last one from the second one */
+ err = shutdown(test_case->servers[1], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+
+ return 0;
+}
+
+static void count_requests(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int err, cnt = 0, client;
+ char buf[MSGLEN];
+
+ err = settimeo(test_case->servers[MIGRATED_TO], 4000);
+ if (!ASSERT_OK(err, "settimeo"))
+ goto out;
+
+ for (; cnt < NR_CLIENTS; cnt++) {
+ client = accept(test_case->servers[MIGRATED_TO],
+ (struct sockaddr *)&addr, &len);
+ if (!ASSERT_NEQ(client, -1, "accept"))
+ goto out;
+
+ memset(buf, 0, MSGLEN);
+ read(client, &buf, MSGLEN);
+ close(client);
+
+ if (!ASSERT_STREQ(buf, MSG, "read"))
+ goto out;
+ }
+
+out:
+ ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
+
+ switch (test_case->state) {
+ case BPF_TCP_ESTABLISHED:
+ cnt = skel->bss->migrated_at_close;
+ break;
+ case BPF_TCP_SYN_RECV:
+ cnt = skel->bss->migrated_at_close_fastopen;
+ break;
+ case BPF_TCP_NEW_SYN_RECV:
+ if (test_case->expire_synack_timer)
+ cnt = skel->bss->migrated_at_send_synack;
+ else
+ cnt = skel->bss->migrated_at_recv_ack;
+ break;
+ default:
+ cnt = 0;
+ }
+
+ ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
+}
+
+static void run_test(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int err, saved_len;
+ char buf[16];
+
+ skel->bss->migrated_at_close = 0;
+ skel->bss->migrated_at_close_fastopen = 0;
+ skel->bss->migrated_at_send_synack = 0;
+ skel->bss->migrated_at_recv_ack = 0;
+
+ init_fds(test_case->servers, NR_SERVERS);
+ init_fds(test_case->clients, NR_CLIENTS);
+
+ if (test_case->fastopen) {
+ memset(buf, 0, sizeof(buf));
+
+ err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
+ if (!ASSERT_OK(err, "setup_fastopen - setup"))
+ return;
+ }
+
+ err = start_servers(test_case, skel);
+ if (!ASSERT_OK(err, "start_servers"))
+ goto close_servers;
+
+ if (test_case->drop_ack) {
+ /* Drop the final ACK of the 3-way handshake and stick the
+ * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV.
+ */
+ err = drop_ack(test_case, skel);
+ if (!ASSERT_OK(err, "drop_ack"))
+ goto close_servers;
+ }
+
+ /* Tie requests to the first four listeners */
+ err = start_clients(test_case);
+ if (!ASSERT_OK(err, "start_clients"))
+ goto close_clients;
+
+ err = listen(test_case->servers[MIGRATED_TO], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ goto close_clients;
+
+ err = update_maps(test_case, skel);
+ if (!ASSERT_OK(err, "fill_maps"))
+ goto close_clients;
+
+ /* Migrate the requests in the accept queue only.
+ * TCP_NEW_SYN_RECV requests are not migrated at this point.
+ */
+ err = migrate_dance(test_case);
+ if (!ASSERT_OK(err, "migrate_dance"))
+ goto close_clients;
+
+ if (test_case->expire_synack_timer) {
+ /* Wait for SYN+ACK timers to expire so that
+ * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
+ */
+ sleep(1);
+ }
+
+ if (test_case->link) {
+ /* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
+ err = pass_ack(test_case);
+ if (!ASSERT_OK(err, "pass_ack"))
+ goto close_clients;
+ }
+
+ count_requests(test_case, skel);
+
+close_clients:
+ close_fds(test_case->clients, NR_CLIENTS);
+
+ if (test_case->link) {
+ err = pass_ack(test_case);
+ ASSERT_OK(err, "pass_ack - clean up");
+ }
+
+close_servers:
+ close_fds(test_case->servers, NR_SERVERS);
+
+ if (test_case->fastopen) {
+ err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
+ ASSERT_OK(err, "setup_fastopen - restore");
+ }
+}
+
+/* Test entry point: load the BPF skeleton once, then run each entry of
+ * test_cases[] as its own subtest, sharing that skeleton.
+ */
+void test_migrate_reuseport(void)
+{
+	struct test_migrate_reuseport *skel;
+	int i;
+
+	skel = test_migrate_reuseport__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+		/* Honor the runner's subtest selection; a deselected case
+		 * must not be executed (and must not touch the
+		 * tcp_fastopen sysctl or attach XDP to loopback).
+		 */
+		if (!test__start_subtest(test_cases[i].name))
+			continue;
+
+		run_test(&test_cases[i], skel);
+	}
+
+	test_migrate_reuseport__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/obj_name.c b/tools/testing/selftests/bpf/prog_tests/obj_name.c
index e178416bddad..6194b776a28b 100644
--- a/tools/testing/selftests/bpf/prog_tests/obj_name.c
+++ b/tools/testing/selftests/bpf/prog_tests/obj_name.c
@@ -38,13 +38,13 @@ void test_obj_name(void)
fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
CHECK((tests[i].success && fd < 0) ||
- (!tests[i].success && fd != -1) ||
+ (!tests[i].success && fd >= 0) ||
(!tests[i].success && errno != tests[i].expected_errno),
"check-bpf-prog-name",
"fd %d(%d) errno %d(%d)\n",
fd, tests[i].success, errno, tests[i].expected_errno);
- if (fd != -1)
+ if (fd >= 0)
close(fd);
/* test different attr.map_name during BPF_MAP_CREATE */
@@ -59,13 +59,13 @@ void test_obj_name(void)
memcpy(attr.map_name, tests[i].name, ncopy);
fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
CHECK((tests[i].success && fd < 0) ||
- (!tests[i].success && fd != -1) ||
+ (!tests[i].success && fd >= 0) ||
(!tests[i].success && errno != tests[i].expected_errno),
"check-bpf-map-name",
"fd %d(%d) errno %d(%d)\n",
fd, tests[i].success, errno, tests[i].expected_errno);
- if (fd != -1)
+ if (fd >= 0)
close(fd);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index e35c444902a7..12c4f45cee1a 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -74,7 +74,7 @@ static void test_perf_branches_common(int perf_fd,
/* attach perf_event */
link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd);
- if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
goto out_destroy_skel;
/* generate some branches on cpu 0 */
@@ -119,7 +119,7 @@ static void test_perf_branches_hw(void)
* Some setups don't support branch records (virtual machines, !x86),
* so skip test in this case.
*/
- if (pfd == -1) {
+ if (pfd < 0) {
if (errno == ENOENT || errno == EOPNOTSUPP) {
printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
__func__);
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index ca9f0895ec84..6490e9673002 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -80,7 +80,7 @@ void test_perf_buffer(void)
pb_opts.sample_cb = on_sample;
pb_opts.ctx = &cpu_seen;
pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out_close;
CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd",
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
index 72c3690844fb..33144c9432ae 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
@@ -97,8 +97,7 @@ void test_perf_event_stackmap(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+ if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
close(pmu_fd);
goto cleanup;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
index 7aecfd9e87d1..95bd12097358 100644
--- a/tools/testing/selftests/bpf/prog_tests/probe_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -15,7 +15,7 @@ void test_probe_user(void)
static const int zero = 0;
obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
@@ -33,11 +33,8 @@ void test_probe_user(void)
goto cleanup;
kprobe_link = bpf_program__attach(kprobe_prog);
- if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- "err %ld\n", PTR_ERR(kprobe_link))) {
- kprobe_link = NULL;
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
goto cleanup;
- }
memset(&curr, 0, sizeof(curr));
in->sin_family = AF_INET;
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index 131d7f7eeb42..89fc98faf19e 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -46,7 +46,7 @@ void test_prog_run_xattr(void)
tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
+ CHECK_ATTR(err >= 0 || errno != ENOSPC || tattr.retval, "run",
"err %d errno %d retval %d\n", err, errno, tattr.retval);
CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
@@ -78,6 +78,6 @@ void test_prog_run_xattr(void)
cleanup:
if (skel)
test_pkt_access__destroy(skel);
- if (stats_fd != -1)
+ if (stats_fd >= 0)
close(stats_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
index c5fb191874ac..41720a62c4fa 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
@@ -77,7 +77,7 @@ void test_raw_tp_test_run(void)
/* invalid cpu ID should fail with ENXIO */
opts.cpu = 0xffffffff;
err = bpf_prog_test_run_opts(prog_fd, &opts);
- CHECK(err != -1 || errno != ENXIO,
+ CHECK(err >= 0 || errno != ENXIO,
"test_run_opts_fail",
"should failed with ENXIO\n");
@@ -85,7 +85,7 @@ void test_raw_tp_test_run(void)
opts.cpu = 1;
opts.flags = 0;
err = bpf_prog_test_run_opts(prog_fd, &opts);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"test_run_opts_fail",
"should failed with EINVAL\n");
diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
index 563e12120e77..5f9eaa3ab584 100644
--- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
+++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
@@ -30,7 +30,7 @@ void test_rdonly_maps(void)
struct bss bss;
obj = bpf_object__open_file(file, NULL);
- if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
return;
err = bpf_object__load(obj);
@@ -58,11 +58,8 @@ void test_rdonly_maps(void)
goto cleanup;
link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
- if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n",
- t->prog_name, PTR_ERR(link))) {
- link = NULL;
+ if (!ASSERT_OK_PTR(link, "attach_prog"))
goto cleanup;
- }
/* trigger probe */
usleep(1);
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index ac1ee10cffd8..de2688166696 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -15,7 +15,7 @@ void test_reference_tracking(void)
int err = 0;
obj = bpf_object__open_file(file, &open_opts);
- if (CHECK_FAIL(IS_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index d3c2de2c24d1..f62361306f6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -76,7 +76,7 @@ __resolve_symbol(struct btf *btf, int type_id)
}
for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
- if (test_symbols[i].id != -1)
+ if (test_symbols[i].id >= 0)
continue;
if (BTF_INFO_KIND(type->info) != test_symbols[i].type)
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index de78617f6550..4706cee84360 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -12,7 +12,7 @@
#include <sys/sysinfo.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
-#include "test_ringbuf.skel.h"
+#include "test_ringbuf.lskel.h"
#define EDONE 7777
@@ -86,25 +86,70 @@ void test_ringbuf(void)
const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
pthread_t thread;
long bg_ret = -1;
- int err, cnt;
+ int err, cnt, rb_fd;
int page_size = getpagesize();
+ void *mmap_ptr, *tmp_ptr;
skel = test_ringbuf__open();
if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
- err = bpf_map__set_max_entries(skel->maps.ringbuf, page_size);
- if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
- goto cleanup;
+ skel->maps.ringbuf.max_entries = page_size;
err = test_ringbuf__load(skel);
if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
goto cleanup;
+ rb_fd = skel->maps.ringbuf.map_fd;
+ /* good read/write cons_pos */
+ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
+ ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos");
+ tmp_ptr = mremap(mmap_ptr, page_size, 2 * page_size, MREMAP_MAYMOVE);
+ if (!ASSERT_ERR_PTR(tmp_ptr, "rw_extend"))
+ goto cleanup;
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
+
+ /* bad writeable prod_pos */
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, page_size);
+ err = -errno;
+ ASSERT_ERR_PTR(mmap_ptr, "wr_prod_pos");
+ ASSERT_EQ(err, -EPERM, "wr_prod_pos_err");
+
+ /* bad writeable data pages */
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+ err = -errno;
+ ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_one");
+ ASSERT_EQ(err, -EPERM, "wr_data_page_one_err");
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 3 * page_size);
+ ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_two");
+ mmap_ptr = mmap(NULL, 2 * page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+ ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_all");
+
+ /* good read-only pages */
+ mmap_ptr = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED, rb_fd, 0);
+ if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
+ goto cleanup;
+
+ ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_WRITE), "write_protect");
+ ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_EXEC), "exec_protect");
+ ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "ro_remap");
+ ASSERT_OK(munmap(mmap_ptr, 4 * page_size), "unmap_ro");
+
+ /* good read-only pages with initial offset */
+ mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, page_size);
+ if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
+ goto cleanup;
+
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_protect");
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_protect");
+ ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 3 * page_size, MREMAP_MAYMOVE), "ro_remap");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro");
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
- ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
+ ringbuf = ring_buffer__new(skel->maps.ringbuf.map_fd,
process_sample, NULL, NULL);
if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index cef63e703924..167cd8a2edfd 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -63,7 +63,7 @@ void test_ringbuf_multi(void)
goto cleanup;
proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0);
- if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n"))
+ if (CHECK(proto_fd < 0, "bpf_create_map", "bpf_create_map failed\n"))
goto cleanup;
err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 821b4146b7b6..4efd337d6a3c 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -78,7 +78,7 @@ static int create_maps(enum bpf_map_type inner_type)
attr.max_entries = REUSEPORT_ARRAY_SIZE;
reuseport_array = bpf_create_map_xattr(&attr);
- RET_ERR(reuseport_array == -1, "creating reuseport_array",
+ RET_ERR(reuseport_array < 0, "creating reuseport_array",
"reuseport_array:%d errno:%d\n", reuseport_array, errno);
/* Creating outer_map */
@@ -89,7 +89,7 @@ static int create_maps(enum bpf_map_type inner_type)
attr.max_entries = 1;
attr.inner_map_fd = reuseport_array;
outer_map = bpf_create_map_xattr(&attr);
- RET_ERR(outer_map == -1, "creating outer_map",
+ RET_ERR(outer_map < 0, "creating outer_map",
"outer_map:%d errno:%d\n", outer_map, errno);
return 0;
@@ -102,8 +102,9 @@ static int prepare_bpf_obj(void)
int err;
obj = bpf_object__open("test_select_reuseport_kern.o");
- RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
- "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+ err = libbpf_get_error(obj);
+ RET_ERR(err, "open test_select_reuseport_kern.o",
+ "obj:%p PTR_ERR(obj):%d\n", obj, err);
map = bpf_object__find_map_by_name(obj, "outer_map");
RET_ERR(!map, "find outer_map", "!map\n");
@@ -116,31 +117,31 @@ static int prepare_bpf_obj(void)
prog = bpf_program__next(NULL, obj);
RET_ERR(!prog, "get first bpf_program", "!prog\n");
select_by_skb_data_prog = bpf_program__fd(prog);
- RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
+ RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
map = bpf_object__find_map_by_name(obj, "result_map");
RET_ERR(!map, "find result_map", "!map\n");
result_map = bpf_map__fd(map);
- RET_ERR(result_map == -1, "get result_map fd",
+ RET_ERR(result_map < 0, "get result_map fd",
"result_map:%d\n", result_map);
map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
tmp_index_ovr_map = bpf_map__fd(map);
- RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+ RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd",
"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
map = bpf_object__find_map_by_name(obj, "linum_map");
RET_ERR(!map, "find linum_map", "!map\n");
linum_map = bpf_map__fd(map);
- RET_ERR(linum_map == -1, "get linum_map fd",
+ RET_ERR(linum_map < 0, "get linum_map fd",
"linum_map:%d\n", linum_map);
map = bpf_object__find_map_by_name(obj, "data_check_map");
RET_ERR(!map, "find data_check_map", "!map\n");
data_check_map = bpf_map__fd(map);
- RET_ERR(data_check_map == -1, "get data_check_map fd",
+ RET_ERR(data_check_map < 0, "get data_check_map fd",
"data_check_map:%d\n", data_check_map);
return 0;
@@ -237,7 +238,7 @@ static long get_linum(void)
int err;
err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
- RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+ RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n",
err, errno);
return linum;
@@ -254,11 +255,11 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
addrlen = sizeof(cli_sa);
err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
&addrlen);
- RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+ RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n",
err, errno);
err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
- RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+ RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
err, errno);
if (type == SOCK_STREAM) {
@@ -347,7 +348,7 @@ static void check_results(void)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &results[i]);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
}
@@ -524,12 +525,12 @@ static void test_syncookie(int type, sa_family_t family)
*/
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
&tmp_index, BPF_ANY);
- RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+ RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)",
"err:%d errno:%d\n", err, errno);
do_test(type, family, &cmd, PASS);
err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
&tmp_index);
- RET_IF(err == -1 || tmp_index != -1,
+ RET_IF(err < 0 || tmp_index >= 0,
"lookup_elem(tmp_index_ovr_map)",
"err:%d errno:%d tmp_index:%d\n",
err, errno, tmp_index);
@@ -569,7 +570,7 @@ static void test_detach_bpf(int type, sa_family_t family)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
nr_run_before += tmp;
}
@@ -584,7 +585,7 @@ static void test_detach_bpf(int type, sa_family_t family)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
nr_run_after += tmp;
}
@@ -632,24 +633,24 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
SO_ATTACH_REUSEPORT_EBPF,
&select_by_skb_data_prog,
sizeof(select_by_skb_data_prog));
- RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+ RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
"err:%d errno:%d\n", err, errno);
}
err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
- RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+ RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
if (type == SOCK_STREAM) {
err = listen(sk_fds[i], 10);
- RET_IF(err == -1, "listen()",
+ RET_IF(err < 0, "listen()",
"sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
}
err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
BPF_NOEXIST);
- RET_IF(err == -1, "update_elem(reuseport_array)",
+ RET_IF(err < 0, "update_elem(reuseport_array)",
"sk_fds[%d] err:%d errno:%d\n", i, err, errno);
if (i == first) {
@@ -682,7 +683,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
prepare_sk_fds(type, family, inany);
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
BPF_ANY);
- RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+ RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)",
"err:%d errno:%d\n", err, errno);
/* Install reuseport_array to outer_map? */
@@ -691,7 +692,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
BPF_ANY);
- RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
+ RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)",
"err:%d errno:%d\n", err, errno);
}
@@ -720,18 +721,18 @@ static void cleanup_per_test(bool no_inner_map)
return;
err = bpf_map_delete_elem(outer_map, &index_zero);
- RET_IF(err == -1, "delete_elem(outer_map)",
+ RET_IF(err < 0, "delete_elem(outer_map)",
"err:%d errno:%d\n", err, errno);
}
static void cleanup(void)
{
- if (outer_map != -1) {
+ if (outer_map >= 0) {
close(outer_map);
outer_map = -1;
}
- if (reuseport_array != -1) {
+ if (reuseport_array >= 0) {
close(reuseport_array);
reuseport_array = -1;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 7043e6ded0e6..023cc532992d 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -2,7 +2,7 @@
#include <test_progs.h>
#include "test_send_signal_kern.skel.h"
-static volatile int sigusr1_received = 0;
+int sigusr1_received = 0;
static void sigusr1_handler(int signum)
{
@@ -91,8 +91,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
skel->links.send_signal_perf =
bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd);
- if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.send_signal_perf)))
+ if (!ASSERT_OK_PTR(skel->links.send_signal_perf, "attach_perf_event"))
goto disable_pmu;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 45c82db3c58c..aee41547e7f4 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -480,7 +480,7 @@ static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
}
link = bpf_program__attach_netns(prog, net_fd);
- if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_netns")) {
errno = -PTR_ERR(link);
log_err("failed to attach program '%s' to netns",
bpf_program__name(prog));
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index fe87b77af459..f6f130c99b8c 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -82,10 +82,8 @@ void test_skeleton(void)
CHECK(data->out2 != 2, "res2", "got %lld != exp %d\n", data->out2, 2);
CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3);
CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4);
- CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n",
- bss->handler_out5.a, 5);
- CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n",
- bss->handler_out5.b, 6);
+ CHECK(bss->out5.a != 5, "res5", "got %d != exp %d\n", bss->out5.a, 5);
+ CHECK(bss->out5.b != 6, "res6", "got %lld != exp %d\n", bss->out5.b, 6);
CHECK(bss->out6 != 14, "res7", "got %d != exp %d\n", bss->out6, 14);
CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1",
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index af87118e748e..577d619fb07e 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -97,12 +97,12 @@ static void check_result(void)
err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
&egress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
&ingress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d\n", err, errno);
memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
@@ -355,14 +355,12 @@ void test_sock_fields(void)
egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
child_cg_fd);
- if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n",
- PTR_ERR(egress_link)))
+ if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
goto done;
ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
child_cg_fd);
- if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n",
- PTR_ERR(ingress_link)))
+ if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
goto done;
linum_map_fd = bpf_map__fd(skel->maps.linum_map);
@@ -375,8 +373,8 @@ done:
bpf_link__destroy(egress_link);
bpf_link__destroy(ingress_link);
test_sock_fields__destroy(skel);
- if (child_cg_fd != -1)
+ if (child_cg_fd >= 0)
close(child_cg_fd);
- if (parent_cg_fd != -1)
+ if (parent_cg_fd >= 0)
close(parent_cg_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index ab77596b64e3..1352ec104149 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -88,11 +88,11 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
int s, map, err;
s = connected_socket_v4();
- if (CHECK_FAIL(s == -1))
+ if (CHECK_FAIL(s < 0))
return;
map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
- if (CHECK_FAIL(map == -1)) {
+ if (CHECK_FAIL(map < 0)) {
perror("bpf_create_map");
goto out;
}
@@ -245,7 +245,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.copy, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -304,7 +304,7 @@ static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
}
err = bpf_prog_attach(verdict, map, second, 0);
- assert(err == -1 && errno == EBUSY);
+ ASSERT_EQ(err, -EBUSY, "prog_attach_fail");
err = bpf_prog_detach2(verdict, map, first);
if (CHECK_FAIL(err)) {
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 06b86addc181..7a0d64fdc192 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -98,7 +98,7 @@ static void run_tests(int family, enum bpf_map_type map_type)
int map;
map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
- if (CHECK_FAIL(map == -1)) {
+ if (CHECK_FAIL(map < 0)) {
perror("bpf_map_create");
return;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 648d9ae898d2..515229f24a93 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -139,7 +139,7 @@
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("map_delete"); \
__ret; \
})
@@ -147,7 +147,7 @@
#define xbpf_map_lookup_elem(fd, key, val) \
({ \
int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("map_lookup"); \
__ret; \
})
@@ -155,7 +155,7 @@
#define xbpf_map_update_elem(fd, key, val, flags) \
({ \
int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("map_update"); \
__ret; \
})
@@ -164,7 +164,7 @@
({ \
int __ret = \
bpf_prog_attach((prog), (target), (type), (flags)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_attach(" #type ")"); \
__ret; \
})
@@ -172,7 +172,7 @@
#define xbpf_prog_detach2(prog, target, type) \
({ \
int __ret = bpf_prog_detach2((prog), (target), (type)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_detach2(" #type ")"); \
__ret; \
})
@@ -1610,6 +1610,7 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
struct sockaddr_storage addr;
int c0, c1, p0, p1;
unsigned int pass;
+ int retries = 100;
socklen_t len;
int err, n;
u64 value;
@@ -1686,9 +1687,13 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+again:
n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
- if (n < 0)
+ if (n < 0) {
+ if (errno == EAGAIN && retries--)
+ goto again;
FAIL_ERRNO("%s: read", log_prefix);
+ }
if (n == 0)
FAIL("%s: incomplete read", log_prefix);
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 11a769e18f5d..0a91d8d9954b 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -62,8 +62,7 @@ retry:
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+ if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
close(pmu_fd);
goto cleanup;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 37269d23df93..04b476bd62b9 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -21,7 +21,7 @@ void test_stacktrace_map(void)
goto close_prog;
link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
- if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_tp"))
goto close_prog;
/* find map fds */
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index 404a5498e1a3..4fd30bb651ad 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -21,7 +21,7 @@ void test_stacktrace_map_raw_tp(void)
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
/* find map fds */
@@ -59,7 +59,6 @@ void test_stacktrace_map_raw_tp(void)
goto close_prog;
close_prog:
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
+ bpf_link__destroy(link);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c
index 46556976dccc..5c4e3014e063 100644
--- a/tools/testing/selftests/bpf/prog_tests/static_linked.c
+++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c
@@ -14,12 +14,7 @@ void test_static_linked(void)
return;
skel->rodata->rovar1 = 1;
- skel->bss->static_var1 = 2;
- skel->bss->static_var11 = 3;
-
skel->rodata->rovar2 = 4;
- skel->bss->static_var2 = 5;
- skel->bss->static_var22 = 6;
err = test_static_linked__load(skel);
if (!ASSERT_OK(err, "skel_load"))
@@ -32,8 +27,8 @@ void test_static_linked(void)
/* trigger */
usleep(1);
- ASSERT_EQ(skel->bss->var1, 1 * 2 + 2 + 3, "var1");
- ASSERT_EQ(skel->bss->var2, 4 * 3 + 5 + 6, "var2");
+ ASSERT_EQ(skel->data->var1, 1 * 2 + 2 + 3, "var1");
+ ASSERT_EQ(skel->data->var2, 4 * 3 + 5 + 6, "var2");
cleanup:
test_static_linked__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c
new file mode 100644
index 000000000000..81e997a69f7a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/syscall.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "syscall.skel.h"
+
+struct args { /* context blob handed to the BPF program via tattr.ctx_in in test_syscall() */
+ __u64 log_buf; /* address of the user-space verifier log buffer */
+ __u32 log_size; /* size of that buffer in bytes */
+ int max_entries; /* test_syscall() sets 1024; NOTE(review): presumably sizes the map created on the BPF side - confirm */
+ int map_fd; /* zero on entry; test expects > 0 after the run (presumably filled in by the BPF program) */
+ int prog_fd; /* zero on entry; test expects > 0 after the run (presumably filled in by the BPF program) */
+ int btf_fd; /* zero on entry; test_syscall() closes it afterwards if > 0 */
+};
+
+/*
+ * Run the "syscall" skeleton's program once through test_run and verify what
+ * it reports back through the shared context: the program's return value, the
+ * map and prog fds left in ctx, the start of the verifier log, and one
+ * element looked up through ctx.map_fd. Any fd left in ctx is closed on exit.
+ */
+void test_syscall(void)
+{
+	static const char log_prefix[] = "processed";
+	static char verifier_log[8192];
+	struct syscall *skel = NULL;
+	struct args ctx = {
+		.log_buf = (uintptr_t) verifier_log,
+		.log_size = sizeof(verifier_log),
+		.max_entries = 1024,
+	};
+	struct bpf_prog_test_run_attr tattr = {
+		.ctx_size_in = sizeof(ctx),
+		.ctx_in = &ctx,
+	};
+	/* fds are read at cleanup time, in this (prog, map, btf) order */
+	const int *owned_fds[] = { &ctx.prog_fd, &ctx.map_fd, &ctx.btf_fd };
+	__u64 lookup_key = 12;
+	__u64 lookup_val = 0;
+	size_t n;
+	int ret;
+
+	skel = syscall__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_load"))
+		goto out;
+
+	tattr.prog_fd = bpf_program__fd(skel->progs.bpf_prog);
+	ret = bpf_prog_test_run_xattr(&tattr);
+	ASSERT_EQ(ret, 0, "err");
+	ASSERT_EQ(tattr.retval, 1, "retval");
+	ASSERT_GT(ctx.map_fd, 0, "ctx.map_fd");
+	ASSERT_GT(ctx.prog_fd, 0, "ctx.prog_fd");
+	/* only the leading bytes of the log are compared, NUL excluded */
+	ASSERT_OK(memcmp(verifier_log, log_prefix, sizeof(log_prefix) - 1),
+		  "verifier_log");
+
+	ret = bpf_map_lookup_elem(ctx.map_fd, &lookup_key, &lookup_val);
+	ASSERT_EQ(ret, 0, "map_lookup");
+	ASSERT_EQ(lookup_val, 34, "map lookup value");
+out:
+	syscall__destroy(skel);
+	/* ctx starts zeroed, so a value of 0 means "never set", not stdin */
+	for (n = 0; n < sizeof(owned_fds) / sizeof(owned_fds[0]); n++) {
+		if (*owned_fds[n] > 0)
+			close(*owned_fds[n]);
+	}
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
new file mode 100644
index 000000000000..4a505a5adf4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <linux/pkt_cls.h>
+
+#include "test_tc_bpf.skel.h"
+
+#define LO_IFINDEX 1
+
+/* Declare a fixed set of bpf_tc_opts variables for the API tests. The suffix
+ * of each name lists the fields that are set: h = handle (1),
+ * p = priority (1), f = prog_fd (__fd), i = prog_id (42),
+ * r = flags (BPF_TC_F_REPLACE). opts_prio_max carries a priority one above
+ * UINT16_MAX.
+ */
+#define TEST_DECLARE_OPTS(__fd) \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_h, .handle = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_p, .priority = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_f, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hp, .handle = 1, .priority = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hf, .handle = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_pf, .priority = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpf, .handle = 1, .priority = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpi, .handle = 1, .priority = 1, .prog_id = 42); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpr, .handle = 1, .priority = 1, \
+ .flags = BPF_TC_F_REPLACE); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpfi, .handle = 1, .priority = 1, .prog_fd = __fd, \
+ .prog_id = 42); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_prio_max, .handle = 1, .priority = UINT16_MAX + 1);
+
+/* Attach the program behind @fd to @hook with handle 1 and priority 1,
+ * attach it again with BPF_TC_F_REPLACE, query it back and finally detach it.
+ *
+ * Returns early with the error if fetching the program info or the initial
+ * attach fails; otherwise returns the result of the final bpf_tc_detach().
+ */
+static int test_tc_bpf_basic(const struct bpf_tc_hook *hook, int fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int ret;
+
+ /* info.id is the reference value for the prog_id reported back below. */
+ ret = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd"))
+ return ret;
+
+ ret = bpf_tc_attach(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ return ret;
+
+ if (!ASSERT_EQ(opts.handle, 1, "handle set") ||
+ !ASSERT_EQ(opts.priority, 1, "priority set") ||
+ !ASSERT_EQ(opts.prog_id, info.id, "prog_id set"))
+ goto end;
+
+ /* prog_id was filled in by the attach above; clear it before reusing
+ * the same opts for the replace attach.
+ */
+ opts.prog_id = 0;
+ opts.flags = BPF_TC_F_REPLACE;
+ ret = bpf_tc_attach(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach replace mode"))
+ goto end;
+
+ /* Query is issued with flags, prog_fd and prog_id all cleared. */
+ opts.flags = opts.prog_fd = opts.prog_id = 0;
+ ret = bpf_tc_query(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_query"))
+ goto end;
+
+ if (!ASSERT_EQ(opts.handle, 1, "handle set") ||
+ !ASSERT_EQ(opts.priority, 1, "priority set") ||
+ !ASSERT_EQ(opts.prog_id, info.id, "prog_id set"))
+ goto end;
+
+end:
+ /* Detach is issued with only handle and priority left set. */
+ opts.flags = opts.prog_fd = opts.prog_id = 0;
+ ret = bpf_tc_detach(hook, &opts);
+ ASSERT_OK(ret, "bpf_tc_detach");
+ return ret;
+}
+
+/* Exercise argument validation of the bpf_tc_hook_create/destroy and
+ * bpf_tc_attach/detach/query APIs.
+ *
+ * @hook: a valid hook used for the opts validation cases
+ * @fd:   fd of a loaded classifier program
+ *
+ * The first part feeds invalid hooks (ifindex == 0, ifindex < 0, bad
+ * attach_point, bad parent) to every entry point; the second part feeds
+ * invalid and valid opts combinations together with the valid @hook.
+ *
+ * Returns 0 when every expectation holds, -EINVAL at the first mismatch.
+ */
+static int test_tc_bpf_api(struct bpf_tc_hook *hook, int fd)
+{
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_opts, .handle = 1, .priority = 1, .prog_fd = fd);
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, inv_hook, .attach_point = BPF_TC_INGRESS);
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1);
+	int ret;
+
+	ret = bpf_tc_hook_create(NULL);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook = NULL"))
+		return -EINVAL;
+
+	/* hook ifindex = 0 */
+	ret = bpf_tc_hook_create(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex == 0"))
+		return -EINVAL;
+
+	ret = bpf_tc_hook_destroy(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex == 0"))
+		return -EINVAL;
+
+	ret = bpf_tc_attach(&inv_hook, &attach_opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex == 0"))
+		return -EINVAL;
+	attach_opts.prog_id = 0;
+
+	ret = bpf_tc_detach(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex == 0"))
+		return -EINVAL;
+
+	ret = bpf_tc_query(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex == 0"))
+		return -EINVAL;
+
+	/* hook ifindex < 0 */
+	inv_hook.ifindex = -1;
+
+	ret = bpf_tc_hook_create(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex < 0"))
+		return -EINVAL;
+
+	ret = bpf_tc_hook_destroy(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex < 0"))
+		return -EINVAL;
+
+	ret = bpf_tc_attach(&inv_hook, &attach_opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex < 0"))
+		return -EINVAL;
+	attach_opts.prog_id = 0;
+
+	ret = bpf_tc_detach(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex < 0"))
+		return -EINVAL;
+
+	ret = bpf_tc_query(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex < 0"))
+		return -EINVAL;
+
+	inv_hook.ifindex = LO_IFINDEX;
+
+	/* hook.attach_point invalid */
+	inv_hook.attach_point = 0xabcd;
+	ret = bpf_tc_hook_create(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook.attach_point"))
+		return -EINVAL;
+
+	ret = bpf_tc_hook_destroy(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook.attach_point"))
+		return -EINVAL;
+
+	ret = bpf_tc_attach(&inv_hook, &attach_opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook.attach_point"))
+		return -EINVAL;
+
+	ret = bpf_tc_detach(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook.attach_point"))
+		return -EINVAL;
+
+	ret = bpf_tc_query(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook.attach_point"))
+		return -EINVAL;
+
+	inv_hook.attach_point = BPF_TC_INGRESS;
+
+	/* hook.attach_point valid, but parent invalid */
+	inv_hook.parent = TC_H_MAKE(1UL << 16, 10);
+	ret = bpf_tc_hook_create(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_hook_destroy(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_attach(&inv_hook, &attach_opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_detach(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_query(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent"))
+		return -EINVAL;
+
+	inv_hook.attach_point = BPF_TC_CUSTOM;
+	inv_hook.parent = 0;
+	/* These return EOPNOTSUPP instead of EINVAL as parent is checked after
+	 * attach_point of the hook.
+	 */
+	ret = bpf_tc_hook_create(&inv_hook);
+	if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_hook_destroy(&inv_hook);
+	if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_attach(&inv_hook, &attach_opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_detach(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_query(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent"))
+		return -EINVAL;
+
+	inv_hook.attach_point = BPF_TC_INGRESS;
+
+	/* detach */
+	{
+		TEST_DECLARE_OPTS(fd);
+
+		ret = bpf_tc_detach(NULL, &opts_hp);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook = NULL"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, NULL);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid opts = NULL"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, &opts_hpr);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid flags set"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, &opts_hpf);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_fd set"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, &opts_hpi);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_id set"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, &opts_p);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid handle unset"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, &opts_h);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority unset"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, &opts_prio_max);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority > UINT16_MAX"))
+			return -EINVAL;
+	}
+
+	/* query */
+	{
+		TEST_DECLARE_OPTS(fd);
+
+		ret = bpf_tc_query(NULL, &opts);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook = NULL"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, NULL);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid opts = NULL"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, &opts_hpr);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid flags set"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, &opts_hpf);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_fd set"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, &opts_hpi);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_id set"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, &opts_p);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid handle unset"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, &opts_h);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority unset"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, &opts_prio_max);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority > UINT16_MAX"))
+			return -EINVAL;
+
+		/* when chain is not present, kernel returns -EINVAL */
+		ret = bpf_tc_query(hook, &opts_hp);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query valid handle, priority set"))
+			return -EINVAL;
+	}
+
+	/* attach */
+	{
+		TEST_DECLARE_OPTS(fd);
+
+		ret = bpf_tc_attach(NULL, &opts_hp);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook = NULL"))
+			return -EINVAL;
+
+		ret = bpf_tc_attach(hook, NULL);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid opts = NULL"))
+			return -EINVAL;
+
+		/* Use opts that carry a prog_fd so that the bogus flags value
+		 * is the only invalid field in this request.
+		 */
+		opts_hpf.flags = 42;
+		ret = bpf_tc_attach(hook, &opts_hpf);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid flags"))
+			return -EINVAL;
+
+		/* handle and priority set, prog_fd left at 0 */
+		ret = bpf_tc_attach(hook, &opts_hp);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_fd unset"))
+			return -EINVAL;
+
+		ret = bpf_tc_attach(hook, &opts_hpi);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_id set"))
+			return -EINVAL;
+
+		ret = bpf_tc_attach(hook, &opts_pf);
+		if (!ASSERT_OK(ret, "bpf_tc_attach valid handle unset"))
+			return -EINVAL;
+		opts_pf.prog_fd = opts_pf.prog_id = 0;
+		ASSERT_OK(bpf_tc_detach(hook, &opts_pf), "bpf_tc_detach");
+
+		ret = bpf_tc_attach(hook, &opts_hf);
+		if (!ASSERT_OK(ret, "bpf_tc_attach valid priority unset"))
+			return -EINVAL;
+		opts_hf.prog_fd = opts_hf.prog_id = 0;
+		ASSERT_OK(bpf_tc_detach(hook, &opts_hf), "bpf_tc_detach");
+
+		ret = bpf_tc_attach(hook, &opts_prio_max);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid priority > UINT16_MAX"))
+			return -EINVAL;
+
+		ret = bpf_tc_attach(hook, &opts_f);
+		if (!ASSERT_OK(ret, "bpf_tc_attach valid both handle and priority unset"))
+			return -EINVAL;
+		opts_f.prog_fd = opts_f.prog_id = 0;
+		ASSERT_OK(bpf_tc_detach(hook, &opts_f), "bpf_tc_detach");
+	}
+
+	return 0;
+}
+
+/* Entry point of the tc_bpf test: load the test_tc_bpf skeleton, create an
+ * ingress hook on LO_IFINDEX and run the basic attach/query/detach sequence
+ * through a BPF_TC_CUSTOM hook, the ingress hook and the egress hook, followed
+ * by the API argument validation test.
+ */
+void test_tc_bpf(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_INGRESS);
+ struct test_tc_bpf *skel = NULL;
+ bool hook_created = false;
+ int cls_fd, ret;
+
+ skel = test_tc_bpf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load"))
+ return;
+
+ cls_fd = bpf_program__fd(skel->progs.cls);
+
+ /* Remember whether this test created the hook so that only a hook it
+ * created is torn down at the end; -EEXIST is not treated as a failure.
+ */
+ ret = bpf_tc_hook_create(&hook);
+ if (ret == 0)
+ hook_created = true;
+
+ ret = ret == -EEXIST ? 0 : ret;
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create(BPF_TC_INGRESS)"))
+ goto end;
+
+ /* BPF_TC_CUSTOM hooks are expected to reject create/destroy with
+ * -EOPNOTSUPP while still being usable for attach/query/detach.
+ */
+ hook.attach_point = BPF_TC_CUSTOM;
+ hook.parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook.attach_point"))
+ goto end;
+
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal ingress"))
+ goto end;
+
+ ret = bpf_tc_hook_destroy(&hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook.attach_point"))
+ goto end;
+
+ hook.attach_point = BPF_TC_INGRESS;
+ hook.parent = 0;
+ bpf_tc_hook_destroy(&hook);
+
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal ingress"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+ hook.attach_point = BPF_TC_EGRESS;
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal egress"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+ ret = test_tc_bpf_api(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_bpf_api"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+end:
+ /* Destroy with both attach points set, but only if the hook was created
+ * by this test.
+ */
+ if (hook_created) {
+ hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&hook);
+ }
+ test_tc_bpf__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
new file mode 100644
index 000000000000..5703c918812b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -0,0 +1,785 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
+ * between src and dst. The netns fwd has veth links to each src and dst. The
+ * client is in src and server in dst. The test installs a TC BPF program to each
+ * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
+ * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
+ * switch from ingress side; it also installs a checker prog on the egress side
+ * to drop unexpected traffic.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <linux/limits.h>
+#include <linux/sysctl.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tc_neigh_fib.skel.h"
+#include "test_tc_neigh.skel.h"
+#include "test_tc_peer.skel.h"
+
+#define NS_SRC "ns_src"
+#define NS_FWD "ns_fwd"
+#define NS_DST "ns_dst"
+
+#define IP4_SRC "172.16.1.100"
+#define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
+#define IP4_PORT 9004
+
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
+#define IP6_PORT 9006
+
+#define IP4_SLL "169.254.0.1"
+#define IP4_DLL "169.254.0.2"
+#define IP4_NET "169.254.0.0"
+
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+
+#define IFADDR_STR_LEN 18
+#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
+
+#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
+#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
+#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"
+
+#define TIMEOUT_MILLIS 10000
+
+/* Print MSG (printf-style, with optional arguments) to stderr, prefixed with
+ * the source file, line number and strerror(errno), followed by a newline.
+ */
+#define log_err(MSG, ...) \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
+
+static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
+
+/* Overwrite the start of the existing file @path with the string @newval.
+ *
+ * The file is opened with "r+", so it must already exist. Returns 0 on
+ * success and -1 if the file cannot be opened or the data cannot be written.
+ */
+static int write_file(const char *path, const char *newval)
+{
+	FILE *f;
+
+	f = fopen(path, "r+");
+	if (!f)
+		return -1;
+	if (fwrite(newval, strlen(newval), 1, f) != 1) {
+		log_err("writing to %s failed", path);
+		fclose(f);
+		return -1;
+	}
+	/* stdio buffers the data, so the underlying write may only happen
+	 * when the stream is flushed by fclose(); a value rejected by the
+	 * kernel is reported here rather than by fwrite().
+	 */
+	if (fclose(f) != 0) {
+		log_err("writing to %s failed", path);
+		return -1;
+	}
+	return 0;
+}
+
+/* Handle returned by open_netns() and consumed by close_netns(). */
+struct nstoken {
+ int orig_netns_fd; /* fd of /proc/self/ns/net opened before switching */
+};
+
+/* Switch to the network namespace referred to by @nsfd and remount /sys and
+ * /sys/fs/bpf in a fresh mount namespace.
+ *
+ * @nsfd is always closed, whether or not setns() succeeds. Returns 0 on
+ * success or the non-zero result of the first failing step.
+ */
+static int setns_by_fd(int nsfd)
+{
+ int err;
+
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
+
+ if (!ASSERT_OK(err, "setns"))
+ return err;
+
+ /* Switch /sys to the new namespace so that e.g. /sys/class/net
+ * reflects the devices in the new namespace.
+ */
+ err = unshare(CLONE_NEWNS);
+ if (!ASSERT_OK(err, "unshare"))
+ return err;
+
+ err = umount2("/sys", MNT_DETACH);
+ if (!ASSERT_OK(err, "umount2 /sys"))
+ return err;
+
+ err = mount("sysfs", "/sys", "sysfs", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys"))
+ return err;
+
+ /* /sys was just replaced, so bpffs has to be mounted again on top. */
+ err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
+ return err;
+
+ return 0;
+}
+
+/**
+ * open_netns() - Switch to specified network namespace by name.
+ * @name: name of a namespace found under /var/run/netns
+ *
+ * Opens the current network namespace first so that it can be restored
+ * later, then switches to the named one via setns_by_fd().
+ *
+ * Returns token with which to restore the original namespace
+ * using close_netns(), or NULL on failure. On failure the fd of the
+ * original namespace is closed and the token is freed.
+ */
+static struct nstoken *open_netns(const char *name)
+{
+	int nsfd;
+	char nspath[PATH_MAX];
+	int err;
+	struct nstoken *token;
+
+	token = malloc(sizeof(struct nstoken));
+	if (!ASSERT_OK_PTR(token, "malloc token"))
+		return NULL;
+
+	token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
+		goto fail;
+
+	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+	if (!ASSERT_GE(nsfd, 0, "open netns fd"))
+		goto fail;
+
+	/* setns_by_fd() closes nsfd on every path */
+	err = setns_by_fd(nsfd);
+	if (!ASSERT_OK(err, "setns_by_fd"))
+		goto fail;
+
+	return token;
+fail:
+	/* Do not leak the fd of the original namespace */
+	if (token->orig_netns_fd >= 0)
+		close(token->orig_netns_fd);
+	free(token);
+	return NULL;
+}
+
+/* Return to the namespace recorded in @token and release the token. The
+ * token is freed even if switching back fails.
+ */
+static void close_netns(struct nstoken *token)
+{
+	int err = setns_by_fd(token->orig_netns_fd);
+
+	ASSERT_OK(err, "setns_by_fd");
+	free(token);
+}
+
+/* Run "ip netns <verb> <ns>" for every entry of the NULL-terminated
+ * namespaces[] table, stopping at the first command that fails.
+ *
+ * Returns 0 if all commands succeeded, -1 otherwise.
+ */
+static int netns_setup_namespaces(const char *verb)
+{
+	char cmd[128];
+	int i;
+
+	for (i = 0; namespaces[i]; i++) {
+		snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, namespaces[i]);
+		if (!ASSERT_OK(system(cmd), cmd))
+			return -1;
+	}
+	return 0;
+}
+
+/* Interface indexes collected by netns_setup_links_and_routes() and consumed
+ * by the individual redirect tests.
+ */
+struct netns_setup_result {
+ int ifindex_veth_src_fwd; /* ifindex of veth_src_fwd */
+ int ifindex_veth_dst_fwd; /* ifindex of veth_dst_fwd */
+};
+
+/* Copy the first IFADDR_STR_LEN bytes of /sys/class/net/<name>/address into
+ * @ifaddr. No terminator is written, so the caller must provide one.
+ *
+ * Returns 0 when exactly IFADDR_STR_LEN bytes were read, -1 otherwise.
+ */
+static int get_ifaddr(const char *name, char *ifaddr)
+{
+	char path[PATH_MAX];
+	FILE *f;
+	int nread;
+
+	snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
+	f = fopen(path, "r");
+	if (!ASSERT_OK_PTR(f, path))
+		return -1;
+
+	nread = fread(ifaddr, 1, IFADDR_STR_LEN, f);
+	fclose(f);
+
+	return ASSERT_EQ(nread, IFADDR_STR_LEN, "fread ifaddr") ? 0 : -1;
+}
+
+/* Read the interface index of @name from /sys/class/net/<name>/ifindex.
+ *
+ * Returns the value parsed with atoi(), or -1 if the file cannot be opened
+ * or nothing could be read from it.
+ */
+static int get_ifindex(const char *name)
+{
+	char path[PATH_MAX];
+	char buf[32];
+	FILE *f;
+	int ret;
+
+	snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
+	f = fopen(path, "r");
+	if (!ASSERT_OK_PTR(f, path))
+		return -1;
+
+	/* fread() does not NUL-terminate; keep one byte spare so that atoi()
+	 * below always sees a proper C string.
+	 */
+	ret = fread(buf, 1, sizeof(buf) - 1, f);
+	if (!ASSERT_GT(ret, 0, "fread ifindex")) {
+		fclose(f);
+		return -1;
+	}
+	buf[ret] = '\0';
+	fclose(f);
+	return atoi(buf);
+}
+
+/* Format a shell command (printf-style) into a 1024-byte buffer, run it with
+ * system() and jump to the caller's "fail" label if it does not return 0.
+ * The enclosing function must therefore define a "fail" label.
+ */
+#define SYS(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) \
+ goto fail; \
+ })
+
+/* Create the two veth pairs, move their ends into the src/fwd/dst
+ * namespaces and configure addresses, routes and neighbour entries in each
+ * namespace.
+ *
+ * The ifindexes of veth_src_fwd and veth_dst_fwd are stored in @result.
+ * Returns 0 on success and -1 as soon as any step fails.
+ */
+static int netns_setup_links_and_routes(struct netns_setup_result *result)
+{
+ struct nstoken *nstoken = NULL;
+ /* One extra, zero-initialised byte keeps the address NUL-terminated. */
+ char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
+
+ SYS("ip link add veth_src type veth peer name veth_src_fwd");
+ SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+
+ SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
+ SYS("ip link set veth_dst address " MAC_DST);
+
+ /* Address and ifindexes are collected before the links are moved into
+ * their namespaces below.
+ */
+ if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
+ goto fail;
+
+ result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
+ if (result->ifindex_veth_src_fwd < 0)
+ goto fail;
+ result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
+ if (result->ifindex_veth_dst_fwd < 0)
+ goto fail;
+
+ SYS("ip link set veth_src netns " NS_SRC);
+ SYS("ip link set veth_src_fwd netns " NS_FWD);
+ SYS("ip link set veth_dst_fwd netns " NS_FWD);
+ SYS("ip link set veth_dst netns " NS_DST);
+
+ /** setup in 'src' namespace */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto fail;
+
+ SYS("ip addr add " IP4_SRC "/32 dev veth_src");
+ SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
+ SYS("ip link set dev veth_src up");
+
+ SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
+ SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
+ SYS("ip route add " IP6_DST "/128 dev veth_src scope global");
+
+ SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
+ veth_src_fwd_addr);
+ SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
+ veth_src_fwd_addr);
+
+ close_netns(nstoken);
+
+ /** setup in 'fwd' namespace */
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ goto fail;
+
+ /* The fwd netns automatically gets a v6 LL address / routes, but also
+ * needs v4 one in order to start ARP probing. IP4_NET route is added
+ * to the endpoints so that the ARP processing will reply.
+ */
+ SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
+ SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
+ SYS("ip link set dev veth_src_fwd up");
+ SYS("ip link set dev veth_dst_fwd up");
+
+ SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
+ SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
+ SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
+ SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+
+ close_netns(nstoken);
+
+ /** setup in 'dst' namespace */
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
+ goto fail;
+
+ SYS("ip addr add " IP4_DST "/32 dev veth_dst");
+ SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
+ SYS("ip link set dev veth_dst up");
+
+ SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
+ SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
+ SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
+
+ SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+ close_netns(nstoken);
+
+ return 0;
+fail:
+ /* nstoken is NULL here when the failure happened outside a namespace
+ * switch or when open_netns() itself failed.
+ */
+ if (nstoken)
+ close_netns(nstoken);
+ return -1;
+}
+
+/* Add a clsact qdisc to veth_src_fwd and veth_dst_fwd and attach the pinned
+ * programs with tc: SRC_PROG_PIN_FILE / DST_PROG_PIN_FILE on the respective
+ * ingress side and CHK_PROG_PIN_FILE on both egress sides.
+ *
+ * Returns 0 on success, -1 if any tc command fails.
+ */
+static int netns_load_bpf(void)
+{
+ SYS("tc qdisc add dev veth_src_fwd clsact");
+ SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
+ SRC_PROG_PIN_FILE);
+ SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned "
+ CHK_PROG_PIN_FILE);
+
+ SYS("tc qdisc add dev veth_dst_fwd clsact");
+ SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
+ DST_PROG_PIN_FILE);
+ SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
+ CHK_PROG_PIN_FILE);
+
+ return 0;
+fail:
+ return -1;
+}
+
+/* Start a TCP server on @addr:@port inside NS_DST, connect to it from
+ * NS_SRC and send one buffer from the client to the accepted socket.
+ */
+static void test_tcp(int family, const char *addr, __u16 port)
+{
+ int listen_fd = -1, accept_fd = -1, client_fd = -1;
+ char buf[] = "testing testing";
+ int n;
+ struct nstoken *nstoken;
+
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
+ return;
+
+ listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
+ if (!ASSERT_GE(listen_fd, 0, "listen"))
+ goto done;
+
+ /* Leave NS_DST and enter NS_SRC for the client side; nstoken is NULL
+ * afterwards if the second switch fails, which the cleanup checks.
+ */
+ close_netns(nstoken);
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto done;
+
+ client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto done;
+
+ accept_fd = accept(listen_fd, NULL, NULL);
+ if (!ASSERT_GE(accept_fd, 0, "accept"))
+ goto done;
+
+ if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo"))
+ goto done;
+
+ /* sizeof(buf) includes the terminating NUL of the string. */
+ n = write(client_fd, buf, sizeof(buf));
+ if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+ goto done;
+
+ n = read(accept_fd, buf, sizeof(buf));
+ ASSERT_EQ(n, sizeof(buf), "recv from server");
+
+done:
+ if (nstoken)
+ close_netns(nstoken);
+ if (listen_fd >= 0)
+ close(listen_fd);
+ if (accept_fd >= 0)
+ close(accept_fd);
+ if (client_fd >= 0)
+ close(client_fd);
+}
+
+/* Ping @addr from inside NS_SRC using "ping6" for AF_INET6 and "ping"
+ * otherwise. Returns 0 if the command succeeded, -1 if it did not.
+ */
+static int test_ping(int family, const char *addr)
+{
+ const char *ping = family == AF_INET6 ? "ping6" : "ping";
+
+ SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
+ return 0;
+fail:
+ return -1;
+}
+
+/* Run the TCP and ping checks towards the dst addresses, first over IPv4 and
+ * then over IPv6. Return values of test_ping() are not inspected here.
+ */
+static void test_connectivity(void)
+{
+ test_tcp(AF_INET, IP4_DST, IP4_PORT);
+ test_ping(AF_INET, IP4_DST);
+ test_tcp(AF_INET6, IP6_DST, IP6_PORT);
+ test_ping(AF_INET6, IP6_DST);
+}
+
+/* Enable or disable IPv4 and IPv6 forwarding through /proc/sys.
+ *
+ * Returns 0 on success or the non-zero result of the failing write_file().
+ */
+static int set_forwarding(bool enable)
+{
+	const char *val = enable ? "1" : "0";
+	int err;
+
+	/* The assertion labels name the value that was actually written. */
+	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
+	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" : "set ipv4.ip_forward=0"))
+		return err;
+
+	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
+	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" : "set ipv6.forwarding=0"))
+		return err;
+
+	return 0;
+}
+
+/* Subtest: inside NS_FWD, load the test_tc_neigh_fib skeleton, pin its
+ * tc_src/tc_chk/tc_dst programs, attach them with tc, enable forwarding and
+ * run the connectivity checks. @setup_result is not used by this variant.
+ */
+static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
+{
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh_fib *skel = NULL;
+ int err;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
+ skel = test_tc_neigh_fib__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
+ goto done;
+
+ if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
+ goto done;
+
+ /* Pin the programs so that the tc commands can refer to them by path. */
+ err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+ goto done;
+
+ if (netns_load_bpf())
+ goto done;
+
+ /* bpf_fib_lookup() checks if forwarding is enabled */
+ if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
+ goto done;
+
+ test_connectivity();
+
+done:
+ if (skel)
+ test_tc_neigh_fib__destroy(skel);
+ close_netns(nstoken);
+}
+
+/* Subtest: inside NS_FWD, open the test_tc_neigh skeleton, fill in the
+ * ifindexes of the two fwd-side veth devices, load and pin its programs,
+ * attach them with tc, disable forwarding and run the connectivity checks.
+ */
+static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
+{
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh *skel = NULL;
+ int err;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
+ skel = test_tc_neigh__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
+ goto done;
+
+ /* The rodata values are written between open and load. */
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+ err = test_tc_neigh__load(skel);
+ if (!ASSERT_OK(err, "test_tc_neigh__load"))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+ goto done;
+
+ if (netns_load_bpf())
+ goto done;
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
+ test_connectivity();
+
+done:
+ if (skel)
+ test_tc_neigh__destroy(skel);
+ close_netns(nstoken);
+}
+
+/* Subtest: inside NS_FWD, open the test_tc_peer skeleton, fill in the
+ * ifindexes of the two fwd-side veth devices, load and pin its programs,
+ * attach them with tc, disable forwarding and run the connectivity checks.
+ */
+static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
+{
+ struct nstoken *nstoken;
+ struct test_tc_peer *skel;
+ int err;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
+ /* skel is assigned here before any jump to "done" can happen. */
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ goto done;
+
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+ goto done;
+
+ if (netns_load_bpf())
+ goto done;
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
+ test_connectivity();
+
+done:
+ if (skel)
+ test_tc_peer__destroy(skel);
+ close_netns(nstoken);
+}
+
+/* Open /dev/net/tun, create (or attach to) the TUN device @name without
+ * packet info headers and bring the link up.
+ *
+ * Returns the tun fd on success, -1 on failure (the fd is closed again if
+ * it had been opened).
+ */
+static int tun_open(char *name)
+{
+	struct ifreq ifr;
+	int fd, err;
+
+	fd = open("/dev/net/tun", O_RDWR);
+	if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+	/* Copy at most IFNAMSIZ - 1 bytes: together with the memset() above
+	 * this keeps ifr_name NUL-terminated even for over-long names.
+	 */
+	if (*name)
+		strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
+
+	err = ioctl(fd, TUNSETIFF, &ifr);
+	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+		goto fail;
+
+	SYS("ip link set dev %s up", name);
+
+	return fd;
+fail:
+	close(fd);
+	return -1;
+}
+
+/* Larger of two values; note that each argument may be evaluated twice. */
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+/* Relay direction selected per iteration in tun_relay_loop(). */
+enum {
+ SRC_TO_TARGET = 0,
+ TARGET_TO_SRC = 1,
+};
+
+/* Shuttle packets between @src_fd and @target_fd forever: wait until either
+ * fd is readable, read one buffer from it and write that buffer to the
+ * other fd. When both are readable, @src_fd is served first.
+ *
+ * Only returns on error, with the value 1.
+ */
+static int tun_relay_loop(int src_fd, int target_fd)
+{
+	const int nfds = 1 + MAX(src_fd, target_fd);
+	fd_set rfds;
+
+	FD_ZERO(&rfds);
+
+	for (;;) {
+		char buf[1500];
+		int from_fd, to_fd;
+		int nread, nwrite;
+
+		/* select() rewrites the set, so arm both fds on every pass */
+		FD_SET(src_fd, &rfds);
+		FD_SET(target_fd, &rfds);
+
+		if (select(nfds, &rfds, NULL, NULL, NULL) < 0) {
+			log_err("select failed");
+			return 1;
+		}
+
+		if (FD_ISSET(src_fd, &rfds)) {
+			from_fd = src_fd;
+			to_fd = target_fd;
+		} else {
+			from_fd = target_fd;
+			to_fd = src_fd;
+		}
+
+		nread = read(from_fd, buf, sizeof(buf));
+		if (nread < 0) {
+			log_err("read failed");
+			return 1;
+		}
+
+		nwrite = write(to_fd, buf, nread);
+		if (nwrite != nread) {
+			log_err("write failed");
+			return 1;
+		}
+	}
+}
+
+/* Subtest: connect NS_SRC and NS_FWD through a pair of TUN devices relayed
+ * by a forked child, attach the L3 variants of the test_tc_peer programs in
+ * NS_FWD, reroute src <-> dst traffic over the tunnel and run the
+ * connectivity checks.
+ */
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+	struct test_tc_peer *skel = NULL;
+	struct nstoken *nstoken = NULL;
+	int err;
+	int tunnel_pid = -1;
+	/* Start at -1 so that the cleanup below never looks at an
+	 * indeterminate fd when an early step fails.
+	 */
+	int src_fd = -1, target_fd = -1;
+	int ifindex;
+
+	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
+	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
+	 * expose the L2 headers encapsulating the IP packet to BPF and hence
+	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
+	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
+	 * but that requires much more complicated setup.
+	 */
+	nstoken = open_netns(NS_SRC);
+	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
+		return;
+
+	src_fd = tun_open("tun_src");
+	if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
+		goto fail;
+
+	close_netns(nstoken);
+
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+		goto fail;
+
+	target_fd = tun_open("tun_fwd");
+	if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
+		goto fail;
+
+	tunnel_pid = fork();
+	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+		goto fail;
+
+	/* The child only relays packets between the two tun fds */
+	if (tunnel_pid == 0)
+		exit(tun_relay_loop(src_fd, target_fd));
+
+	skel = test_tc_peer__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+		goto fail;
+
+	ifindex = get_ifindex("tun_fwd");
+	if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
+		goto fail;
+
+	skel->rodata->IFINDEX_SRC = ifindex;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+	err = test_tc_peer__load(skel);
+	if (!ASSERT_OK(err, "test_tc_peer__load"))
+		goto fail;
+
+	err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+		goto fail;
+
+	err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+		goto fail;
+
+	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+		goto fail;
+
+	/* Attach "tc_src_l3" to the ingress of tun_fwd to redirect packets
+	 * towards dst, "tc_dst_l3" to the ingress of veth_dst_fwd to redirect
+	 * packets, and "tc_chk" to the egress of veth_dst_fwd to drop
+	 * non-redirected packets.
+	 */
+	SYS("tc qdisc add dev tun_fwd clsact");
+	SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
+	    SRC_PROG_PIN_FILE);
+
+	SYS("tc qdisc add dev veth_dst_fwd clsact");
+	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
+	    DST_PROG_PIN_FILE);
+	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
+	    CHK_PROG_PIN_FILE);
+
+	/* Setup route and neigh tables */
+	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+	SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+	SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+	    " dev tun_src scope global");
+	SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+	SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+	SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+	    " dev tun_src scope global");
+	SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+
+	SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+		goto fail;
+
+	test_connectivity();
+
+fail:
+	/* Stop and reap the relay child before closing the fds it uses */
+	if (tunnel_pid > 0) {
+		kill(tunnel_pid, SIGTERM);
+		waitpid(tunnel_pid, NULL, 0);
+	}
+	if (src_fd >= 0)
+		close(src_fd);
+	if (target_fd >= 0)
+		close(target_fd);
+	if (skel)
+		test_tc_peer__destroy(skel);
+	if (nstoken)
+		close_netns(nstoken);
+}
+
+/* Run test_<name>() as a subtest: when the subtest is selected, create the
+ * namespaces, set up links and routes and call the test with the resulting
+ * setup_result. The namespaces are deleted again whenever they were created,
+ * whether or not the link setup succeeded.
+ */
+#define RUN_TEST(name) \
+ ({ \
+ struct netns_setup_result setup_result; \
+ if (test__start_subtest(#name)) \
+ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
+ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
+ "setup links and routes")) \
+ test_ ## name(&setup_result); \
+ netns_setup_namespaces("delete"); \
+ } \
+ })
+
+/* Thread body for test_tc_redirect(): runs the four redirect subtests in
+ * sequence. @arg is unused and the return value is always NULL.
+ */
+static void *test_tc_redirect_run_tests(void *arg)
+{
+ RUN_TEST(tc_redirect_peer);
+ RUN_TEST(tc_redirect_peer_l3);
+ RUN_TEST(tc_redirect_neigh);
+ RUN_TEST(tc_redirect_neigh_fib);
+ return NULL;
+}
+
+/* Entry point of the tc_redirect test: runs all subtests on a dedicated
+ * thread and waits for it to finish.
+ */
+void test_tc_redirect(void)
+{
+	pthread_t test_thread;
+	int err;
+
+	/* Run the tests in their own thread to isolate the namespace changes
+	 * so they do not affect the environment of other tests.
+	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+	 */
+	err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
+	if (!ASSERT_OK(err, "pthread_create"))
+		return;
+
+	ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index 08d19cafd5e8..1fa772079967 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -353,8 +353,7 @@ static void fastopen_estab(void)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, true)) {
@@ -398,8 +397,7 @@ static void syncookie_estab(void)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -431,8 +429,7 @@ static void fin(void)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -471,8 +468,7 @@ static void __simple_estab(bool exprm)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -509,8 +505,7 @@ static void misc(void)
return;
link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(misc_estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 9966685866fd..123c68c1917d 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -73,7 +73,7 @@ void test_test_overhead(void)
return;
obj = bpf_object__open_file("./test_overhead.o", NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
@@ -108,7 +108,7 @@ void test_test_overhead(void)
/* attach kprobe */
link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */,
kprobe_func);
- if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_kprobe"))
goto cleanup;
test_run("kprobe");
bpf_link__destroy(link);
@@ -116,28 +116,28 @@ void test_test_overhead(void)
/* attach kretprobe */
link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */,
kprobe_func);
- if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_kretprobe"))
goto cleanup;
test_run("kretprobe");
bpf_link__destroy(link);
/* attach raw_tp */
link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename");
- if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto cleanup;
test_run("raw_tp");
bpf_link__destroy(link);
/* attach fentry */
link = bpf_program__attach_trace(fentry_prog);
- if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_fentry"))
goto cleanup;
test_run("fentry");
bpf_link__destroy(link);
/* attach fexit */
link = bpf_program__attach_trace(fexit_prog);
- if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_fexit"))
goto cleanup;
test_run("fexit");
bpf_link__destroy(link);
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
index 39b0decb1bb2..d39bc00feb45 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -3,7 +3,7 @@
#include <test_progs.h>
-#include "trace_printk.skel.h"
+#include "trace_printk.lskel.h"
#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe"
#define SEARCHMSG "testing,testing"
@@ -21,6 +21,9 @@ void test_trace_printk(void)
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
return;
+ ASSERT_EQ(skel->rodata->fmt[0], 'T', "invalid printk fmt string");
+ skel->rodata->fmt[0] = 't';
+
err = trace_printk__load(skel);
if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index f3022d934e2d..d7f5a931d7f3 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -55,7 +55,7 @@ void test_trampoline_count(void)
/* attach 'allowed' trampoline programs */
for (i = 0; i < MAX_TRAMP_PROGS; i++) {
obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+ if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
obj = NULL;
goto cleanup;
}
@@ -68,14 +68,14 @@ void test_trampoline_count(void)
if (rand() % 2) {
link = load(inst[i].obj, fentry_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "attach_prog")) {
link = NULL;
goto cleanup;
}
inst[i].link_fentry = link;
} else {
link = load(inst[i].obj, fexit_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "attach_prog")) {
link = NULL;
goto cleanup;
}
@@ -85,7 +85,7 @@ void test_trampoline_count(void)
/* and try 1 extra.. */
obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+ if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
obj = NULL;
goto cleanup;
}
@@ -96,13 +96,15 @@ void test_trampoline_count(void)
/* ..that needs to fail */
link = load(obj, fentry_name);
- if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
+ err = libbpf_get_error(link);
+ if (!ASSERT_ERR_PTR(link, "cannot attach over the limit")) {
bpf_link__destroy(link);
goto cleanup_extra;
}
/* with E2BIG error */
- CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
+ ASSERT_EQ(err, -E2BIG, "proper error check");
+ ASSERT_EQ(link, NULL, "ptr_is_null");
/* and finaly execute the probe */
if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
index 2aba09d4d01b..56c9d6bd38a3 100644
--- a/tools/testing/selftests/bpf/prog_tests/udp_limit.c
+++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
@@ -22,11 +22,10 @@ void test_udp_limit(void)
goto close_cgroup_fd;
skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.sock, "cg_attach_sock"))
+ goto close_skeleton;
skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd);
- if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release),
- "cg-attach", "sock %ld sock_release %ld",
- PTR_ERR(skel->links.sock),
- PTR_ERR(skel->links.sock_release)))
+ if (!ASSERT_OK_PTR(skel->links.sock_release, "cg_attach_sock_release"))
goto close_skeleton;
/* BPF program enforces a single UDP socket per cgroup,
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 2c6c570b21f8..3bd5904b4db5 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -90,7 +90,7 @@ void test_xdp_bpf2bpf(void)
pb_opts.ctx = &passed;
pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out;
/* Run test program */
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 6f814999b395..46eed0a33c23 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -51,7 +51,7 @@ void test_xdp_link(void)
/* BPF link is not allowed to replace prog attachment */
link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
bpf_link__destroy(link);
/* best-effort detach prog */
opts.old_fd = prog_fd1;
@@ -67,7 +67,7 @@ void test_xdp_link(void)
/* now BPF link should attach successfully */
link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
skel1->links.xdp_handler = link;
@@ -95,7 +95,7 @@ void test_xdp_link(void)
/* BPF link is not allowed to replace another BPF link */
link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
bpf_link__destroy(link);
goto cleanup;
}
@@ -105,7 +105,7 @@ void test_xdp_link(void)
/* new link attach should succeed */
link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
skel2->links.xdp_handler = link;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
index 6dfce3fd68bc..0aa3cd34cbe3 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index b83b5d2e17dc..6c39e86b666f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index d58d9f1642b5..784a610ce039 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 95989f4c99b5..a28e51e2dcee 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index b7f32c160f4e..c86b93f33b32 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
index a1ddc36f13ec..bca8b889cb10 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020, Oracle and/or its affiliates. */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include <errno.h>
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index b2f7c7c5f952..6e7b400888fe 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index 43c36f5f7649..f2b8167b72a8 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
index 11d1aa37cf11..4ea6a37d1345 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index 54380c5e1069..2e4775c35414 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index b4fbddfa4e10..943f7bba180e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
index ee49493dc125..400fdf8d6233 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -9,8 +9,8 @@ __u32 map1_id = 0, map2_id = 0;
__u32 map1_accessed = 0, map2_accessed = 0;
__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
-static volatile const __u32 print_len;
-static volatile const __u32 ret1;
+volatile const __u32 print_len;
+volatile const __u32 ret1;
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
index f258583afbbd..cf0c485b1ed7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
index 65f93bb03f0f..5031e21c433f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
index a46a264ce24e..55e283050cab 100644
--- a/tools/testing/selftests/bpf/progs/kfree_skb.c
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -109,10 +109,10 @@ int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location)
return 0;
}
-static volatile struct {
+struct {
bool fentry_test_ok;
bool fexit_test_ok;
-} result;
+} result = {};
SEC("fentry/eth_type_trans")
int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev,
diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c
index 52291515cc72..00bf1ca95986 100644
--- a/tools/testing/selftests/bpf/progs/linked_maps1.c
+++ b/tools/testing/selftests/bpf/progs/linked_maps1.c
@@ -75,7 +75,7 @@ int BPF_PROG(handler_exit1)
val = bpf_map_lookup_elem(&map_weak, &key);
if (val)
output_weak1 = *val;
-
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c
new file mode 100644
index 000000000000..e550f728962d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/syscall.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <../../../tools/include/linux/filter.h>
+#include <linux/btf.h>
+
+char _license[] SEC("license") = "GPL";
+/* Context structure handed to the SEC("syscall") program bpf_prog() below.
+ * The first three fields are read by the program, the last three are
+ * written by it.
+ */
+struct args {
+ __u64 log_buf; /* in: copied to prog_load_attr.log_buf */
+ __u32 log_size; /* in: copied to prog_load_attr.log_size */
+ int max_entries; /* in: copied to map_create_attr.max_entries */
+ int map_fd; /* out: result of BPF_MAP_CREATE */
+ int prog_fd; /* out: result of BPF_PROG_LOAD */
+ int btf_fd; /* out: result of BPF_BTF_LOAD */
+};
+/* Helpers for spelling out raw BTF type records as a list of integer words.
+ *
+ * BTF_INFO_ENC:     packs kind_flag into bit 31, kind starting at bit 24 and
+ *                   vlen masked with BTF_MAX_VLEN into one word.
+ * BTF_TYPE_ENC:     expands to the three comma-separated words
+ *                   name, info, size_or_type.
+ * BTF_INT_ENC:      packs encoding (<< 24), bits_offset (<< 16) and nr_bits
+ *                   into one word.
+ * BTF_TYPE_INT_ENC: a BTF_KIND_INT record (three words via BTF_TYPE_ENC)
+ *                   followed by its BTF_INT_ENC word, i.e. four words total.
+ */
+#define BTF_INFO_ENC(kind, kind_flag, vlen) \
+ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+ ((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+ BTF_INT_ENC(encoding, bits_offset, bits)
+/* Load a small hand-built BTF blob with bpf_sys_bpf(BPF_BTF_LOAD).
+ *
+ * The blob consists of a btf_header, eight __u32 words of type data and one
+ * __u32 of string data (left zero-initialized). The type data describes two
+ * 64-bit, 8-byte integer types: [1] signed and [2] unsigned.
+ *
+ * Returns the value returned by bpf_sys_bpf(); bpf_prog() treats a value
+ * <= 0 as failure and a positive value as the BTF fd.
+ */
+static int btf_load(void)
+{
+ struct btf_blob {
+ struct btf_header btf_hdr;
+ __u32 types[8];
+ __u32 str;
+ } raw_btf = {
+ .btf_hdr = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+ .type_len = sizeof(__u32) * 8, /* matches types[8] */
+ .str_off = sizeof(__u32) * 8, /* string data starts right after the types */
+ .str_len = sizeof(__u32), /* matches the single str word */
+ },
+ .types = {
+ /* long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [1] */
+ /* unsigned long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ },
+ };
+ static union bpf_attr btf_load_attr = {
+ .btf_size = sizeof(raw_btf),
+ };
+ /* raw_btf is a local object, so its address is stored at run time */
+ btf_load_attr.btf = (long)&raw_btf;
+ return bpf_sys_bpf(BPF_BTF_LOAD, &btf_load_attr, sizeof(btf_load_attr));
+}
+/* "syscall" program that issues a sequence of bpf commands via bpf_sys_bpf():
+ * load BTF, create a hash map, insert one element, then load a small XDP
+ * program that looks up a key in that map.
+ *
+ * ctx supplies log_buf, log_size and max_entries and receives btf_fd,
+ * map_fd and prog_fd.
+ *
+ * Returns 1 when every step succeeded; otherwise returns the value of the
+ * first failing step (<= 0 for BTF load, map create and program load,
+ * < 0 for the map update).
+ */
+SEC("syscall")
+int bpf_prog(struct args *ctx)
+{
+ static char license[] = "GPL";
+ static struct bpf_insn insns[] = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0), /* fd 0 is a placeholder, patched below */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ static union bpf_attr map_create_attr = {
+ .map_type = BPF_MAP_TYPE_HASH,
+ .key_size = 8,
+ .value_size = 8,
+ .btf_key_type_id = 1, /* type [1] from btf_load() */
+ .btf_value_type_id = 2, /* type [2] from btf_load() */
+ };
+ static union bpf_attr map_update_attr = { .map_fd = 1, }; /* map_fd is overwritten below */
+ static __u64 key = 12;
+ static __u64 value = 34;
+ static union bpf_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .insn_cnt = sizeof(insns) / sizeof(insns[0]),
+ };
+ int ret;
+ /* Step 1: load the BTF; anything <= 0 is treated as failure */
+ ret = btf_load();
+ if (ret <= 0)
+ return ret;
+ /* hand the BTF fd back to the caller and use it for the map's types */
+ ctx->btf_fd = ret;
+ map_create_attr.max_entries = ctx->max_entries;
+ map_create_attr.btf_fd = ret;
+ /* pointer-valued and caller-supplied attr fields are filled in at run time */
+ prog_load_attr.license = (long) license;
+ prog_load_attr.insns = (long) insns;
+ prog_load_attr.log_buf = ctx->log_buf;
+ prog_load_attr.log_size = ctx->log_size;
+ prog_load_attr.log_level = 1;
+ /* Step 2: create the hash map */
+ ret = bpf_sys_bpf(BPF_MAP_CREATE, &map_create_attr, sizeof(map_create_attr));
+ if (ret <= 0)
+ return ret;
+ ctx->map_fd = ret;
+ insns[3].imm = ret; /* insns[3] is the BPF_LD_MAP_FD entry above */
+ /* Step 3: store key 12 -> value 34 in the new map */
+ map_update_attr.map_fd = ret;
+ map_update_attr.key = (long) &key;
+ map_update_attr.value = (long) &value;
+ ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr));
+ if (ret < 0)
+ return ret;
+ /* Step 4: load the generated program and report its fd */
+ ret = bpf_sys_bpf(BPF_PROG_LOAD, &prog_load_attr, sizeof(prog_load_attr));
+ if (ret <= 0)
+ return ret;
+ ctx->prog_fd = ret;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index 739dc2a51e74..910858fe078a 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -10,7 +10,7 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int count;
+int count = 0;
SEC("classifier/0")
int bpf_func_0(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c
index f82075b47d7d..bd4be135c39d 100644
--- a/tools/testing/selftests/bpf/progs/tailcall4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall4.c
@@ -10,7 +10,7 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int selector;
+int selector = 0;
#define TAIL_FUNC(x) \
SEC("classifier/" #x) \
diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c
index ce5450744fd4..adf30a33064e 100644
--- a/tools/testing/selftests/bpf/progs/tailcall5.c
+++ b/tools/testing/selftests/bpf/progs/tailcall5.c
@@ -10,7 +10,7 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int selector;
+int selector = 0;
#define TAIL_FUNC(x) \
SEC("classifier/" #x) \
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
index 7b1c04183824..3cc4c12817b5 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -20,7 +20,7 @@ int subprog_tail(struct __sk_buff *skb)
return 1;
}
-static volatile int count;
+int count = 0;
SEC("classifier/0")
int bpf_func_0(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
index 9a1b166b7fbe..77df6d4db895 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -9,7 +9,7 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int count;
+int count = 0;
__noinline
int subprog_tail_2(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index c4a9bae96e75..71184af57749 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -11,8 +11,8 @@
char _license[] SEC("license") = "GPL";
/* Userspace will update with MTU it can see on device */
-static volatile const int GLOBAL_USER_MTU;
-static volatile const __u32 GLOBAL_USER_IFINDEX;
+volatile const int GLOBAL_USER_MTU;
+volatile const __u32 GLOBAL_USER_IFINDEX;
/* BPF-prog will update these with MTU values it can see */
__u32 global_bpf_mtu_xdp = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 3c1e042962e6..e2a5acc4785c 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -39,8 +39,8 @@ char _license[] SEC("license") = "Dual BSD/GPL";
/**
* Destination port and IP used for UDP encapsulation.
*/
-static volatile const __be16 ENCAPSULATION_PORT;
-static volatile const __be32 ENCAPSULATION_IP;
+volatile const __be16 ENCAPSULATION_PORT;
+volatile const __be32 ENCAPSULATION_IP;
typedef struct {
uint64_t processed_packets_total;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func_args.c b/tools/testing/selftests/bpf/progs/test_global_func_args.c
index cae309538a9e..e712bf77daae 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func_args.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func_args.c
@@ -8,7 +8,7 @@ struct S {
int v;
};
-static volatile struct S global_variable;
+struct S global_variable = {};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
new file mode 100644
index 000000000000..3a193f42c7e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+/* Inputs read by bpf_lookup_and_delete_test(): the pid to react to and the
+ * key/value pair to insert. All start at 0; presumably the userspace side of
+ * the test fills them in (not visible here).
+ */
+__u32 set_pid = 0;
+__u64 set_key = 0;
+__u64 set_value = 0;
+/* Two-entry hash map (__u64 key -> __u64 value) that the program inserts into. */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 2);
+ __type(key, __u64);
+ __type(value, __u64);
+} hash_map SEC(".maps");
+/* Tracepoint program on syscalls/sys_enter_getpgid.
+ *
+ * When the upper 32 bits of bpf_get_current_pid_tgid() equal set_pid, the
+ * pair (set_key, set_value) is inserted into hash_map with BPF_NOEXIST; the
+ * result of the update is not checked. Always returns 0.
+ */
+SEC("tp/syscalls/sys_enter_getpgid")
+int bpf_lookup_and_delete_test(const void *ctx)
+{
+ if (set_pid == bpf_get_current_pid_tgid() >> 32)
+ bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
new file mode 100644
index 000000000000..27df571abf5b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ * 1. If reuse_md->migrating_sk is NULL (SYN packet),
+ * return SK_PASS without selecting a listener.
+ * 2. If reuse_md->migrating_sk is not NULL (socket migration),
+ * select a listener (reuseport_map[migrate_map[cookie]])
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/in.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+/* Socket array that migrate_reuseport() selects a listener from; it is
+ * indexed by the int value found in migrate_map.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 256);
+ __type(key, int);
+ __type(value, __u64);
+} reuseport_map SEC(".maps");
+/* Socket cookie (of reuse_md->sk) -> key into reuseport_map. */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 256);
+ __type(key, __u64);
+ __type(value, int);
+} migrate_map SEC(".maps");
+/* Counters bumped by migrate_reuseport() after a successful selection, one
+ * per migration situation (see the switch on the migrating socket's state).
+ */
+int migrated_at_close = 0;
+int migrated_at_close_fastopen = 0;
+int migrated_at_send_synack = 0;
+int migrated_at_recv_ack = 0;
+__be16 server_port; /* compared against tcp->dest in drop_ack() */
+/* XDP program: drop TCP segments addressed to server_port that have ACK set
+ * and SYN clear; pass everything else.
+ *
+ * Handles Ethernet frames carrying IPv4 or IPv6. Every header is checked
+ * against data_end before it is read; a packet that is too short, is not
+ * IPv4/IPv6, or is not TCP is passed untouched.
+ */
+SEC("xdp")
+int drop_ack(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ struct tcphdr *tcp = NULL;
+
+ if (eth + 1 > data_end)
+ goto pass;
+ /* locate the TCP header according to the L3 protocol */
+ switch (bpf_ntohs(eth->h_proto)) {
+ case ETH_P_IP: {
+ struct iphdr *ip = (struct iphdr *)(eth + 1);
+
+ if (ip + 1 > data_end)
+ goto pass;
+
+ if (ip->protocol != IPPROTO_TCP)
+ goto pass;
+ /* ihl is in 32-bit words, so the TCP header follows any IPv4 options */
+ tcp = (struct tcphdr *)((void *)ip + ip->ihl * 4);
+ break;
+ }
+ case ETH_P_IPV6: {
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)(eth + 1);
+
+ if (ipv6 + 1 > data_end)
+ goto pass;
+ /* only TCP directly after the fixed IPv6 header is handled */
+ if (ipv6->nexthdr != IPPROTO_TCP)
+ goto pass;
+
+ tcp = (struct tcphdr *)(ipv6 + 1);
+ break;
+ }
+ default:
+ goto pass;
+ }
+
+ if (tcp + 1 > data_end)
+ goto pass;
+
+ if (tcp->dest != server_port)
+ goto pass;
+ /* ACK without SYN is dropped; SYN and SYN+ACK segments pass */
+ if (!tcp->syn && tcp->ack)
+ return XDP_DROP;
+
+pass:
+ return XDP_PASS;
+}
+/* sk_reuseport/migrate program.
+ *
+ * If reuse_md->migrating_sk is NULL, return SK_PASS without selecting
+ * anything. Otherwise look up the cookie of reuse_md->sk in migrate_map:
+ * no entry -> SK_DROP; an entry -> use it as the key for
+ * bpf_sk_select_reuseport() on reuseport_map. If the selection fails,
+ * SK_PASS is returned without counting; if it succeeds, the counter matching
+ * the migrating socket's state is incremented and SK_PASS is returned.
+ */
+SEC("sk_reuseport/migrate")
+int migrate_reuseport(struct sk_reuseport_md *reuse_md)
+{
+ int *key, flags = 0, state, err;
+ __u64 cookie;
+
+ if (!reuse_md->migrating_sk)
+ return SK_PASS;
+
+ state = reuse_md->migrating_sk->state;
+ cookie = bpf_get_socket_cookie(reuse_md->sk);
+
+ key = bpf_map_lookup_elem(&migrate_map, &cookie);
+ if (!key)
+ return SK_DROP;
+
+ err = bpf_sk_select_reuseport(reuse_md, &reuseport_map, key, flags);
+ if (err)
+ return SK_PASS;
+ /* account the migration by the state the migrating socket was in */
+ switch (state) {
+ case BPF_TCP_ESTABLISHED:
+ __sync_fetch_and_add(&migrated_at_close, 1);
+ break;
+ case BPF_TCP_SYN_RECV:
+ __sync_fetch_and_add(&migrated_at_close_fastopen, 1);
+ break;
+ case BPF_TCP_NEW_SYN_RECV:
+ if (!reuse_md->len) /* len == 0 counts as "send synack", otherwise "recv ack" */
+ __sync_fetch_and_add(&migrated_at_send_synack, 1);
+ else
+ __sync_fetch_and_add(&migrated_at_recv_ack, 1);
+ break;
+ }
+
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
index ecbeea2df259..fc8e8a34a3db 100644
--- a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
+++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
@@ -5,7 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-static volatile const struct {
+const struct {
unsigned a[4];
/*
* if the struct's size is multiple of 16, compiler will put it into
@@ -15,11 +15,11 @@ static volatile const struct {
char _y;
} rdonly_values = { .a = {2, 3, 4, 5} };
-static volatile struct {
+struct {
unsigned did_run;
unsigned iters;
unsigned sum;
-} res;
+} res = {};
SEC("raw_tracepoint/sys_enter:skip_loop")
int skip_loop(struct pt_regs *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index 6b3f288b7c63..eaa7d9dba0be 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -35,7 +35,7 @@ long prod_pos = 0;
/* inner state */
long seq = 0;
-SEC("tp/syscalls/sys_enter_getpgid")
+SEC("fentry/__x64_sys_getpgid")
int test_ringbuf(void *ctx)
{
int cur_pid = bpf_get_current_pid_tgid() >> 32;
@@ -48,7 +48,7 @@ int test_ringbuf(void *ctx)
sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
if (!sample) {
__sync_fetch_and_add(&dropped, 1);
- return 1;
+ return 0;
}
sample->pid = pid;
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
index 374ccef704e1..441fa1c552c8 100644
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -38,11 +38,11 @@ extern int LINUX_KERNEL_VERSION __kconfig;
bool bpf_syscall = 0;
int kern_ver = 0;
+struct s out5 = {};
+
SEC("raw_tp/sys_enter")
int handler(const void *ctx)
{
- static volatile struct s out5;
-
out1 = in1;
out2 = in2;
out3 = in3;
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
index e35129bea0a0..e2ad26150f9b 100644
--- a/tools/testing/selftests/bpf/progs/test_snprintf.c
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -3,7 +3,6 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
__u32 pid = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
index 402adaf344f9..3095837334d3 100644
--- a/tools/testing/selftests/bpf/progs/test_snprintf_single.c
+++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
@@ -5,7 +5,7 @@
#include <bpf/bpf_helpers.h>
/* The format string is filled from the userspace such that loading fails */
-static const char fmt[10];
+const char fmt[10];
SEC("raw_tp/sys_enter")
int handler(const void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index a39eba9f5201..a1cc58b10c7c 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -28,8 +28,8 @@ struct {
__type(value, unsigned int);
} verdict_map SEC(".maps");
-static volatile bool test_sockmap; /* toggled by user-space */
-static volatile bool test_ingress; /* toggled by user-space */
+bool test_sockmap = false; /* toggled by user-space */
+bool test_ingress = false; /* toggled by user-space */
SEC("sk_skb/stream_parser")
int prog_stream_parser(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c
index ea1a6c4c7172..4f0b612e1661 100644
--- a/tools/testing/selftests/bpf/progs/test_static_linked1.c
+++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c
@@ -4,10 +4,10 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-/* 8-byte aligned .bss */
-static volatile long static_var1;
-static volatile int static_var11;
-int var1 = 0;
+/* 8-byte aligned .data */
+static volatile long static_var1 = 2;
+static volatile int static_var2 = 3;
+int var1 = -1;
/* 4-byte aligned .rodata */
const volatile int rovar1;
@@ -21,7 +21,7 @@ static __noinline int subprog(int x)
SEC("raw_tp/sys_enter")
int handler1(const void *ctx)
{
- var1 = subprog(rovar1) + static_var1 + static_var11;
+ var1 = subprog(rovar1) + static_var1 + static_var2;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c
index 54d8d1ab577c..766ebd502a60 100644
--- a/tools/testing/selftests/bpf/progs/test_static_linked2.c
+++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c
@@ -4,10 +4,10 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-/* 4-byte aligned .bss */
-static volatile int static_var2;
-static volatile int static_var22;
-int var2 = 0;
+/* 4-byte aligned .data */
+static volatile int static_var1 = 5;
+static volatile int static_var2 = 6;
+int var2 = -1;
/* 8-byte aligned .rodata */
const volatile long rovar2;
@@ -21,7 +21,7 @@ static __noinline int subprog(int x)
SEC("raw_tp/sys_enter")
int handler2(const void *ctx)
{
- var2 = subprog(rovar2) + static_var2 + static_var22;
+ var2 = subprog(rovar2) + static_var1 + static_var2;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
index d3c5673c0218..b7c37ca09544 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -4,8 +4,18 @@
const char LICENSE[] SEC("license") = "GPL";
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array SEC(".maps");
+
__noinline int sub1(int x)
{
+ int key = 0;
+
+ bpf_map_lookup_elem(&array, &key);
return x + 1;
}
@@ -23,6 +33,9 @@ static __noinline int sub3(int z)
static __noinline int sub4(int w)
{
+ int key = 0;
+
+ bpf_map_lookup_elem(&array, &key);
return w + sub3(5) + sub1(6);
}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
new file mode 100644
index 000000000000..18a3a7ed924a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* Dummy prog to test TC-BPF API */
+
+SEC("classifier")
+int cls(struct __sk_buff *skb)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
index b985ac4e7a81..0c93d326a663 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -33,17 +33,8 @@
a.s6_addr32[3] == b.s6_addr32[3])
#endif
-enum {
- dev_src,
- dev_dst,
-};
-
-struct bpf_map_def SEC("maps") ifindex_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 2,
-};
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
__be32 addr)
@@ -79,14 +70,8 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
return v6_equal(ip6h->daddr, addr);
}
-static __always_inline int get_dev_ifindex(int which)
-{
- int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
-
- return ifindex ? *ifindex : 0;
-}
-
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("classifier/chk_egress")
+int tc_chk(struct __sk_buff *skb)
{
void *data_end = ctx_ptr(skb->data_end);
void *data = ctx_ptr(skb->data);
@@ -98,7 +83,8 @@ SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
}
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("classifier/dst_ingress")
+int tc_dst(struct __sk_buff *skb)
{
__u8 zero[ETH_ALEN * 2];
bool redirect = false;
@@ -119,10 +105,11 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
return TC_ACT_SHOT;
- return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0);
+ return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0);
}
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("classifier/src_ingress")
+int tc_src(struct __sk_buff *skb)
{
__u8 zero[ETH_ALEN * 2];
bool redirect = false;
@@ -143,7 +130,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb)
if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
return TC_ACT_SHOT;
- return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0);
+ return bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0);
}
char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
index d82ed3457030..f7ab69cf018e 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
@@ -75,7 +75,8 @@ static __always_inline int fill_fib_params_v6(struct __sk_buff *skb,
return 0;
}
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("classifier/chk_egress")
+int tc_chk(struct __sk_buff *skb)
{
void *data_end = ctx_ptr(skb->data_end);
void *data = ctx_ptr(skb->data);
@@ -142,12 +143,14 @@ static __always_inline int tc_redir(struct __sk_buff *skb)
/* these are identical, but keep them separate for compatibility with the
* section names expected by test_tc_redirect.sh
*/
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("classifier/dst_ingress")
+int tc_dst(struct __sk_buff *skb)
{
return tc_redir(skb);
}
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("classifier/src_ingress")
+int tc_src(struct __sk_buff *skb)
{
return tc_redir(skb);
}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index fc84a7685aa2..fe818cd5f010 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,41 +5,59 @@
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
-enum {
- dev_src,
- dev_dst,
-};
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
-struct bpf_map_def SEC("maps") ifindex_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 2,
-};
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
-static __always_inline int get_dev_ifindex(int which)
+SEC("classifier/chk_egress")
+int tc_chk(struct __sk_buff *skb)
{
- int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
+ return TC_ACT_SHOT;
+}
- return ifindex ? *ifindex : 0;
+SEC("classifier/dst_ingress")
+int tc_dst(struct __sk_buff *skb)
+{
+ return bpf_redirect_peer(IFINDEX_SRC, 0);
}
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("classifier/src_ingress")
+int tc_src(struct __sk_buff *skb)
{
- return TC_ACT_SHOT;
+ return bpf_redirect_peer(IFINDEX_DST, 0);
}
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("classifier/dst_ingress_l3")
+int tc_dst_l3(struct __sk_buff *skb)
{
- return bpf_redirect_peer(get_dev_ifindex(dev_src), 0);
+ return bpf_redirect(IFINDEX_SRC, 0);
}
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("classifier/src_ingress_l3")
+int tc_src_l3(struct __sk_buff *skb)
{
- return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
+ __u16 proto = skb->protocol;
+
+ if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_peer(IFINDEX_DST, 0);
}
char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c
index 8ca7f399b670..119582aa105a 100644
--- a/tools/testing/selftests/bpf/progs/trace_printk.c
+++ b/tools/testing/selftests/bpf/progs/trace_printk.c
@@ -10,11 +10,11 @@ char _license[] SEC("license") = "GPL";
int trace_printk_ret = 0;
int trace_printk_ran = 0;
-SEC("tp/raw_syscalls/sys_enter")
+const char fmt[] = "Testing,testing %d\n";
+
+SEC("fentry/__x64_sys_nanosleep")
int sys_enter(void *ctx)
{
- static const char fmt[] = "testing,testing %d\n";
-
trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
++trace_printk_ran);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
new file mode 100644
index 000000000000..880debcbcd65
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+#include <string.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* One map uses devmap, the other uses devmap_hash, so both are tested */
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 1024);
+} map_all SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 128);
+} map_egress SEC(".maps");
+
+/* Map to store the MAC addresses of the egress interfaces */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __be64);
+ __uint(max_entries, 128);
+} mac_map SEC(".maps");
+
+SEC("xdp_redirect_map_multi")
+int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ int if_index = ctx->ingress_ifindex;
+ struct ethhdr *eth = data;
+ __u16 h_proto;
+ __u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */
+ if (h_proto == bpf_htons(ETH_P_IP))
+ return bpf_redirect_map(&map_all, 0,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+ /* Using IPv6 for testing with no flags set */
+ else if (h_proto == bpf_htons(ETH_P_IPV6))
+ return bpf_redirect_map(&map_all, if_index, 0);
+ /* All others for BPF_F_BROADCAST testing */
+ else
+ return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST);
+}
+
+/* The following 2 progs are for 2nd devmap prog testing */
+SEC("xdp_redirect_map_ingress")
+int xdp_redirect_map_all_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&map_egress, 0,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp_devmap/map_prog")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 key = ctx->egress_ifindex;
+ struct ethhdr *eth = data;
+ __u64 nh_off;
+ __be64 *mac;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ mac = bpf_map_lookup_elem(&mac_map, &key);
+ if (mac)
+ __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
index 7eb940a7b2eb..ed12111cd2f0 100755
--- a/tools/testing/selftests/bpf/test_doc_build.sh
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -1,5 +1,6 @@
#!/bin/bash
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+set -e
# Assume script is located under tools/testing/selftests/bpf/. We want to start
# build attempts from the top of kernel repository.
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 6a5349f9eb14..7e9049fa3edf 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -231,6 +231,14 @@ static void test_lru_sanity0(int map_type, int map_flags)
assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
errno == ENOENT);
+ /* lookup elem key=1 and delete it, then check it doesn't exist */
+ key = 1;
+ assert(!bpf_map_lookup_and_delete_elem(lru_map_fd, &key, &value));
+ assert(value[0] == 1234);
+
+ /* remove the same element from the expected map */
+ assert(!bpf_map_delete_elem(expected_map_fd, &key));
+
assert(map_equal(lru_map_fd, expected_map_fd));
close(expected_map_fd);
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 51adc42b2b40..30cbf5d98f7d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -53,23 +53,30 @@ static void test_hashmap(unsigned int task, void *data)
value = 0;
/* BPF_NOEXIST means add new element if it doesn't exist. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
- assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, -1) < 0 &&
errno == EINVAL);
/* Check that key=1 can be found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
key = 2;
+ value = 1234;
+ /* Insert key=2 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+ /* Check that key=2 matches the value and delete it */
+ assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234);
+
/* Check that key=2 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
/* key=2 is not there. */
errno == ENOENT);
@@ -80,7 +87,7 @@ static void test_hashmap(unsigned int task, void *data)
* inserted due to max_entries limit.
*/
key = 0;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Update existing element, though the map is full. */
@@ -89,12 +96,12 @@ static void test_hashmap(unsigned int task, void *data)
key = 2;
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
key = 3;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
key = 0;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -104,7 +111,7 @@ static void test_hashmap(unsigned int task, void *data)
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
(next_key == 1 || next_key == 2) &&
(next_key != first_key));
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete both elements. */
@@ -112,13 +119,13 @@ static void test_hashmap(unsigned int task, void *data)
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
- assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
errno == ENOENT);
close(fd);
@@ -166,15 +173,25 @@ static void test_hashmap_percpu(unsigned int task, void *data)
/* Insert key=1 element. */
assert(!(expected_key_mask & key));
assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
+
+ /* Lookup and delete elem key=1 and check value. */
+ assert(bpf_map_lookup_and_delete_elem(fd, &key, value) == 0 &&
+ bpf_percpu(value,0) == 100);
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(value,i) = i + 100;
+
+ /* Re-insert key=1, which must not exist after the delete above. */
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
expected_key_mask |= key;
/* BPF_NOEXIST means add new element if it doesn't exist. */
- assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
- assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, -1) < 0 &&
errno == EINVAL);
/* Check that key=1 can be found. Value could be 0 if the lookup
@@ -186,10 +203,10 @@ static void test_hashmap_percpu(unsigned int task, void *data)
key = 2;
/* Check that key=2 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, value) < 0 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
- assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) < 0 &&
/* key=2 is not there. */
errno == ENOENT);
@@ -202,11 +219,11 @@ static void test_hashmap_percpu(unsigned int task, void *data)
* inserted due to max_entries limit.
*/
key = 0;
- assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -237,13 +254,13 @@ static void test_hashmap_percpu(unsigned int task, void *data)
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
- assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
errno == ENOENT);
close(fd);
@@ -360,7 +377,7 @@ static void test_arraymap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
value = 0;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that key=1 can be found. */
@@ -374,11 +391,11 @@ static void test_arraymap(unsigned int task, void *data)
* due to max_entries limit.
*/
key = 2;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -387,12 +404,12 @@ static void test_arraymap(unsigned int task, void *data)
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
close(fd);
}
@@ -418,7 +435,7 @@ static void test_arraymap_percpu(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
bpf_percpu(values, 0) = 0;
- assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that key=1 can be found. */
@@ -433,11 +450,11 @@ static void test_arraymap_percpu(unsigned int task, void *data)
/* Check that key=2 cannot be inserted due to max_entries limit. */
key = 2;
- assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) < 0 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
- assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, values) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -446,12 +463,12 @@ static void test_arraymap_percpu(unsigned int task, void *data)
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
close(fd);
}
@@ -555,7 +572,7 @@ static void test_queuemap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
/* Check that element cannot be pushed due to max_entries limit */
- assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
errno == E2BIG);
/* Peek element */
@@ -571,12 +588,12 @@ static void test_queuemap(unsigned int task, void *data)
val == vals[i]);
/* Check that there are not elements left */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
errno == ENOENT);
/* Check that non supported functions set errno to EINVAL */
- assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
- assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
close(fd);
}
@@ -613,7 +630,7 @@ static void test_stackmap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
/* Check that element cannot be pushed due to max_entries limit */
- assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
errno == E2BIG);
/* Peek element */
@@ -629,12 +646,12 @@ static void test_stackmap(unsigned int task, void *data)
val == vals[i]);
/* Check that there are not elements left */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
errno == ENOENT);
/* Check that non supported functions set errno to EINVAL */
- assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
- assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
close(fd);
}
@@ -835,7 +852,7 @@ static void test_sockmap(unsigned int tasks, void *data)
}
bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
- if (IS_ERR(bpf_map_rx)) {
+ if (!bpf_map_rx) {
printf("Failed to load map rx from verdict prog\n");
goto out_sockmap;
}
@@ -847,7 +864,7 @@ static void test_sockmap(unsigned int tasks, void *data)
}
bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
- if (IS_ERR(bpf_map_tx)) {
+ if (!bpf_map_tx) {
printf("Failed to load map tx from verdict prog\n");
goto out_sockmap;
}
@@ -859,7 +876,7 @@ static void test_sockmap(unsigned int tasks, void *data)
}
bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
- if (IS_ERR(bpf_map_msg)) {
+ if (!bpf_map_msg) {
printf("Failed to load map msg from msg_verdict prog\n");
goto out_sockmap;
}
@@ -871,7 +888,7 @@ static void test_sockmap(unsigned int tasks, void *data)
}
bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
- if (IS_ERR(bpf_map_break)) {
+ if (!bpf_map_break) {
printf("Failed to load map tx from verdict prog\n");
goto out_sockmap;
}
@@ -1153,7 +1170,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_array");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load array of maps from test prog\n");
goto out_map_in_map;
}
@@ -1164,7 +1181,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_hash");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load hash of maps from test prog\n");
goto out_map_in_map;
}
@@ -1177,7 +1194,7 @@ static void test_map_in_map(void)
bpf_object__load(obj);
map = bpf_object__find_map_by_name(obj, "mim_array");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load array of maps from test prog\n");
goto out_map_in_map;
}
@@ -1194,7 +1211,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_hash");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load hash of maps from test prog\n");
goto out_map_in_map;
}
@@ -1246,7 +1263,7 @@ static void test_map_large(void)
}
key.c = -1;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Iterate through all elements. */
@@ -1254,12 +1271,12 @@ static void test_map_large(void)
key.c = -1;
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_get_next_key(fd, &key, &key) == 0);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
key.c = 0;
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
key.a = 1;
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
close(fd);
}
@@ -1391,7 +1408,7 @@ static void test_map_parallel(void)
run_parallel(TASKS, test_update_delete, data);
/* Check that key=0 is already there. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that all elements were inserted. */
@@ -1399,7 +1416,7 @@ static void test_map_parallel(void)
key = -1;
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_get_next_key(fd, &key, &key) == 0);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
/* Another check for all elements */
for (i = 0; i < MAP_SIZE; i++) {
@@ -1415,8 +1432,8 @@ static void test_map_parallel(void)
/* Nothing should be left. */
key = -1;
- assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, NULL, &key) < 0 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
}
static void test_map_rdonly(void)
@@ -1434,12 +1451,12 @@ static void test_map_rdonly(void)
key = 1;
value = 1234;
/* Try to insert key=1 element. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) < 0 &&
errno == EPERM);
/* Check that key=1 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == ENOENT);
close(fd);
}
@@ -1462,8 +1479,8 @@ static void test_map_wronly_hash(void)
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
/* Check that reading elements and keys from the map is not allowed. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
- assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == EPERM);
+ assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == EPERM);
close(fd);
}
@@ -1490,10 +1507,10 @@ static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
/* Peek element should fail */
- assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+ assert(bpf_map_lookup_elem(fd, NULL, &value) < 0 && errno == EPERM);
/* Pop element should fail */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) < 0 &&
errno == EPERM);
close(fd);
@@ -1547,7 +1564,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
value = &fd32;
}
err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"reuseport array update unbound sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1576,7 +1593,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
*/
err = bpf_map_update_elem(map_fd, &index0, value,
BPF_ANY);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"reuseport array update non-listening sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1606,31 +1623,31 @@ static void test_reuseport_array(void)
map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
sizeof(__u32), sizeof(__u64), array_size, 0);
- CHECK(map_fd == -1, "reuseport array create",
+ CHECK(map_fd < 0, "reuseport array create",
"map_fd:%d, errno:%d\n", map_fd, errno);
/* Test lookup/update/delete with invalid index */
err = bpf_map_delete_elem(map_fd, &bad_index);
- CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+ CHECK(err >= 0 || errno != E2BIG, "reuseport array del >=max_entries",
"err:%d errno:%d\n", err, errno);
err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != E2BIG,
+ CHECK(err >= 0 || errno != E2BIG,
"reuseport array update >=max_entries",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array update >=max_entries",
"err:%d errno:%d\n", err, errno);
/* Test lookup/delete non existence elem */
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array lookup not-exist elem",
"err:%d errno:%d\n", err, errno);
err = bpf_map_delete_elem(map_fd, &index3);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array del not-exist elem",
"err:%d errno:%d\n", err, errno);
@@ -1644,7 +1661,7 @@ static void test_reuseport_array(void)
/* BPF_EXIST failure case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_EXIST);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array update empty elem BPF_EXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1653,7 +1670,7 @@ static void test_reuseport_array(void)
/* BPF_NOEXIST success case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_NOEXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update empty elem BPF_NOEXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1662,7 +1679,7 @@ static void test_reuseport_array(void)
/* BPF_EXIST success case. */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_EXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update same elem BPF_EXIST",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
@@ -1670,7 +1687,7 @@ static void test_reuseport_array(void)
/* BPF_NOEXIST failure case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_NOEXIST);
- CHECK(err != -1 || errno != EEXIST,
+ CHECK(err >= 0 || errno != EEXIST,
"reuseport array update non-empty elem BPF_NOEXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1679,7 +1696,7 @@ static void test_reuseport_array(void)
/* BPF_ANY case (always succeed) */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_ANY);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update same sk with BPF_ANY",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
@@ -1688,32 +1705,32 @@ static void test_reuseport_array(void)
/* The same sk cannot be added to reuseport_array twice */
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != EBUSY,
+ CHECK(err >= 0 || errno != EBUSY,
"reuseport array update same sk with same index",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != EBUSY,
+ CHECK(err >= 0 || errno != EBUSY,
"reuseport array update same sk with different index",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
/* Test delete elem */
err = bpf_map_delete_elem(map_fd, &index3);
- CHECK(err == -1, "reuseport array delete sk",
+ CHECK(err < 0, "reuseport array delete sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
/* Add it back with BPF_NOEXIST */
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array re-add with BPF_NOEXIST after del",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
/* Test cookie */
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err == -1 || sk_cookie != map_cookie,
+ CHECK(err < 0 || sk_cookie != map_cookie,
"reuseport array lookup re-added sk",
"sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llx\n",
type, err, errno, sk_cookie, map_cookie);
@@ -1722,7 +1739,7 @@ static void test_reuseport_array(void)
for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
close(grpa_fds64[f]);
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array lookup after close()",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1733,7 +1750,7 @@ static void test_reuseport_array(void)
CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n",
err, errno);
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
- CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+ CHECK(err >= 0 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
"err:%d errno:%d\n", err, errno);
close(fd64);
@@ -1743,16 +1760,16 @@ static void test_reuseport_array(void)
/* Test 32 bit fd */
map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
sizeof(__u32), sizeof(__u32), array_size, 0);
- CHECK(map_fd == -1, "reuseport array create",
+ CHECK(map_fd < 0, "reuseport array create",
"map_fd:%d, errno:%d\n", map_fd, errno);
prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64,
&sk_cookie, 1);
fd = fd64;
err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
- CHECK(err == -1, "reuseport array update 32 bit fd",
+ CHECK(err < 0, "reuseport array update 32 bit fd",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOSPC,
+ CHECK(err >= 0 || errno != ENOSPC,
"reuseport array lookup 32 bit fd",
"err:%d errno:%d\n", err, errno);
close(fd);
@@ -1798,6 +1815,8 @@ int main(void)
{
srand(time(NULL));
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
map_flags = 0;
run_all_tests();
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6396932b97e2..6f103106a39b 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -737,6 +737,9 @@ int main(int argc, char **argv)
if (err)
return err;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
libbpf_set_print(libbpf_print_fn);
srand(time(NULL));
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index dda52cb649dc..8ef7f334e715 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -249,16 +249,17 @@ extern int test__join_cgroup(const char *path);
#define ASSERT_OK_PTR(ptr, name) ({ \
static int duration = 0; \
const void *___res = (ptr); \
- bool ___ok = !IS_ERR_OR_NULL(___res); \
- CHECK(!___ok, (name), \
- "unexpected error: %ld\n", PTR_ERR(___res)); \
+ int ___err = libbpf_get_error(___res); \
+ bool ___ok = ___err == 0; \
+ CHECK(!___ok, (name), "unexpected error: %d\n", ___err); \
___ok; \
})
#define ASSERT_ERR_PTR(ptr, name) ({ \
static int duration = 0; \
const void *___res = (ptr); \
- bool ___ok = IS_ERR(___res); \
+ int ___err = libbpf_get_error(___res); \
+ bool ___ok = ___err != 0; \
CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
___ok; \
})
diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh
deleted file mode 100755
index 8868aa1ca902..000000000000
--- a/tools/testing/selftests/bpf/test_tc_redirect.sh
+++ /dev/null
@@ -1,216 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
-# between src and dst. The netns fwd has veth links to each src and dst. The
-# client is in src and server in dst. The test installs a TC BPF program to each
-# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
-# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
-# switch from ingress side; it also installs a checker prog on the egress side
-# to drop unexpected traffic.
-
-if [[ $EUID -ne 0 ]]; then
- echo "This script must be run as root"
- echo "FAIL"
- exit 1
-fi
-
-# check that needed tools are present
-command -v nc >/dev/null 2>&1 || \
- { echo >&2 "nc is not available"; exit 1; }
-command -v dd >/dev/null 2>&1 || \
- { echo >&2 "dd is not available"; exit 1; }
-command -v timeout >/dev/null 2>&1 || \
- { echo >&2 "timeout is not available"; exit 1; }
-command -v ping >/dev/null 2>&1 || \
- { echo >&2 "ping is not available"; exit 1; }
-if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi
-command -v perl >/dev/null 2>&1 || \
- { echo >&2 "perl is not available"; exit 1; }
-command -v jq >/dev/null 2>&1 || \
- { echo >&2 "jq is not available"; exit 1; }
-command -v bpftool >/dev/null 2>&1 || \
- { echo >&2 "bpftool is not available"; exit 1; }
-
-readonly GREEN='\033[0;92m'
-readonly RED='\033[0;31m'
-readonly NC='\033[0m' # No Color
-
-readonly PING_ARG="-c 3 -w 10 -q"
-
-readonly TIMEOUT=10
-
-readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
-readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
-readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
-
-readonly IP4_SRC="172.16.1.100"
-readonly IP4_DST="172.16.2.100"
-
-readonly IP6_SRC="::1:dead:beef:cafe"
-readonly IP6_DST="::2:dead:beef:cafe"
-
-readonly IP4_SLL="169.254.0.1"
-readonly IP4_DLL="169.254.0.2"
-readonly IP4_NET="169.254.0.0"
-
-netns_cleanup()
-{
- ip netns del ${NS_SRC}
- ip netns del ${NS_FWD}
- ip netns del ${NS_DST}
-}
-
-netns_setup()
-{
- ip netns add "${NS_SRC}"
- ip netns add "${NS_FWD}"
- ip netns add "${NS_DST}"
-
- ip link add veth_src type veth peer name veth_src_fwd
- ip link add veth_dst type veth peer name veth_dst_fwd
-
- ip link set veth_src netns ${NS_SRC}
- ip link set veth_src_fwd netns ${NS_FWD}
-
- ip link set veth_dst netns ${NS_DST}
- ip link set veth_dst_fwd netns ${NS_FWD}
-
- ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
- ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst
-
- # The fwd netns automatically get a v6 LL address / routes, but also
- # needs v4 one in order to start ARP probing. IP4_NET route is added
- # to the endpoints so that the ARP processing will reply.
-
- ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
- ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd
-
- ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
- ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad
-
- ip -netns ${NS_SRC} link set dev veth_src up
- ip -netns ${NS_FWD} link set dev veth_src_fwd up
-
- ip -netns ${NS_DST} link set dev veth_dst up
- ip -netns ${NS_FWD} link set dev veth_dst_fwd up
-
- ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
- ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
- ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global
-
- ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
- ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global
-
- ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
- ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
- ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global
-
- ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
- ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global
-
- fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
- fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)
-
- ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
- ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst
-
- ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
- ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
-}
-
-netns_test_connectivity()
-{
- set +e
-
- ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
- ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
-
- TEST="TCPv4 connectivity test"
- ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
- if [ $? -ne 0 ]; then
- echo -e "${TEST}: ${RED}FAIL${NC}"
- exit 1
- fi
- echo -e "${TEST}: ${GREEN}PASS${NC}"
-
- TEST="TCPv6 connectivity test"
- ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
- if [ $? -ne 0 ]; then
- echo -e "${TEST}: ${RED}FAIL${NC}"
- exit 1
- fi
- echo -e "${TEST}: ${GREEN}PASS${NC}"
-
- TEST="ICMPv4 connectivity test"
- ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST}
- if [ $? -ne 0 ]; then
- echo -e "${TEST}: ${RED}FAIL${NC}"
- exit 1
- fi
- echo -e "${TEST}: ${GREEN}PASS${NC}"
-
- TEST="ICMPv6 connectivity test"
- ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST}
- if [ $? -ne 0 ]; then
- echo -e "${TEST}: ${RED}FAIL${NC}"
- exit 1
- fi
- echo -e "${TEST}: ${GREEN}PASS${NC}"
-
- set -e
-}
-
-hex_mem_str()
-{
- perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1
-}
-
-netns_setup_bpf()
-{
- local obj=$1
- local use_forwarding=${2:-0}
-
- ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
- ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
- ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress
-
- ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
- ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
- ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress
-
- if [ "$use_forwarding" -eq "1" ]; then
- # bpf_fib_lookup() checks if forwarding is enabled
- ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1
- ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1
- ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1
- return 0
- fi
-
- veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
- veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
-
- progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]')
- for prog in $progs; do
- map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]')
- if [ ! -z "$map" ]; then
- bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src)
- bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst)
- fi
- done
-}
-
-trap netns_cleanup EXIT
-set -e
-
-netns_setup
-netns_setup_bpf test_tc_neigh.o
-netns_test_connectivity
-netns_cleanup
-netns_setup
-netns_setup_bpf test_tc_neigh_fib.o 1
-netns_test_connectivity
-netns_cleanup
-netns_setup
-netns_setup_bpf test_tc_peer.o
-netns_test_connectivity
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 73da7fe8c152..4a39304cc5a6 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -82,6 +82,8 @@ int main(int argc, char **argv)
cpu_set_t cpuset;
__u32 key = 0;
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
@@ -116,7 +118,7 @@ int main(int argc, char **argv)
pb_opts.sample_cb = dummyfn;
pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts);
- if (IS_ERR(pb))
+ if (!pb)
goto err;
pthread_create(&tid, NULL, poller_thread, pb);
@@ -163,7 +165,6 @@ err:
bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
close(cg_fd);
cleanup_cgroup_environment();
- if (!IS_ERR_OR_NULL(pb))
- perf_buffer__free(pb);
+ perf_buffer__free(pb);
return error;
}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 1512092e1e68..3a9e332c5e36 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1147,7 +1147,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
}
}
- if (test->insn_processed) {
+ if (!unpriv && test->insn_processed) {
uint32_t insn_processed;
char *proc;
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
new file mode 100755
index 000000000000..1538373157e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
@@ -0,0 +1,204 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test topology:
+# - - - - - - - - - - - - - - - - - - - - - - - - -
+# | veth1 veth2 veth3 | ... init net
+# - -| - - - - - - | - - - - - - | - -
+# --------- --------- ---------
+# | veth0 | | veth0 | | veth0 | ...
+# --------- --------- ---------
+# ns1 ns2 ns3
+#
+# Test modules:
+# XDP modes: generic, native, native + egress_prog
+#
+# Test cases:
+# ARP: Testing BPF_F_BROADCAST, the ingress interface also should receive
+# the redirects.
+# ns1 -> gw: ns1, ns2, ns3, should receive the arp request
+# IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress
+# interface should not receive the redirects.
+# ns1 -> gw: ns1 should not receive, ns2, ns3 should receive redirects.
+# IPv6: Testing with no flags set, all the pkts should be redirected back
+# ping test: ns1 -> ns2 (block), echo requests will be redirected back
+# egress_prog:
+# all src mac should be egress interface's mac
+
+# netns numbers
+NUM=3
+IFACES=""
+DRV_MODE="xdpgeneric xdpdrv xdpegress"
+PASS=0
+FAIL=0
+
+# Print a "Pass:" line built from all arguments and count one more success.
+test_pass()
+{
+	echo "Pass: $*"
+	PASS=$((PASS + 1))
+}
+
+# Print a "fail:" line built from all arguments and count one more failure.
+test_fail()
+{
+	echo "fail: $*"
+	FAIL=$((FAIL + 1))
+}
+
+clean_up()
+{
+ for i in $(seq $NUM); do
+ ip link del veth$i 2> /dev/null
+ ip netns del ns$i 2> /dev/null
+ done
+}
+
+# Check the prerequisites of this test and exit with the kselftest SKIP code
+# when one of them is missing.
+check_env()
+{
+	# Kselftest framework requirement - SKIP code is 4.
+	local ksft_skip=4
+
+	# Probe ip(8) for xdpgeneric support by switching XDP off on lo.
+	if ! ip link set dev lo xdpgeneric off &>/dev/null; then
+		echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
+		exit $ksft_skip
+	fi
+
+	# Use the shell builtin instead of which(1): which may itself be
+	# missing, which would wrongly report tcpdump as unavailable.
+	if ! command -v tcpdump &>/dev/null; then
+		echo "selftests: [SKIP] Could not run test without tcpdump"
+		exit $ksft_skip
+	fi
+}
+
+setup_ns()
+{
+ local mode=$1
+ IFACES=""
+
+ if [ "$mode" = "xdpegress" ]; then
+ mode="xdpdrv"
+ fi
+
+ for i in $(seq $NUM); do
+ ip netns add ns$i
+ ip link add veth$i type veth peer name veth0 netns ns$i
+ ip link set veth$i up
+ ip -n ns$i link set veth0 up
+
+ ip -n ns$i addr add 192.0.2.$i/24 dev veth0
+ ip -n ns$i addr add 2001:db8::$i/64 dev veth0
+ # Add a neigh entry for IPv4 ping test
+ ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
+ ip -n ns$i link set veth0 $mode obj \
+ xdp_dummy.o sec xdp_dummy &> /dev/null || \
+ { test_fail "Unable to load dummy xdp" && exit 1; }
+ IFACES="$IFACES veth$i"
+ veth_mac[$i]=$(ip link show veth$i | awk '/link\/ether/ {print $2}')
+ done
+}
+
+# Egress-program scenario: capture on veth0 in ns2 and ns3 while ns1 pings the
+# gateway address, then check that the broadcast frames seen in each
+# namespace carry the MAC of the matching host-side veth as source.
+do_egress_tests()
+{
+	local mode=$1
+	local n
+
+	# mac test
+	for n in 2 3; do
+		ip netns exec ns$n tcpdump -e -i veth0 -nn -l -e &> mac_ns1-${n}_${mode}.log &
+	done
+	sleep 0.5
+	ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+	sleep 0.5
+	pkill -9 tcpdump
+
+	# mac check
+	for n in 2 3; do
+		if grep -q "${veth_mac[$n]} > ff:ff:ff:ff:ff:ff" mac_ns1-${n}_${mode}.log; then
+			test_pass "$mode mac ns1-$n"
+		else
+			test_fail "$mode mac ns1-$n"
+		fi
+	done
+}
+
+# ARP / IPv4 / IPv6 scenarios: capture on veth0 in every namespace while ns1
+# sends the three kinds of traffic, then judge each capture by counting the
+# matching packets in its log.
+do_ping_tests()
+{
+	local mode=$1
+	local n
+
+	# ping6 test: the echo request should be redirected back to the sender
+	# itself, not to the others
+	ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
+
+	for n in 1 2 3; do
+		ip netns exec ns$n tcpdump -i veth0 -nn -l -e &> ns1-${n}_${mode}.log &
+	done
+	sleep 0.5
+	# ARP test
+	ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+	# IPv4 test
+	ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
+	# IPv6 test
+	ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
+	sleep 0.5
+	pkill -9 tcpdump
+
+	# All netns should receive the redirected arp requests
+	if [ $(grep -c "who-has 192.0.2.254" ns1-1_${mode}.log) -gt 4 ]; then
+		test_pass "$mode arp(F_BROADCAST) ns1-1"
+	else
+		test_fail "$mode arp(F_BROADCAST) ns1-1"
+	fi
+	for n in 2 3; do
+		if [ $(grep -c "who-has 192.0.2.254" ns1-${n}_${mode}.log) -le 4 ]; then
+			test_pass "$mode arp(F_BROADCAST) ns1-$n"
+		else
+			test_fail "$mode arp(F_BROADCAST) ns1-$n"
+		fi
+	done
+
+	# ns1 should not receive the redirected echo request, others should
+	for n in 1 2 3; do
+		if [ $(grep -c "ICMP echo request" ns1-${n}_${mode}.log) -eq 4 ]; then
+			test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-$n"
+		else
+			test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-$n"
+		fi
+	done
+
+	# ns1 should receive the echo request, ns2 should not
+	if [ $(grep -c "ICMP6, echo request" ns1-1_${mode}.log) -eq 4 ]; then
+		test_pass "$mode IPv6 (no flags) ns1-1"
+	else
+		test_fail "$mode IPv6 (no flags) ns1-1"
+	fi
+	if [ $(grep -c "ICMP6, echo request" ns1-2_${mode}.log) -eq 0 ]; then
+		test_pass "$mode IPv6 (no flags) ns1-2"
+	else
+		test_fail "$mode IPv6 (no flags) ns1-2"
+	fi
+}
+
+# Run the scenario for XDP mode $1: start xdp_redirect_multi in the background
+# on the interfaces in IFACES, run the egress or the ping checks, then stop it.
+do_tests()
+{
+	local mode=$1
+	local drv_p
+
+	# Map the mode to the matching xdp_redirect_multi option; drv_p stays
+	# empty for any other mode.
+	if [ "$mode" = "xdpdrv" ]; then
+		drv_p="-N"
+	elif [ "$mode" = "xdpegress" ]; then
+		drv_p="-X"
+	elif [ "$mode" = "xdpgeneric" ]; then
+		drv_p="-S"
+	fi
+
+	./xdp_redirect_multi $drv_p $IFACES &> xdp_redirect_${mode}.log &
+	xdp_pid=$!
+	# Give the loader time to attach before traffic is generated
+	sleep 1
+
+	case "$mode" in
+	xdpegress)	do_egress_tests $mode;;
+	*)		do_ping_tests $mode;;
+	esac
+
+	kill $xdp_pid
+}
+
+# Tear the topology down on normal exit and on INT/QUIT/ABRT. SIGKILL is
+# deliberately not listed: it cannot be caught, so a trap on it has no effect.
+trap clean_up EXIT INT QUIT ABRT
+
+check_env
+# Drop logs left behind by a previous run
+rm -f xdp_redirect_*.log ns*.log mac_ns*.log
+
+# One full setup / test / teardown cycle per XDP mode
+for mode in ${DRV_MODE}; do
+	setup_ns $mode
+	do_tests $mode
+	clean_up
+done
+
+echo "Summary: PASS $PASS, FAIL $FAIL"
+if [ $FAIL -eq 0 ]; then
+	exit 0
+fi
+exit 1
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
index ca8fdb1b3f01..7d7ebee5cc7a 100644
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ b/tools/testing/selftests/bpf/verifier/and.c
@@ -61,6 +61,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 0
},
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index 8a1caf46ffbc..e061e8799ce2 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -508,6 +508,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT
},
{
@@ -528,6 +530,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT
},
{
@@ -569,6 +573,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+ .result_unpriv = REJECT,
.fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
@@ -589,6 +595,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+ .result_unpriv = REJECT,
.fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
@@ -609,6 +617,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+ .result_unpriv = REJECT,
.fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
@@ -674,6 +684,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+ .result_unpriv = REJECT,
.fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
@@ -695,6 +707,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+ .result_unpriv = REJECT,
.fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 17fe33a75034..2c8935b3e65d 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -8,6 +8,8 @@
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
},
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index bd5cae4a7f73..1c857b2fbdf0 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -87,6 +87,8 @@
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -150,6 +152,8 @@
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -213,6 +217,8 @@
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -280,6 +286,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -348,6 +356,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -416,6 +426,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -484,6 +496,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -552,6 +566,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -620,6 +636,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -688,6 +706,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -756,6 +776,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c
index 8dcd4e0383d5..11fc68da735e 100644
--- a/tools/testing/selftests/bpf/verifier/jset.c
+++ b/tools/testing/selftests/bpf/verifier/jset.c
@@ -82,8 +82,8 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .retval_unpriv = 1,
- .result_unpriv = ACCEPT,
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.retval = 1,
.result = ACCEPT,
},
@@ -141,7 +141,8 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .result_unpriv = ACCEPT,
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -162,6 +163,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .result_unpriv = ACCEPT,
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c
index 07eaa04412ae..8ab94d65f3d5 100644
--- a/tools/testing/selftests/bpf/verifier/stack_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c
@@ -295,8 +295,6 @@
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
BPF_EXIT_INSN(),
},
- .result_unpriv = REJECT,
- .errstr_unpriv = "invalid write to stack R1 off=0 size=1",
.result = ACCEPT,
.retval = 42,
},
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index bd436df5cc32..111801aea5e3 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -420,6 +420,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R7 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 0,
},
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index e5913fd3b903..a3e593ddfafc 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -120,7 +120,7 @@
.fixup_map_array_48b = { 1 },
.result = ACCEPT,
.result_unpriv = REJECT,
- .errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
+ .errstr_unpriv = "R2 pointer comparison prohibited",
.retval = 0,
},
{
@@ -159,7 +159,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
// fake-dead code; targeted from branch A to
- // prevent dead code sanitization
+ // prevent dead code sanitization, rejected
+ // via branch B however
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
@@ -167,7 +168,7 @@
.fixup_map_array_48b = { 1 },
.result = ACCEPT,
.result_unpriv = REJECT,
- .errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
.retval = 0,
},
{
@@ -300,8 +301,6 @@
},
.fixup_map_array_48b = { 3 },
.result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
.retval = 1,
},
{
@@ -371,8 +370,6 @@
},
.fixup_map_array_48b = { 3 },
.result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
.retval = 1,
},
{
@@ -472,8 +469,6 @@
},
.fixup_map_array_48b = { 3 },
.result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
.retval = 1,
},
{
@@ -766,8 +761,6 @@
},
.fixup_map_array_48b = { 3 },
.result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
.retval = 1,
},
{
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
new file mode 100644
index 000000000000..3696a8f32c23
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/if.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/resource.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#define MAX_IFACE_NUM 32
+#define MAX_INDEX_NUM 1024
+
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int ifaces[MAX_IFACE_NUM] = {};
+
+/*
+ * Signal handler installed for SIGINT/SIGTERM: detach the XDP program from
+ * every interface recorded in ifaces[] and terminate the process.
+ *
+ * Exits with status 1 if querying an interface's XDP program id fails,
+ * otherwise with status 0.
+ */
+static void int_exit(int sig)
+{
+	__u32 prog_id = 0;
+	int i;
+
+	/*
+	 * ifaces[] is only zero-terminated while fewer than MAX_IFACE_NUM
+	 * entries are in use, so the index has to be bounded explicitly.
+	 */
+	for (i = 0; i < MAX_IFACE_NUM && ifaces[i] > 0; i++) {
+		if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
+			printf("bpf_get_link_xdp_id failed\n");
+			exit(1);
+		}
+		/* Only detach when a program is actually attached. */
+		if (prog_id)
+			bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
+	}
+
+	exit(0);
+}
+
+/*
+ * Look up the hardware address of the interface with index @ifindex and
+ * copy its first 6 bytes into @mac_addr.
+ *
+ * Returns 0 on success and -1 if the socket cannot be created, the index
+ * cannot be translated to a name, or the SIOCGIFHWADDR ioctl fails.
+ */
+static int get_mac_addr(unsigned int ifindex, void *mac_addr)
+{
+	char name[IF_NAMESIZE];
+	struct ifreq req;
+	int sock, rc = -1;
+
+	sock = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock < 0)
+		return rc;
+
+	if (if_indextoname(ifindex, name)) {
+		strcpy(req.ifr_name, name);
+
+		if (ioctl(sock, SIOCGIFHWADDR, &req) == 0) {
+			memcpy(mac_addr, req.ifr_hwaddr.sa_data, 6 * sizeof(char));
+			rc = 0;
+		}
+	}
+
+	close(sock);
+	return rc;
+}
+
+/* Print the command-line synopsis and option summary for @prog to stderr. */
+static void usage(const char *prog)
+{
+	fprintf(stderr,
+		"usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n",
+		prog);
+	fputs("OPTS:\n"
+	      " -S use skb-mode\n"
+	      " -N enforce native mode\n"
+	      " -F force loading prog\n"
+	      " -X load xdp program on egress\n",
+	      stderr);
+}
+
+/*
+ * Parse the options, resolve the interfaces named on the command line,
+ * load "<argv[0]>_kern.o", add every interface to the forwarding devmap
+ * (and, with -X, to the MAC map together with the egress program), attach
+ * the XDP program to each interface and then sleep so a test can run.
+ *
+ * Returns 0 after the sleep completes and 1 on any setup failure.
+ */
+int main(int argc, char **argv)
+{
+	int prog_fd, group_all, mac_map;
+	struct bpf_program *ingress_prog, *egress_prog;
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type = BPF_PROG_TYPE_UNSPEC,
+	};
+	int i, ret, opt, egress_prog_fd = 0;
+	struct bpf_devmap_val devmap_val;
+	bool attach_egress_prog = false;
+	unsigned char mac_addr[6];
+	char ifname[IF_NAMESIZE];
+	struct bpf_object *obj;
+	unsigned int ifindex;
+	char filename[256];
+
+	while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+		switch (opt) {
+		case 'S':
+			xdp_flags |= XDP_FLAGS_SKB_MODE;
+			break;
+		case 'N':
+			/* default, set below */
+			break;
+		case 'F':
+			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			break;
+		case 'X':
+			attach_egress_prog = true;
+			break;
+		default:
+			usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
+		xdp_flags |= XDP_FLAGS_DRV_MODE;
+	} else if (attach_egress_prog) {
+		printf("Load xdp program on egress with SKB mode not supported yet\n");
+		goto err_out;
+	}
+
+	if (optind == argc) {
+		printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
+		goto err_out;
+	}
+
+	/* Each argument is tried as an interface name first, then as an index. */
+	printf("Get interfaces");
+	for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
+		ifaces[i] = if_nametoindex(argv[optind + i]);
+		if (!ifaces[i])
+			ifaces[i] = strtoul(argv[optind + i], NULL, 0);
+		if (!if_indextoname(ifaces[i], ifname)) {
+			perror("Invalid interface name or index");
+			goto err_out;
+		}
+		if (ifaces[i] > MAX_INDEX_NUM) {
+			printf("Interface index too large\n");
+			goto err_out;
+		}
+		printf(" %d", ifaces[i]);
+	}
+	printf("\n");
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	prog_load_attr.file = filename;
+
+	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+		goto err_out;
+
+	if (attach_egress_prog)
+		group_all = bpf_object__find_map_fd_by_name(obj, "map_egress");
+	else
+		group_all = bpf_object__find_map_fd_by_name(obj, "map_all");
+	mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map");
+
+	if (group_all < 0 || mac_map < 0) {
+		printf("bpf_object__find_map_fd_by_name failed\n");
+		goto err_out;
+	}
+
+	if (attach_egress_prog) {
+		/* Find ingress/egress prog for 2nd xdp prog */
+		ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog");
+		egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
+		if (!ingress_prog || !egress_prog) {
+			printf("finding ingress/egress_prog in obj file failed\n");
+			goto err_out;
+		}
+		prog_fd = bpf_program__fd(ingress_prog);
+		egress_prog_fd = bpf_program__fd(egress_prog);
+		if (prog_fd < 0 || egress_prog_fd < 0) {
+			printf("find egress_prog fd failed\n");
+			goto err_out;
+		}
+	}
+
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	/*
+	 * Init forward multicast groups and exclude group.
+	 * The explicit bound matters: with MAX_IFACE_NUM interfaces there is
+	 * no zero entry left in ifaces[] to terminate the walk.
+	 */
+	for (i = 0; i < MAX_IFACE_NUM && ifaces[i] > 0; i++) {
+		ifindex = ifaces[i];
+
+		if (attach_egress_prog) {
+			ret = get_mac_addr(ifindex, mac_addr);
+			if (ret < 0) {
+				printf("get interface %d mac failed\n", ifindex);
+				goto err_out;
+			}
+			ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0);
+			if (ret) {
+				/* perror() appends ": <error>\n" itself. */
+				perror("bpf_update_elem mac_map failed");
+				goto err_out;
+			}
+		}
+
+		/* Add all the interfaces to group all */
+		devmap_val.ifindex = ifindex;
+		devmap_val.bpf_prog.fd = egress_prog_fd;
+		ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0);
+		if (ret) {
+			perror("bpf_map_update_elem");
+			goto err_out;
+		}
+
+		/* bind prog_fd to each interface */
+		ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
+		if (ret) {
+			printf("Set xdp fd failed on %d\n", ifindex);
+			goto err_out;
+		}
+	}
+
+	/* sleep some time for testing */
+	sleep(999);
+
+	return 0;
+
+err_out:
+	return 1;
+}
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index 84cfcabea838..be9643ef6285 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -2,4 +2,5 @@
test_memcontrol
test_core
test_freezer
-test_kmem \ No newline at end of file
+test_kmem
+test_kill
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index f027d933595b..59e222460581 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -9,6 +9,7 @@ TEST_GEN_PROGS = test_memcontrol
TEST_GEN_PROGS += test_kmem
TEST_GEN_PROGS += test_core
TEST_GEN_PROGS += test_freezer
+TEST_GEN_PROGS += test_kill
include ../lib.mk
@@ -16,3 +17,4 @@ $(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h
$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_kill: cgroup_util.c ../clone3/clone3_selftests.h ../pidfd/pidfd.h
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 027014662fb2..623cec04ad42 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -5,10 +5,12 @@
#include <errno.h>
#include <fcntl.h>
#include <linux/limits.h>
+#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/inotify.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
@@ -252,6 +254,10 @@ int cg_killall(const char *cgroup)
char buf[PAGE_SIZE];
char *ptr = buf;
+ /* If cgroup.kill exists use it. */
+ if (!cg_write(cgroup, "cgroup.kill", "1"))
+ return 0;
+
if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
return -1;
@@ -576,3 +582,48 @@ int clone_into_cgroup_run_wait(const char *cgroup)
(void)clone_reap(pid, WEXITED);
return 0;
}
+
+/*
+ * Create an inotify descriptor that watches the cgroup's "cgroup.events"
+ * file for modifications.
+ *
+ * Returns the descriptor on success and -1 on failure.
+ */
+int cg_prepare_for_wait(const char *cgroup)
+{
+	int inotify_fd;
+
+	inotify_fd = inotify_init1(0);
+	if (inotify_fd == -1)
+		return -1;
+
+	if (inotify_add_watch(inotify_fd, cg_control(cgroup, "cgroup.events"),
+			      IN_MODIFY) != -1)
+		return inotify_fd;
+
+	/* The watch could not be installed: do not leak the descriptor. */
+	close(inotify_fd);
+	return -1;
+}
+
+/*
+ * Wait for an event on @fd (as set up by cg_prepare_for_wait()).
+ *
+ * Returns 0 once the descriptor becomes readable. Returns -1 if poll()
+ * fails, if no event arrives within 10 seconds, or if poll() reports a
+ * condition other than POLLIN on the descriptor.
+ */
+int cg_wait_for(int fd)
+{
+	int ret = -1;
+	struct pollfd fds = {
+		.fd = fd,
+		.events = POLLIN,
+	};
+
+	while (true) {
+		ret = poll(&fds, 1, 10000);
+
+		if (ret == -1) {
+			/* Interrupted by a signal: simply retry the wait. */
+			if (errno == EINTR)
+				continue;
+
+			break;
+		}
+
+		if (ret == 0) {
+			/* Timed out: fail instead of polling forever. */
+			ret = -1;
+			break;
+		}
+
+		/*
+		 * Something was reported on the descriptor. Anything but
+		 * POLLIN (e.g. POLLERR/POLLNVAL) will not clear by itself,
+		 * so treat it as an error rather than spinning.
+		 */
+		ret = (fds.revents & POLLIN) ? 0 : -1;
+		break;
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 5a1305dd1f0b..82e59cdf16e7 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -54,3 +54,5 @@ extern pid_t clone_into_cgroup(int cgroup_fd);
extern int clone_reap(pid_t pid, int options);
extern int clone_into_cgroup_run_wait(const char *cgroup);
extern int dirfd_open_opath(const char *dir);
+extern int cg_prepare_for_wait(const char *cgroup);
+extern int cg_wait_for(int fd);
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
index 23d8fa4a3e4e..ff519029f6f4 100644
--- a/tools/testing/selftests/cgroup/test_freezer.c
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -7,9 +7,7 @@
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
-#include <poll.h>
#include <stdlib.h>
-#include <sys/inotify.h>
#include <string.h>
#include <sys/wait.h>
@@ -55,61 +53,6 @@ static int cg_freeze_nowait(const char *cgroup, bool freeze)
}
/*
- * Prepare for waiting on cgroup.events file.
- */
-static int cg_prepare_for_wait(const char *cgroup)
-{
- int fd, ret = -1;
-
- fd = inotify_init1(0);
- if (fd == -1) {
- debug("Error: inotify_init1() failed\n");
- return fd;
- }
-
- ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"),
- IN_MODIFY);
- if (ret == -1) {
- debug("Error: inotify_add_watch() failed\n");
- close(fd);
- fd = -1;
- }
-
- return fd;
-}
-
-/*
- * Wait for an event. If there are no events for 10 seconds,
- * treat this an error.
- */
-static int cg_wait_for(int fd)
-{
- int ret = -1;
- struct pollfd fds = {
- .fd = fd,
- .events = POLLIN,
- };
-
- while (true) {
- ret = poll(&fds, 1, 10000);
-
- if (ret == -1) {
- if (errno == EINTR)
- continue;
- debug("Error: poll() failed\n");
- break;
- }
-
- if (ret > 0 && fds.revents & POLLIN) {
- ret = 0;
- break;
- }
- }
-
- return ret;
-}
-
-/*
* Attach a task to the given cgroup and wait for a cgroup frozen event.
* All transient events (e.g. populated) are ignored.
*/
diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c
new file mode 100644
index 000000000000..6153690319c9
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_kill.c
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <errno.h>
+#include <linux/limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+#include "../pidfd/pidfd.h"
+#include "cgroup_util.h"
+
+/*
+ * Write "1" to the cgroup's cgroup.kill file and then wait, through the
+ * descriptor obtained from cg_prepare_for_wait(), for a notification.
+ *
+ * Returns 0 on success and a non-zero value if setting up the watch,
+ * the write, or the wait fails.
+ */
+static int cg_kill_wait(const char *cgroup)
+{
+	int watch_fd, err;
+
+	watch_fd = cg_prepare_for_wait(cgroup);
+	if (watch_fd < 0)
+		return watch_fd;
+
+	err = cg_write(cgroup, "cgroup.kill", "1");
+	if (!err)
+		err = cg_wait_for(watch_fd);
+
+	close(watch_fd);
+	return err;
+}
+
+/*
+ * Task body for the test children: poll the parent pid every millisecond
+ * and leave the loop once it differs from the one seen at start-up,
+ * i.e. once the task has been re-parented.
+ */
+static int child_fn(const char *cgroup, void *arg)
+{
+	int parent = getppid();
+
+	for (;;) {
+		if (getppid() != parent)
+			break;
+		usleep(1000);
+	}
+
+	return getppid() == parent;
+}
+
+/*
+ * Start 100 child_fn tasks in a fresh cgroup, kill the cgroup through
+ * cg_kill_wait() and verify that cgroup.events reports "populated 0"
+ * after the children have been reaped.
+ *
+ * Returns KSFT_PASS on success and KSFT_FAIL otherwise.
+ */
+static int test_cgkill_simple(const char *root)
+{
+	/* Zeroed so cleanup can tell which slots hold a started child. */
+	pid_t pids[100] = {0};
+	int ret = KSFT_FAIL;
+	char *cgroup = NULL;
+	int i;
+
+	cgroup = cg_name(root, "cg_test_simple");
+	if (!cgroup)
+		goto cleanup;
+
+	if (cg_create(cgroup))
+		goto cleanup;
+
+	for (i = 0; i < 100; i++)
+		pids[i] = cg_run_nowait(cgroup, child_fn, NULL);
+
+	if (cg_wait_for_proc_count(cgroup, 100))
+		goto cleanup;
+
+	if (cg_read_strcmp(cgroup, "cgroup.events", "populated 1\n"))
+		goto cleanup;
+
+	if (cg_kill_wait(cgroup))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	/*
+	 * Only reap children that were actually started; an early failure
+	 * reaches this point before any pid has been stored.
+	 */
+	for (i = 0; i < 100; i++) {
+		if (pids[i] > 0)
+			wait_for_pid(pids[i]);
+	}
+
+	if (ret == KSFT_PASS &&
+	    cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n"))
+		ret = KSFT_FAIL;
+
+	if (cgroup)
+		cg_destroy(cgroup);
+	free(cgroup);
+	return ret;
+}
+
+/*
+ * The test creates the following hierarchy:
+ *              A
+ *          / / \ \
+ *         B  E  I K
+ *        /\  |
+ *       C  D F
+ *            |
+ *            G
+ *            |
+ *            H
+ *
+ * with a process in C, H and 3 processes in K.
+ * Then it tries to kill the whole tree.
+ *
+ * Returns KSFT_PASS on success and KSFT_FAIL otherwise.
+ */
+static int test_cgkill_tree(const char *root)
+{
+	/* Zeroed so cleanup can tell which slots hold a started child. */
+	pid_t pids[5] = {0};
+	char *cgroup[10] = {0};
+	int ret = KSFT_FAIL;
+	int i;
+
+	cgroup[0] = cg_name(root, "cg_test_tree_A");
+	if (!cgroup[0])
+		goto cleanup;
+
+	cgroup[1] = cg_name(cgroup[0], "B");
+	if (!cgroup[1])
+		goto cleanup;
+
+	cgroup[2] = cg_name(cgroup[1], "C");
+	if (!cgroup[2])
+		goto cleanup;
+
+	cgroup[3] = cg_name(cgroup[1], "D");
+	if (!cgroup[3])
+		goto cleanup;
+
+	cgroup[4] = cg_name(cgroup[0], "E");
+	if (!cgroup[4])
+		goto cleanup;
+
+	cgroup[5] = cg_name(cgroup[4], "F");
+	if (!cgroup[5])
+		goto cleanup;
+
+	cgroup[6] = cg_name(cgroup[5], "G");
+	if (!cgroup[6])
+		goto cleanup;
+
+	cgroup[7] = cg_name(cgroup[6], "H");
+	if (!cgroup[7])
+		goto cleanup;
+
+	cgroup[8] = cg_name(cgroup[0], "I");
+	if (!cgroup[8])
+		goto cleanup;
+
+	cgroup[9] = cg_name(cgroup[0], "K");
+	if (!cgroup[9])
+		goto cleanup;
+
+	for (i = 0; i < 10; i++)
+		if (cg_create(cgroup[i]))
+			goto cleanup;
+
+	pids[0] = cg_run_nowait(cgroup[2], child_fn, NULL);
+	pids[1] = cg_run_nowait(cgroup[7], child_fn, NULL);
+	pids[2] = cg_run_nowait(cgroup[9], child_fn, NULL);
+	pids[3] = cg_run_nowait(cgroup[9], child_fn, NULL);
+	pids[4] = cg_run_nowait(cgroup[9], child_fn, NULL);
+
+	/*
+	 * Wait until all child processes will enter
+	 * corresponding cgroups.
+	 */
+
+	if (cg_wait_for_proc_count(cgroup[2], 1) ||
+	    cg_wait_for_proc_count(cgroup[7], 1) ||
+	    cg_wait_for_proc_count(cgroup[9], 3))
+		goto cleanup;
+
+	/*
+	 * Kill A and check that we get an empty notification.
+	 */
+	if (cg_kill_wait(cgroup[0]))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	/* Only reap children that were actually started. */
+	for (i = 0; i < 5; i++) {
+		if (pids[i] > 0)
+			wait_for_pid(pids[i]);
+	}
+
+	if (ret == KSFT_PASS &&
+	    cg_read_strcmp(cgroup[0], "cgroup.events", "populated 0\n"))
+		ret = KSFT_FAIL;
+
+	/*
+	 * Walk from the deepest entry upwards and skip unset slots, so that
+	 * a cg_name() failure part-way through still releases every name
+	 * that had already been allocated.
+	 */
+	for (i = 9; i >= 0; i--) {
+		if (!cgroup[i])
+			continue;
+		cg_destroy(cgroup[i]);
+		free(cgroup[i]);
+	}
+
+	return ret;
+}
+
+/*
+ * Task body for the fork bomb test: fork twice in a row, then have every
+ * resulting process sleep in 1 ms steps until its parent pid changes.
+ */
+static int forkbomb_fn(const char *cgroup, void *arg)
+{
+	int parent;
+	int round;
+
+	/* Every process that exists at each step forks once more. */
+	for (round = 0; round < 2; round++)
+		fork();
+
+	parent = getppid();
+	for (;;) {
+		if (getppid() != parent)
+			break;
+		usleep(1000);
+	}
+
+	return getppid() == parent;
+}
+
+/*
+ * Run forkbomb_fn in a new cgroup, give it 100 ms to multiply, then kill
+ * the cgroup and check that it drains to zero processes and reports
+ * "populated 0".
+ *
+ * Returns KSFT_PASS on success and KSFT_FAIL otherwise.
+ */
+static int test_cgkill_forkbomb(const char *root)
+{
+	pid_t bomb = -ESRCH;
+	char *cgroup = NULL;
+	int ret = KSFT_FAIL;
+
+	cgroup = cg_name(root, "cg_forkbomb_test");
+	if (!cgroup || cg_create(cgroup))
+		goto cleanup;
+
+	bomb = cg_run_nowait(cgroup, forkbomb_fn, NULL);
+	if (bomb < 0)
+		goto cleanup;
+
+	usleep(100000);
+
+	if (cg_kill_wait(cgroup) || cg_wait_for_proc_count(cgroup, 0))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (bomb > 0)
+		wait_for_pid(bomb);
+
+	if (ret == KSFT_PASS &&
+	    cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n"))
+		ret = KSFT_FAIL;
+
+	if (cgroup)
+		cg_destroy(cgroup);
+	free(cgroup);
+	return ret;
+}
+
+/*
+ * Table of test cases run by main(). T(x) pairs a test function with its
+ * own identifier as a string, which main() uses as the reported name.
+ */
+#define T(x) { x, #x }
+struct cgkill_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_cgkill_simple),
+ T(test_cgkill_tree),
+ T(test_cgkill_forkbomb),
+};
+#undef T
+
+/*
+ * Locate the unified cgroup root (skipping the run when there is none),
+ * execute every entry of tests[] and report each result through the
+ * kselftest helpers.
+ *
+ * Returns EXIT_FAILURE if any test neither passed nor was skipped,
+ * EXIT_SUCCESS otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	char root[PATH_MAX];
+	int status = EXIT_SUCCESS;
+	int idx, rc;
+
+	if (cg_find_unified_root(root, sizeof(root)))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+	for (idx = 0; idx < ARRAY_SIZE(tests); idx++) {
+		rc = tests[idx].fn(root);
+
+		if (rc == KSFT_PASS) {
+			ksft_test_result_pass("%s\n", tests[idx].name);
+		} else if (rc == KSFT_SKIP) {
+			ksft_test_result_skip("%s\n", tests[idx].name);
+		} else {
+			status = EXIT_FAILURE;
+			ksft_test_result_fail("%s\n", tests[idx].name);
+		}
+	}
+
+	return status;
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index 4029833f7e27..160891dcb4bc 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -109,6 +109,9 @@ router_destroy()
__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
tc qdisc del dev $rp2 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
}
setup_prepare()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 42d44e27802c..190c1b6b5365 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -111,6 +111,9 @@ router_destroy()
__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
tc qdisc del dev $rp2 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
}
setup_prepare()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
index 65f43a7ce9c9..1e9a4aff76a2 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
@@ -7,6 +7,8 @@
PORT_NUM_NETIFS=0
+declare -a unsplit
+
port_setup_prepare()
{
:
@@ -20,12 +22,12 @@ port_cleanup()
devlink port unsplit $port
check_err $? "Did not unsplit $netdev"
done
+ unsplit=()
}
split_all_ports()
{
local should_fail=$1; shift
- local -a unsplit
# Loop over the splittable netdevs and create tuples of netdev along
# with its width. For example:
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
index 5cbff8038f84..28a570006d4d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -93,7 +93,9 @@ switch_destroy()
lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
lldpad_app_wait_del
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
index 27de3d9ed08e..f4493ef9cca1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -29,37 +29,38 @@ cleanup()
get_prio_pg()
{
- __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
- grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2-
+ # Produces a string of numbers "<B0> <B1> ... <B7> ", where BX is number
+ # of buffer that priority X is mapped to.
+ dcb -j buffer show dev $swp |
+ jq -r '[.prio_buffer | .[] | tostring + " "] | add'
}
get_prio_pfc()
{
- __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
- grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2-
+ # Produces a string of numbers "<P0> <P1> ... <P7> ", where PX denotes
+ # whether priority X has PFC enabled (the value is 1) or disabled (0).
+ dcb -j pfc show dev $swp |
+ jq -r '[.prio_pfc | .[] | if . then "1 " else "0 " end] | add'
}
get_prio_tc()
{
- __mlnx_qos -i $swp | sed -n '/^tc/,$p' |
- awk '/^tc/ { TC = $2 }
- /priority:/ { PRIO[$2]=TC }
- END {
- for (i in PRIO)
- printf("%d ", PRIO[i])
- }'
+ # Produces a string of numbers "<T0> <T1> ... <T7> ", where TX is number
+ # of TC that priority X is mapped to.
+ dcb -j ets show dev $swp |
+ jq -r '[.prio_tc | .[] | tostring + " "] | add'
}
get_buf_size()
{
local idx=$1; shift
- __mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1))
+ dcb -j buffer show dev $swp | jq ".buffer_size[$idx]"
}
get_tot_size()
{
- __mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//'
+ dcb -j buffer show dev $swp | jq '.total_size'
}
check_prio_pg()
@@ -121,18 +122,18 @@ test_dcb_ets()
{
RET=0
- __mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null
+ dcb ets set dev $swp prio-tc 0:0 1:2 2:4 3:6 4:1 5:3 6:5 7:7
check_prio_pg "0 2 4 6 1 3 5 7 "
check_prio_tc "0 2 4 6 1 3 5 7 "
check_prio_pfc "0 0 0 0 0 0 0 0 "
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+ dcb ets set dev $swp prio-tc all:0
check_prio_pg "0 0 0 0 0 0 0 0 "
check_prio_tc "0 0 0 0 0 0 0 0 "
- __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null
+ dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 2>/dev/null
check_fail $? "prio2buffer accepted in DCB mode"
log_test "Configuring headroom through ETS"
@@ -174,7 +175,7 @@ test_pfc()
{
RET=0
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null
+ dcb ets set dev $swp prio-tc all:0 5:1 6:2 7:3
local buf0size=$(get_buf_size 0)
local buf1size=$(get_buf_size 1)
@@ -193,7 +194,7 @@ test_pfc()
RET=0
- __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null
+ dcb pfc set dev $swp prio-pfc all:off 5:on 6:on 7:on delay 0
check_prio_pg "0 0 0 0 0 1 2 3 "
check_prio_pfc "0 0 0 0 0 1 1 1 "
@@ -210,7 +211,7 @@ test_pfc()
RET=0
- __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null
+ dcb pfc set dev $swp delay 1000
check_buf_size 0 "== $buf0size"
check_buf_size 1 "> $buf1size"
@@ -221,8 +222,8 @@ test_pfc()
RET=0
- __mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+ dcb pfc set dev $swp prio-pfc all:off delay 0
+ dcb ets set dev $swp prio-tc all:0
check_prio_pg "0 0 0 0 0 0 0 0 "
check_prio_tc "0 0 0 0 0 0 0 0 "
@@ -242,13 +243,13 @@ test_tc_priomap()
{
RET=0
- __mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null
+ dcb ets set dev $swp prio-tc 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
check_prio_pg "0 1 2 3 4 5 6 7 "
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
check_prio_pg "0 0 0 0 0 0 0 0 "
- __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null
+ dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6
check_prio_pg "1 3 5 7 0 2 4 6 "
tc qdisc delete dev $swp root
@@ -256,9 +257,9 @@ test_tc_priomap()
# Clean up.
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
- __mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp prio-buffer all:0
tc qdisc delete dev $swp root
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+ dcb ets set dev $swp prio-tc all:0
log_test "TC: priomap"
}
@@ -270,12 +271,12 @@ test_tc_sizes()
RET=0
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
check_fail $? "buffer_size should fail before qdisc is added"
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
check_err $? "buffer_size should pass after qdisc is added"
check_buf_size 0 "== $size" "set size: "
@@ -283,26 +284,26 @@ test_tc_sizes()
check_buf_size 0 "== $size" "set MTU: "
mtu_restore $swp
- __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0
# After replacing the qdisc for the same kind, buffer_size still has to
# work.
tc qdisc replace dev $swp root handle 1: bfifo limit 1M
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
check_buf_size 0 "== $size" "post replace, set size: "
- __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0
# Likewise after replacing for a different kind.
tc qdisc replace dev $swp root handle 2: prio bands 8
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
check_buf_size 0 "== $size" "post replace different kind, set size: "
tc qdisc delete dev $swp root
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
check_fail $? "buffer_size should fail after qdisc is deleted"
log_test "TC: buffer size"
@@ -363,10 +364,10 @@ test_tc_int_buf()
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
test_int_buf "TC: "
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
test_int_buf "TC+buffsize: "
- __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0
tc qdisc delete dev $swp root
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
index 0bf76f13c030..faa51012cdac 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -82,17 +82,3 @@ bail_on_lldpad()
fi
fi
}
-
-__mlnx_qos()
-{
- local err
-
- mlnx_qos "$@" 2>/dev/null
- err=$?
-
- if ((err)); then
- echo "Error ($err) in mlnx_qos $@" >/dev/stderr
- fi
-
- return $err
-}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
index 5c7700212f75..5d5622fc2758 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -171,7 +171,7 @@ switch_create()
# assignment.
tc qdisc replace dev $swp1 root handle 1: \
ets bands 8 strict 8 priomap 7 6
- __mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp1 prio-buffer all:0 1:1
# $swp2
# -----
@@ -209,8 +209,8 @@ switch_create()
# the lossless prio into a buffer of its own. Don't bother with buffer
# sizes though, there is not going to be any pressure in the "backward"
# direction.
- __mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp3 prio-buffer all:0 1:1
+ dcb pfc set dev $swp3 prio-pfc all:off 1:on
# $swp4
# -----
@@ -226,11 +226,11 @@ switch_create()
# Configure qdisc so that we can hand-tune headroom.
tc qdisc replace dev $swp4 root handle 1: \
ets bands 8 strict 8 priomap 7 6
- __mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp4 prio-buffer all:0 1:1
+ dcb pfc set dev $swp4 prio-pfc all:off 1:on
# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
# is (-2*MTU) about 80K of delay provision.
- __mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp4 buffer-size all:0 1:$_100KB
# bridges
# -------
@@ -273,9 +273,9 @@ switch_destroy()
# $swp4
# -----
- __mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp4 buffer-size all:0
+ dcb pfc set dev $swp4 prio-pfc all:off
+ dcb buffer set dev $swp4 prio-buffer all:0
tc qdisc del dev $swp4 root
devlink_tc_bind_pool_th_restore $swp4 1 ingress
@@ -288,8 +288,8 @@ switch_destroy()
# $swp3
# -----
- __mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ dcb pfc set dev $swp3 prio-pfc all:off
+ dcb buffer set dev $swp3 prio-buffer all:0
tc qdisc del dev $swp3 root
devlink_tc_bind_pool_th_restore $swp3 1 egress
@@ -315,7 +315,7 @@ switch_destroy()
# $swp1
# -----
- __mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp1 prio-buffer all:0
tc qdisc del dev $swp1 root
devlink_tc_bind_pool_th_restore $swp1 1 ingress
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
index e93878d42596..683759d29199 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -68,7 +68,7 @@ wait_for_routes()
local t0=$1; shift
local route_count=$1; shift
- local t1=$(ip route | grep -o 'offload' | wc -l)
+ local t1=$(ip route | grep 'offload' | grep -v 'offload_failed' | wc -l)
local delta=$((t1 - t0))
echo $delta
[[ $delta -ge $route_count ]]
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
index 093bed088ad0..373d5f2a846e 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -234,15 +234,15 @@ __tc_sample_rate_test()
psample_capture_start
- ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
-B $dip -t udp dp=52768,sp=42768 -q
psample_capture_stop
pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
- pct=$((100 * (pkts - 100) / 100))
+ pct=$((100 * (pkts - 10000) / 10000))
(( -25 <= pct && pct <= 25))
- check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+ check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
log_test "tc sample rate ($desc)"
@@ -587,15 +587,15 @@ __tc_sample_acl_rate_test()
psample_capture_start
- ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
psample_capture_stop
pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
- pct=$((100 * (pkts - 100) / 100))
+ pct=$((100 * (pkts - 10000) / 10000))
(( -25 <= pct && pct <= 25))
- check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+ check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
# Setup a filter that should not match any packet and make sure packets
# are not sampled.
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 40909c254365..9de1d123f4f5 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -5,12 +5,13 @@ lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="fw_flash_test params_test regions_test reload_test \
netns_reload_test resource_test dev_info_test \
- empty_reporter_test dummy_reporter_test"
+ empty_reporter_test dummy_reporter_test rate_test"
NUM_NETIFS=0
source $lib_dir/lib.sh
BUS_ADDR=10
PORT_COUNT=4
+VF_COUNT=4
DEV_NAME=netdevsim$BUS_ADDR
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/
@@ -507,6 +508,170 @@ dummy_reporter_test()
log_test "dummy reporter test"
}
+# Output the keys of all rate objects of type "leaf" whose key contains the
+# given handle, as reported by "devlink port function rate show -j".
+# $1: handle (sub)string the key must contain
+rate_leafs_get()
+{
+ local handle=$1
+
+ cmd_jq "devlink port function rate show -j" \
+ '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))'
+}
+
+# Output the keys of all rate objects of type "node" whose key contains the
+# given handle, as reported by "devlink port function rate show -j".
+# $1: handle (sub)string the key must contain
+rate_nodes_get()
+{
+ local handle=$1
+
+ cmd_jq "devlink port function rate show -j" \
+ '.[] | to_entries | .[] | select(.value.type == "node") | .key | select(contains("'$handle'"))'
+}
+
+rate_attr_set()
+{
+ local handle=$1
+ local name=$2
+ local value=$3
+ local units=$4
+
+ devlink port function rate set $handle $name $value$units
+}
+
+# Output the value of one attribute of a rate object, extracted with jq from
+# the JSON printed by "devlink port function rate show <handle> -j".
+# $1: rate object handle
+# $2: attribute name, appended to the jq path '.[][].'
+rate_attr_get()
+{
+ local handle=$1
+ local name=$2
+
+ cmd_jq "devlink port function rate show $handle -j" '.[][].'$name
+}
+
+# Set a tx rate attribute on a rate object and verify that both debugfs and
+# the devlink API report the value that was set.
+# $1: rate object handle
+# $2: attribute name
+# $3: rate, set with the "mbit" units suffix
+# $4: debugfs file expected to contain the same rate
+# Every failure is recorded through check_err.
+rate_attr_tx_rate_check()
+{
+	local handle=$1
+	local name=$2
+	local rate=$3
+	local debug_file=$4
+	# Declared separately from their assignments: "local var=$(cmd)"
+	# returns the status of "local" itself, which would hide a failing
+	# command from the check_err calls below.
+	local debug_value
+	local api_value
+
+	rate_attr_set $handle $name $rate mbit
+	check_err $? "Failed to set $name value"
+
+	debug_value=$(cat $debug_file)
+	check_err $? "Failed to read $name value from debugfs"
+	[ "$debug_value" == "$rate" ]
+	check_err $? "Unexpected $name debug value $debug_value != $rate"
+
+	api_value=$(rate_attr_get $handle $name)
+	check_err $? "Failed to get $name attr value"
+	# Scale the reported value by 8 / 1000000 so that it can be compared
+	# with the mbit rate that was set.
+	api_value=$(( api_value * 8 / 1000000 ))
+	[ "$api_value" == "$rate" ]
+	check_err $? "Unexpected $name attr value $api_value != $rate"
+}
+
+# Set the parent of a rate object and verify that both debugfs and the
+# devlink API report that parent.
+# $1: handle of the rate object whose parent is set
+# $2: parent node name
+# $3: debugfs file expected to contain the parent name
+# Every failure is recorded through check_err.
+rate_attr_parent_check()
+{
+	local handle=$1
+	local parent=$2
+	local debug_file=$3
+	# Declared separately so that $? below reflects the command
+	# substitution and the values do not leak into the caller's scope.
+	local debug_value
+	local api_value
+
+	rate_attr_set $handle parent $parent
+	check_err $? "Failed to set parent"
+
+	debug_value=$(cat $debug_file)
+	check_err $? "Failed to get parent debugfs value"
+	[ "$debug_value" == "$parent" ]
+	check_err $? "Unexpected parent debug value $debug_value != $parent"
+
+	# Query the object that was just modified, not whatever rate object
+	# the caller happens to keep in its own variables.
+	api_value=$(rate_attr_get $handle parent)
+	check_err $? "Failed to get parent attr value"
+	[ "$api_value" == "$parent" ]
+	check_err $? "Unexpected parent attr value $api_value != $parent"
+}
+
+# Create a rate node through devlink.
+# $1: handle of the node to add
+rate_node_add()
+{
+	devlink port function rate add $1
+}
+
+# Delete a rate node through devlink.
+# $1: handle of the node to delete
+rate_node_del()
+{
+	devlink port function rate del $1
+}
+
+# Test of the devlink rate objects: leaf tx_share/tx_max, node creation and
+# deletion, node tx_share/tx_max, and parent assignment for a leaf and for a
+# node. The result is reported through log_test.
+rate_test()
+{
+ RET=0
+
+ # Create the VFs and switch to switchdev mode, then expect one rate
+ # leaf per VF.
+ echo $VF_COUNT > /sys/bus/netdevsim/devices/$DEV_NAME/sriov_numvfs
+ devlink dev eswitch set $DL_HANDLE mode switchdev
+ local leafs=`rate_leafs_get $DL_HANDLE`
+ local num_leafs=`echo $leafs | wc -w`
+ [ "$num_leafs" == "$VF_COUNT" ]
+ check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs"
+
+ # Give every leaf a distinct tx_share (10, 20, ...) and check it.
+ rate=10
+ for r_obj in $leafs
+ do
+ rate_attr_tx_rate_check $r_obj tx_share $rate \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/tx_share
+ rate=$(($rate+10))
+ done
+
+ # Give every leaf a distinct tx_max (100, 200, ...) and check it.
+ rate=100
+ for r_obj in $leafs
+ do
+ rate_attr_tx_rate_check $r_obj tx_max $rate \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/tx_max
+ rate=$(($rate+100))
+ done
+
+ # Add a node, check that it is listed and that its rates can be set.
+ local node1_name='group1'
+ local node1="$DL_HANDLE/$node1_name"
+ rate_node_add "$node1"
+ check_err $? "Failed to add node $node1"
+
+ local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+ [ $num_nodes == 1 ]
+ check_err $? "Expected 1 rate node in output but got $num_nodes"
+
+ local node_tx_share=10
+ rate_attr_tx_rate_check $node1 tx_share $node_tx_share \
+ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_share
+
+ local node_tx_max=100
+ rate_attr_tx_rate_check $node1 tx_max $node_tx_max \
+ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max
+
+ # Delete the node and check that it is no longer listed.
+ rate_node_del "$node1"
+ check_err $? "Failed to delete node $node1"
+ local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+ [ $num_nodes == 0 ]
+ check_err $? "Expected 0 rate node but got $num_nodes"
+
+ # Re-create the node and use it as parent. $r_obj still holds the last
+ # leaf visited by the loops above.
+ local node1_name='group1'
+ local node1="$DL_HANDLE/$node1_name"
+ rate_node_add "$node1"
+ check_err $? "Failed to add node $node1"
+
+ rate_attr_parent_check $r_obj $node1_name \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/rate_parent
+
+ # A node can have another node as parent as well.
+ local node2_name='group2'
+ local node2="$DL_HANDLE/$node2_name"
+ rate_node_add "$node2"
+ check_err $? "Failed to add node $node2"
+
+ rate_attr_parent_check $node2 $node1_name \
+ $DEBUGFS_DIR/rate_nodes/$node2_name/rate_parent
+ # Remove the child node and detach the leaf before deleting node1.
+ rate_node_del "$node2"
+ check_err $? "Failed to delete node $node2"
+ rate_attr_set "$r_obj" noparent
+ check_err $? "Failed to unset $r_obj parent node"
+ rate_node_del "$node1"
+ check_err $? "Failed to delete node $node1"
+
+ log_test "rate test"
+}
+
setup_prepare()
{
modprobe netdevsim
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
index da49ad2761b5..109900c817be 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -24,13 +24,15 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
SLEEP_TIME=1
NETDEV=""
NUM_NETIFS=0
source $lib_dir/lib.sh
+
+DEVLINK_DEV=
source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
require_command udevadm
@@ -163,6 +165,16 @@ trap_stats_test()
devlink_trap_action_set $trap_name "drop"
devlink_trap_stats_idle_test $trap_name
check_err $? "Stats of trap $trap_name not idle when action is drop"
+
+ echo "y"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+ devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+ check_fail $? "Managed to read trap (hard dropped) statistics when should not"
+ echo "n"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+ devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+ check_err $? "Did not manage to read trap (hard dropped) statistics when should"
+
+ devlink_trap_drop_stats_idle_test $trap_name
+ check_fail $? "Drop stats of trap $trap_name idle when should not"
else
devlink_trap_stats_idle_test $trap_name
check_fail $? "Stats of non-drop trap $trap_name idle when should not"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
index 251f228ce63e..fc794cd30389 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
@@ -33,13 +33,15 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
NUM_NETIFS=0
source $lib_dir/lib.sh
-source $lib_dir/devlink_lib.sh
source $lib_dir/fib_offload_lib.sh
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
ipv4_identical_routes()
{
fib_ipv4_identical_routes_test "testns1"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
index ba75c81cda91..e8e0dc088d6a 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -44,12 +44,14 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/
NUM_NETIFS=0
source $lib_dir/lib.sh
+
+DEVLINK_DEV=
source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
nexthop_check()
{
diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
index ee10b1a8933c..e689ff7a0b12 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/psample.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
@@ -14,13 +14,15 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/
CAPTURE_FILE=$(mktemp)
NUM_NETIFS=0
source $lib_dir/lib.sh
+
+DEVLINK_DEV=
source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
# Available at https://github.com/Mellanox/libpsample
require_command psample
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index cf69b2fcce59..dd61118df66e 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -28,8 +28,8 @@ $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
cp $< $@
chmod -x $@
$(OUTPUT)/load_address_4096: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@
$(OUTPUT)/load_address_2097152: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@
$(OUTPUT)/load_address_16777216: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index 0efcd494daab..0e78b49d0f2f 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -6,3 +6,5 @@ futex_wait_private_mapped_file
futex_wait_timeout
futex_wait_uninitialized_heap
futex_wait_wouldblock
+futex_wait
+futex_requeue
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 23207829ec75..bd1fec59e010 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-INCLUDES := -I../include -I../../
+INCLUDES := -I../include -I../../ -I../../../../../usr/include/ \
+ -I$(KBUILD_OUTPUT)/kselftest/usr/include
CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
LDLIBS := -lpthread -lrt
@@ -14,7 +15,9 @@ TEST_GEN_FILES := \
futex_requeue_pi_signal_restart \
futex_requeue_pi_mismatched_ops \
futex_wait_uninitialized_heap \
- futex_wait_private_mapped_file
+ futex_wait_private_mapped_file \
+ futex_wait \
+ futex_requeue
TEST_PROGS := run.sh
diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c
new file mode 100644
index 000000000000..51485be6eb2f
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Collabora Ltd., 2021
+ *
+ * futex cmp requeue test by André Almeida <andrealmeid@collabora.com>
+ */
+
+#include <pthread.h>
+#include <limits.h>
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-requeue"
+#define timeout_ns 30000000
+#define WAKE_WAIT_US 10000
+
+volatile futex_t *f1;
+
+/* Print the command-line help of this test; prog is the program name shown. */
+void usage(char *prog)
+{
+	printf("Usage: %s\n"
+	       " -c Use color\n"
+	       " -h Display this help message\n"
+	       " -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       prog, VQUIET, VCRITICAL, VINFO);
+}
+
+/*
+ * Thread body: wait on the global futex f1, expecting its current value,
+ * for at most timeout_ns nanoseconds. A non-zero futex_wait() result is
+ * reported on stdout together with errno. arg is unused.
+ */
+void *waiterfn(void *arg)
+{
+	struct timespec wait_limit = { .tv_sec = 0, .tv_nsec = timeout_ns };
+	int res = futex_wait(f1, *f1, &wait_limit, 0);
+
+	if (res)
+		printf("waiter failed errno %d\n", errno);
+
+	return NULL;
+}
+
+/*
+ * Test futex_cmp_requeue() in two steps:
+ *  1. one waiter on f1 is requeued to f2 and then woken at f2;
+ *  2. ten waiters on f1: three are woken, seven requeued to f2, and a
+ *     wake of INT_MAX waiters at f2 must wake exactly those seven.
+ *
+ * Returns RET_PASS when both steps succeed, RET_FAIL otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	pthread_t waiter[10];
+	int res, ret = RET_PASS;
+	int c, i;
+	volatile futex_t _f1 = 0;
+	volatile futex_t f2 = 0;
+
+	f1 = &_f1;
+
+	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	ksft_print_header();
+	ksft_set_plan(2);
+	ksft_print_msg("%s: Test futex_requeue\n",
+		       basename(argv[0]));
+
+	/*
+	 * Requeue a waiter from f1 to f2, and wake f2.
+	 */
+	/* pthread_create() returns the error number; it does not set errno. */
+	res = pthread_create(&waiter[0], NULL, waiterfn, NULL);
+	if (res)
+		error("pthread_create failed\n", res);
+
+	/* Give the waiter time to block on f1 before requeuing. */
+	usleep(WAKE_WAIT_US);
+
+	info("Requeuing 1 futex from f1 to f2\n");
+	res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0);
+	if (res != 1) {
+		/* errno is only meaningful when the call itself failed. */
+		ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
+				      res < 0 ? errno : res,
+				      res < 0 ? strerror(errno) : "");
+		ret = RET_FAIL;
+	}
+
+
+	info("Waking 1 futex at f2\n");
+	res = futex_wake(&f2, 1, 0);
+	if (res != 1) {
+		ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
+				      res < 0 ? errno : res,
+				      res < 0 ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_requeue simple succeeds\n");
+	}
+
+
+	/*
+	 * Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7.
+	 * At futex_wake, wake INT_MAX (should be exactly 7).
+	 */
+	for (i = 0; i < 10; i++) {
+		res = pthread_create(&waiter[i], NULL, waiterfn, NULL);
+		if (res)
+			error("pthread_create failed\n", res);
+	}
+
+	usleep(WAKE_WAIT_US);
+
+	info("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n");
+	res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0);
+	if (res != 10) {
+		ksft_test_result_fail("futex_requeue many returned: %d %s\n",
+				      res < 0 ? errno : res,
+				      res < 0 ? strerror(errno) : "");
+		ret = RET_FAIL;
+	}
+
+	info("Waking INT_MAX futexes at f2\n");
+	res = futex_wake(&f2, INT_MAX, 0);
+	if (res != 7) {
+		ksft_test_result_fail("futex_requeue many returned: %d %s\n",
+				      res < 0 ? errno : res,
+				      res < 0 ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_requeue many succeeds\n");
+	}
+
+	ksft_print_cnts();
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c
new file mode 100644
index 000000000000..685140d9b93d
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Collabora Ltd., 2021
+ *
+ * futex wait test by André Almeida <andrealmeid@collabora.com>
+ */
+
+#include <pthread.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-wait"
+#define timeout_ns 30000000
+#define WAKE_WAIT_US 10000
+#define SHM_PATH "futex_shm_file"
+
+void *futex;
+
+/* Print the command-line help of this test; prog is the program name shown. */
+void usage(char *prog)
+{
+	printf("Usage: %s\n"
+	       " -c Use color\n"
+	       " -h Display this help message\n"
+	       " -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       prog, VQUIET, VCRITICAL, VINFO);
+}
+
+/*
+ * Thread body: wait on the global futex, expecting the value 0, for at most
+ * timeout_ns nanoseconds.
+ *
+ * arg is either NULL (no futex flags) or points to an unsigned int holding
+ * the flags passed to futex_wait(). A non-zero futex_wait() result is
+ * reported on stdout together with errno.
+ */
+static void *waiterfn(void *arg)
+{
+	struct timespec wait_limit = { .tv_sec = 0, .tv_nsec = timeout_ns };
+	unsigned int wait_flags = arg ? *((unsigned int *) arg) : 0;
+
+	if (futex_wait(futex, 0, &wait_limit, wait_flags))
+		printf("waiter failed errno %d\n", errno);
+
+	return NULL;
+}
+
+/*
+ * Test futex_wait()/futex_wake() on three kinds of futex words: a private
+ * one on the stack, one in an anonymous SysV shared memory segment and one
+ * in a file-backed shared mapping. For each, a waiter thread blocks on the
+ * futex and the main thread must wake exactly one waiter.
+ *
+ * Returns RET_PASS when all three steps succeed, RET_FAIL otherwise; exits
+ * with status 1 when the test environment cannot be set up.
+ */
+int main(int argc, char *argv[])
+{
+	int res, ret = RET_PASS, fd, c, shm_id;
+	u_int32_t f_private = 0, *shared_data;
+	unsigned int flags = FUTEX_PRIVATE_FLAG;
+	pthread_t waiter;
+	void *shm;
+
+	futex = &f_private;
+
+	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	ksft_print_header();
+	ksft_set_plan(3);
+	ksft_print_msg("%s: Test futex_wait\n", basename(argv[0]));
+
+	/* Testing a private futex */
+	info("Calling private futex_wait on futex: %p\n", futex);
+	/* pthread_create() returns the error number; it does not set errno. */
+	res = pthread_create(&waiter, NULL, waiterfn, (void *) &flags);
+	if (res)
+		error("pthread_create failed\n", res);
+
+	/* Give the waiter time to block before waking it. */
+	usleep(WAKE_WAIT_US);
+
+	info("Calling private futex_wake on futex: %p\n", futex);
+	res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG);
+	if (res != 1) {
+		/* errno is only meaningful when the call itself failed. */
+		ksft_test_result_fail("futex_wake private returned: %d %s\n",
+				      res < 0 ? errno : res,
+				      res < 0 ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_wake private succeeds\n");
+	}
+
+	/* Testing an anon page shared memory */
+	shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
+	if (shm_id < 0) {
+		perror("shmget");
+		exit(1);
+	}
+
+	shared_data = shmat(shm_id, NULL, 0);
+	if (shared_data == (void *) -1) {
+		perror("shmat");
+		shmctl(shm_id, IPC_RMID, NULL);
+		exit(1);
+	}
+
+	/*
+	 * Mark the segment for destruction right away so that it cannot
+	 * outlive the test on any exit path; it stays usable until it is
+	 * detached.
+	 */
+	shmctl(shm_id, IPC_RMID, NULL);
+
+	*shared_data = 0;
+	futex = shared_data;
+
+	info("Calling shared (page anon) futex_wait on futex: %p\n", futex);
+	res = pthread_create(&waiter, NULL, waiterfn, NULL);
+	if (res)
+		error("pthread_create failed\n", res);
+
+	usleep(WAKE_WAIT_US);
+
+	info("Calling shared (page anon) futex_wake on futex: %p\n", futex);
+	res = futex_wake(futex, 1, 0);
+	if (res != 1) {
+		ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n",
+				      res < 0 ? errno : res,
+				      res < 0 ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_wake shared (page anon) succeeds\n");
+	}
+
+
+	/* Testing a file backed shared memory */
+	fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+	if (fd < 0) {
+		perror("open");
+		exit(1);
+	}
+
+	if (ftruncate(fd, sizeof(f_private))) {
+		perror("ftruncate");
+		remove(SHM_PATH);
+		exit(1);
+	}
+
+	shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (shm == MAP_FAILED) {
+		perror("mmap");
+		remove(SHM_PATH);
+		exit(1);
+	}
+
+	memcpy(shm, &f_private, sizeof(f_private));
+
+	futex = shm;
+
+	info("Calling shared (file backed) futex_wait on futex: %p\n", futex);
+	res = pthread_create(&waiter, NULL, waiterfn, NULL);
+	if (res)
+		error("pthread_create failed\n", res);
+
+	usleep(WAKE_WAIT_US);
+
+	info("Calling shared (file backed) futex_wake on futex: %p\n", futex);
+	res = futex_wake(shm, 1, 0);
+	if (res != 1) {
+		ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n",
+				      res < 0 ? errno : res,
+				      res < 0 ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_wake shared (file backed) succeeds\n");
+	}
+
+	/* Freeing resources */
+	shmdt(shared_data);
+	munmap(shm, sizeof(f_private));
+	remove(SHM_PATH);
+	close(fd);
+
+	ksft_print_cnts();
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index ee55e6d389a3..1f8f6daaf1e7 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -11,21 +11,18 @@
*
* HISTORY
* 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ * 2021-Apr-26: More test cases by André Almeida <andrealmeid@collabora.com>
*
*****************************************************************************/
-#include <errno.h>
-#include <getopt.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
+#include <pthread.h>
#include "futextest.h"
#include "logging.h"
#define TEST_NAME "futex-wait-timeout"
static long timeout_ns = 100000; /* 100us default timeout */
+static futex_t futex_pi;
void usage(char *prog)
{
@@ -37,11 +34,67 @@ void usage(char *prog)
VQUIET, VCRITICAL, VINFO);
}
+/*
+ * Get a PI lock and hold it forever, so the main thread lock_pi will block
+ * and we can test the timeout
+ *
+ * Thread body; arg is unused. Only returns if the final futex_wait() call
+ * returns, which is reported as an error.
+ */
+void *get_pi_lock(void *arg)
+{
+	int ret;
+	volatile futex_t lock = 0;
+
+	/* Report errno, not the return value, as the cause of a failure. */
+	ret = futex_lock_pi(&futex_pi, NULL, 0, 0);
+	if (ret != 0)
+		error("futex_lock_pi failed\n", errno);
+
+	/* Blocks forever */
+	ret = futex_wait(&lock, 0, NULL, 0);
+	error("futex_wait failed\n", ret ? errno : 0);
+
+	return NULL;
+}
+
+/*
+ * Report one test case as passed when the call under test failed with the
+ * expected errno, and as failed otherwise.
+ *
+ * res:       return value of the call under test
+ * ret:       overall test status, set to RET_FAIL on failure
+ * test_name: name printed in the result line
+ * err:       errno value the call is expected to have set
+ */
+static void test_timeout(int res, int *ret, char *test_name, int err)
+{
+	if (res && errno == err) {
+		ksft_test_result_pass("%s succeeds\n", test_name);
+		return;
+	}
+
+	ksft_test_result_fail("%s returned %d\n", test_name,
+			      res < 0 ? errno : res);
+	*ret = RET_FAIL;
+}
+
+/*
+ * Calculate absolute timeout and correct overflow
+ *
+ * Reads the current time of clockid into *to and adds timeout_ns to it,
+ * keeping tv_nsec within [0, 1000000000) for any value of timeout_ns.
+ *
+ * Returns 0 on success, or the errno value of clock_gettime() on failure.
+ */
+static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to,
+				 long timeout_ns)
+{
+	if (clock_gettime(clockid, to)) {
+		/* Save errno: reporting the error may overwrite it. */
+		int err = errno;
+
+		error("clock_gettime failed\n", err);
+		return err;
+	}
+
+	/*
+	 * Add whole seconds and the sub-second remainder separately, so that
+	 * a timeout of one second or more is carried in full and the
+	 * intermediate tv_nsec sum stays small.
+	 */
+	to->tv_sec += timeout_ns / 1000000000;
+	to->tv_nsec += timeout_ns % 1000000000;
+
+	if (to->tv_nsec >= 1000000000) {
+		to->tv_sec++;
+		to->tv_nsec -= 1000000000;
+	} else if (to->tv_nsec < 0) {
+		to->tv_sec--;
+		to->tv_nsec += 1000000000;
+	}
+
+	return 0;
+}
+
int main(int argc, char *argv[])
{
futex_t f1 = FUTEX_INITIALIZER;
- struct timespec to;
int res, ret = RET_PASS;
+ struct timespec to;
+ pthread_t thread;
int c;
while ((c = getopt(argc, argv, "cht:v:")) != -1) {
@@ -65,22 +118,63 @@ int main(int argc, char *argv[])
}
ksft_print_header();
- ksft_set_plan(1);
+ ksft_set_plan(7);
ksft_print_msg("%s: Block on a futex and wait for timeout\n",
basename(argv[0]));
ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
- /* initialize timeout */
+ pthread_create(&thread, NULL, get_pi_lock, NULL);
+
+ /* initialize relative timeout */
to.tv_sec = 0;
to.tv_nsec = timeout_ns;
- info("Calling futex_wait on f1: %u @ %p\n", f1, &f1);
- res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG);
- if (!res || errno != ETIMEDOUT) {
- fail("futex_wait returned %d\n", ret < 0 ? errno : ret);
- ret = RET_FAIL;
- }
+ res = futex_wait(&f1, f1, &to, 0);
+ test_timeout(res, &ret, "futex_wait relative", ETIMEDOUT);
+
+ /* FUTEX_WAIT_BITSET with CLOCK_REALTIME */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_wait_bitset realtime", ETIMEDOUT);
+
+ /* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */
+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_bitset(&f1, f1, &to, 1, 0);
+ test_timeout(res, &ret, "futex_wait_bitset monotonic", ETIMEDOUT);
+
+ /* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_wait_requeue_pi realtime", ETIMEDOUT);
+
+ /* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */
+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0);
+ test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+
+ /*
+ * FUTEX_LOCK_PI with CLOCK_REALTIME
+ * Due to historical reasons, FUTEX_LOCK_PI supports only realtime
+ * clock, but requires the caller to not set CLOCK_REALTIME flag.
+ *
+ * If you call FUTEX_LOCK_PI with a monotonic clock, it'll be
+ * interpreted as a realtime clock, and (unless you mess your machine's
+ * time or your time machine) the monotonic clock value is always
+ * smaller than realtime and the syscall will timeout immediately.
+ */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_lock_pi(&futex_pi, &to, 0, 0);
+ test_timeout(res, &ret, "futex_lock_pi realtime", ETIMEDOUT);
+
+ /* Test operations that don't support FUTEX_CLOCK_REALTIME */
+ res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS);
- print_result(TEST_NAME, ret);
+ ksft_print_cnts();
return ret;
}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 1acb6ace1680..11a9d62290f5 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -73,3 +73,9 @@ echo
echo
./futex_wait_uninitialized_heap $COLOR
./futex_wait_private_mapped_file $COLOR
+
+echo
+./futex_wait $COLOR
+
+echo
+./futex_requeue $COLOR
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index bd83158e0e0b..06a351b4f93b 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
+/aarch64/debug-exceptions
/aarch64/get-reg-list
-/aarch64/get-reg-list-sve
/aarch64/vgic_init
/s390x/memop
/s390x/resets
@@ -8,12 +8,15 @@
/x86_64/cr4_cpuid_sync_test
/x86_64/debug_regs
/x86_64/evmcs_test
+/x86_64/emulator_error_test
/x86_64/get_cpuid_test
/x86_64/get_msr_index_features
/x86_64/kvm_pv_test
/x86_64/hyperv_clock
/x86_64/hyperv_cpuid
+/x86_64/hyperv_features
/x86_64/mmio_warning_test
+/x86_64/mmu_role_test
/x86_64/platform_info_test
/x86_64/set_boot_cpu_id
/x86_64/set_sregs_test
@@ -29,6 +32,7 @@
/x86_64/vmx_preemption_timer_test
/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
+/x86_64/vmx_nested_tsc_scaling_test
/x86_64/xapic_ipi_test
/x86_64/xen_shinfo_test
/x86_64/xen_vmcall_test
@@ -41,5 +45,7 @@
/kvm_create_max_vcpus
/kvm_page_table_test
/memslot_modification_stress_test
+/memslot_perf_test
/set_memory_region_test
/steal_time
+/kvm_binary_stats_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index e439d027939d..b853be2ae3c6 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -33,19 +33,22 @@ ifeq ($(ARCH),s390)
UNAME_M := s390x
endif
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
-LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
+LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
+LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
+TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test
TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
+TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
@@ -60,6 +63,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
@@ -74,11 +78,13 @@ TEST_GEN_PROGS_x86_64 += hardware_disable_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += kvm_page_table_test
TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
+TEST_GEN_PROGS_x86_64 += memslot_perf_test
TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time
+TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
+TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
-TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
@@ -87,6 +93,7 @@ TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
TEST_GEN_PROGS_aarch64 += kvm_page_table_test
TEST_GEN_PROGS_aarch64 += set_memory_region_test
TEST_GEN_PROGS_aarch64 += steal_time
+TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
TEST_GEN_PROGS_s390x = s390x/memop
TEST_GEN_PROGS_s390x += s390x/resets
@@ -96,6 +103,7 @@ TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
TEST_GEN_PROGS_s390x += kvm_page_table_test
TEST_GEN_PROGS_s390x += set_memory_region_test
+TEST_GEN_PROGS_s390x += kvm_binary_stats_test
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
new file mode 100644
index 000000000000..e5e6c92b60da
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+#define MDSCR_KDE (1 << 13)
+#define MDSCR_MDE (1 << 15)
+#define MDSCR_SS (1 << 0)
+
+#define DBGBCR_LEN8 (0xff << 5)
+#define DBGBCR_EXEC (0x0 << 3)
+#define DBGBCR_EL1 (0x1 << 1)
+#define DBGBCR_E (0x1 << 0)
+
+#define DBGWCR_LEN8 (0xff << 5)
+#define DBGWCR_RD (0x1 << 3)
+#define DBGWCR_WR (0x2 << 3)
+#define DBGWCR_EL1 (0x1 << 1)
+#define DBGWCR_E (0x1 << 0)
+
+#define SPSR_D (1 << 9)
+#define SPSR_SS (1 << 21)
+
+extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start;
+static volatile uint64_t sw_bp_addr, hw_bp_addr;
+static volatile uint64_t wp_addr, wp_data_addr;
+static volatile uint64_t svc_addr;
+static volatile uint64_t ss_addr[4], ss_idx;
+#define PC(v) ((uint64_t)&(v))
+
+/*
+ * Guest helper: bring the debug registers used by this test back to a known
+ * state before a test stage programs them.
+ */
+static void reset_debug_state(void)
+{
+ /* Set the D bit of DAIF, i.e. mask debug exceptions while reprogramming. */
+ asm volatile("msr daifset, #8");
+
+ /* Write 0 to the OS double lock and OS lock access registers. */
+ write_sysreg(osdlr_el1, 0);
+ write_sysreg(oslar_el1, 0);
+ isb();
+
+ write_sysreg(mdscr_el1, 0);
+ /* This test only uses the first bp and wp slot. */
+ write_sysreg(dbgbvr0_el1, 0);
+ write_sysreg(dbgbcr0_el1, 0);
+ write_sysreg(dbgwcr0_el1, 0);
+ write_sysreg(dbgwvr0_el1, 0);
+ isb();
+}
+
+/*
+ * Guest helper: program watchpoint slot 0 for reads and writes at addr,
+ * clear the D bit of DAIF and set MDSCR_KDE | MDSCR_MDE in mdscr_el1.
+ */
+static void install_wp(uint64_t addr)
+{
+	uint32_t ctrl = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
+	uint32_t dbg_ctl;
+
+	write_sysreg(dbgwcr0_el1, ctrl);
+	write_sysreg(dbgwvr0_el1, addr);
+	isb();
+
+	asm volatile("msr daifclr, #8");
+
+	dbg_ctl = read_sysreg(mdscr_el1);
+	dbg_ctl |= MDSCR_KDE | MDSCR_MDE;
+	write_sysreg(mdscr_el1, dbg_ctl);
+	isb();
+}
+
+/*
+ * Guest helper: program breakpoint slot 0 for instruction execution at addr,
+ * clear the D bit of DAIF and set MDSCR_KDE | MDSCR_MDE in mdscr_el1.
+ */
+static void install_hw_bp(uint64_t addr)
+{
+	uint32_t ctrl = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
+	uint32_t dbg_ctl;
+
+	write_sysreg(dbgbcr0_el1, ctrl);
+	write_sysreg(dbgbvr0_el1, addr);
+	isb();
+
+	asm volatile("msr daifclr, #8");
+
+	dbg_ctl = read_sysreg(mdscr_el1);
+	dbg_ctl |= MDSCR_KDE | MDSCR_MDE;
+	write_sysreg(mdscr_el1, dbg_ctl);
+	isb();
+}
+
+/*
+ * Guest helper: clear the D bit of DAIF and set MDSCR_KDE | MDSCR_SS in
+ * mdscr_el1 to start single-stepping.
+ */
+static void install_ss(void)
+{
+	uint32_t dbg_ctl;
+
+	asm volatile("msr daifclr, #8");
+
+	dbg_ctl = read_sysreg(mdscr_el1);
+	dbg_ctl |= MDSCR_KDE | MDSCR_SS;
+	write_sysreg(mdscr_el1, dbg_ctl);
+	isb();
+}
+
+static volatile char write_data;
+
+/*
+ * Guest entry point. Runs the debug exception scenarios one after another,
+ * signalling the host with GUEST_SYNC(n) between them. Each scenario
+ * triggers an exception at a labelled instruction and then checks the
+ * address recorded by the matching exception handler.
+ */
+static void guest_code(void)
+{
+ GUEST_SYNC(0);
+
+ /* Software-breakpoint */
+ asm volatile("sw_bp: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
+
+ GUEST_SYNC(1);
+
+ /* Hardware-breakpoint */
+ reset_debug_state();
+ install_hw_bp(PC(hw_bp));
+ asm volatile("hw_bp: nop");
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
+
+ GUEST_SYNC(2);
+
+ /* Hardware-breakpoint + svc */
+ reset_debug_state();
+ install_hw_bp(PC(bp_svc));
+ asm volatile("bp_svc: svc #0");
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
+ /* The svc handler is expected to see the PC of the next instruction. */
+ GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
+
+ GUEST_SYNC(3);
+
+ /* Hardware-breakpoint + software-breakpoint */
+ reset_debug_state();
+ install_hw_bp(PC(bp_brk));
+ asm volatile("bp_brk: brk #0");
+ /* Both handlers are expected to have run for the same instruction. */
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
+
+ GUEST_SYNC(4);
+
+ /* Watchpoint */
+ reset_debug_state();
+ install_wp(PC(write_data));
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+ GUEST_SYNC(5);
+
+ /* Single-step */
+ reset_debug_state();
+ install_ss();
+ ss_idx = 0;
+ /*
+ * Three instructions are stepped; the last one sets the D bit of DAIF
+ * again. The handler is expected to have recorded the address of
+ * each of them.
+ */
+ asm volatile("ss_start:\n"
+ "mrs x0, esr_el1\n"
+ "add x0, x0, #1\n"
+ "msr daifset, #8\n"
+ : : : "x0");
+ GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
+ GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
+ GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
+
+ GUEST_DONE();
+}
+
+/*
+ * Handler installed for ESR_EC_BRK_INS: record the PC of the exception and
+ * advance it by 4 so that execution resumes after the brk instruction.
+ */
+static void guest_sw_bp_handler(struct ex_regs *regs)
+{
+ sw_bp_addr = regs->pc;
+ regs->pc += 4;
+}
+
+/*
+ * Handler installed for ESR_EC_HW_BP_CURRENT: record the PC of the exception
+ * and set SPSR_D in the saved pstate.
+ * NOTE(review): presumably this masks debug exceptions on return so that the
+ * breakpointed instruction can execute without trapping again - confirm.
+ */
+static void guest_hw_bp_handler(struct ex_regs *regs)
+{
+ hw_bp_addr = regs->pc;
+ regs->pstate |= SPSR_D;
+}
+
+/*
+ * Handler installed for ESR_EC_WP_CURRENT: record the value of far_el1 and
+ * the PC of the exception, and set SPSR_D in the saved pstate.
+ */
+static void guest_wp_handler(struct ex_regs *regs)
+{
+ wp_data_addr = read_sysreg(far_el1);
+ wp_addr = regs->pc;
+ regs->pstate |= SPSR_D;
+}
+
+/*
+ * Handler installed for ESR_EC_SSTEP_CURRENT: append the PC of the exception
+ * to ss_addr[] (asserting that the array is not full) and set SPSR_SS in the
+ * saved pstate.
+ * NOTE(review): presumably setting SPSR_SS re-arms the step for the next
+ * instruction - confirm.
+ */
+static void guest_ss_handler(struct ex_regs *regs)
+{
+ GUEST_ASSERT_1(ss_idx < 4, ss_idx);
+ ss_addr[ss_idx++] = regs->pc;
+ regs->pstate |= SPSR_SS;
+}
+
+/* Handler installed for ESR_EC_SVC64: record the PC of the exception. */
+static void guest_svc_handler(struct ex_regs *regs)
+{
+ svc_addr = regs->pc;
+}
+
+/*
+ * Return the lowest four bits of the ID_AA64DFR0_EL1 register of the test
+ * vcpu; main() compares this value against the minimum it requires.
+ */
+static int debug_version(struct kvm_vm *vm)
+{
+	uint64_t dfr0;
+
+	get_reg(vm, VCPU_ID, ARM64_SYS_REG(ID_AA64DFR0_EL1), &dfr0);
+
+	return dfr0 & 0xf;
+}
+
+/*
+ * Host side of the test: create a VM running guest_code(), install the guest
+ * exception handlers and run the vcpu until the guest reports completion.
+ * The test is skipped when debug_version() reports a value below 6.
+ */
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ ucall_init(vm, NULL);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ if (debug_version(vm) < 6) {
+ print_skip("Armv8 debug architecture not supported.");
+ kvm_vm_free(vm);
+ exit(KSFT_SKIP);
+ }
+
+ /* One handler per exception class triggered by guest_code(). */
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_BRK_INS, guest_sw_bp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_WP_CURRENT, guest_wp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_SSTEP_CURRENT, guest_ss_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_SVC64, guest_svc_handler);
+
+ /*
+ * The guest issues GUEST_SYNC(0) to GUEST_SYNC(5) followed by
+ * GUEST_DONE(); each sync must carry the current stage number.
+ */
+ for (stage = 0; stage < 7; stage++) {
+ vcpu_run(vm, VCPU_ID);
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Stage %d: Unexpected sync ucall, got %lx",
+ stage, (ulong)uc.args[1]);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx",
+ (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
deleted file mode 100644
index efba76682b4b..000000000000
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
+++ /dev/null
@@ -1,3 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define REG_LIST_SVE
-#include "get-reg-list.c"
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 486932164cf2..a16c8f05366c 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -27,17 +27,37 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
#include "kvm_util.h"
#include "test_util.h"
#include "processor.h"
-#ifdef REG_LIST_SVE
-#define reg_list_sve() (true)
-#else
-#define reg_list_sve() (false)
-#endif
+static struct kvm_reg_list *reg_list;
+static __u64 *blessed_reg, blessed_n;
-#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+/*
+ * One group of registers that makes up part of a vcpu configuration.
+ *
+ * name:          short label; config_name() joins the non-"base" ones with '+'
+ * capability:    when non-zero, check_supported() requires kvm_check_cap() to
+ *                succeed for it and prepare_vcpu_init() sets the feature bit
+ * feature:       vcpu feature number set in kvm_vcpu_init.features[]
+ * finalize:      when true, finalize_vcpu() issues KVM_ARM_VCPU_FINALIZE
+ *                for 'feature'
+ * regs, regs_n:  registers contributed to the blessed list; a NULL 'regs'
+ *                terminates a sublist array (see for_each_sublist())
+ * rejects_set, rejects_set_n: registers for which KVM_SET_ONE_REG is
+ *                expected to fail with EPERM
+ */
+struct reg_sublist {
+ const char *name;
+ long capability;
+ int feature;
+ bool finalize;
+ __u64 *regs;
+ __u64 regs_n;
+ __u64 *rejects_set;
+ __u64 rejects_set_n;
+};
+
+/*
+ * A vcpu configuration to test: a list of register sublists.
+ *
+ * name:     NULL until config_name() builds and caches it
+ * sublists: array terminated by an entry whose 'regs' pointer is NULL
+ */
+struct vcpu_config {
+ char *name;
+ struct reg_sublist sublists[];
+};
+
+static struct vcpu_config *vcpu_configs[];
+static int vcpu_configs_n;
+
+#define for_each_sublist(c, s) \
+ for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
#define for_each_reg(i) \
for ((i) = 0; (i) < reg_list->n; ++(i))
@@ -54,12 +74,41 @@
for_each_reg_filtered(i) \
if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+/*
+ * Return the name of vcpu config @c, building and caching it in c->name on
+ * first use.
+ *
+ * The generated name is the names of all sublists other than "base", joined
+ * with '+' (e.g. "vregs+pmu"). A config with no such sublist gets an empty
+ * name. The returned string is owned by @c and must not be freed.
+ */
+static const char *config_name(struct vcpu_config *c)
+{
+ struct reg_sublist *s;
+ int len = 0;
-static struct kvm_reg_list *reg_list;
+ if (c->name)
+ return c->name;
-static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
-static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
-static __u64 *blessed_reg, blessed_n;
+ /* Upper bound: every name plus one separator/terminator byte. */
+ for_each_sublist(c, s)
+ len += strlen(s->name) + 1;
+
+ /* Always allocate at least the terminator. */
+ c->name = malloc(len ? len : 1);
+ TEST_ASSERT(c->name, "Failed to allocate vcpu config name");
+
+ len = 0;
+ for_each_sublist(c, s) {
+ if (!strcmp(s->name, "base"))
+ continue;
+ /* strcpy, not strcat: the malloc'ed buffer is uninitialized. */
+ strcpy(c->name + len, s->name);
+ len += strlen(s->name) + 1;
+ c->name[len - 1] = '+';
+ }
+ /* Replace the trailing '+' (or terminate an empty name). */
+ c->name[len ? len - 1 : 0] = '\0';
+
+ return c->name;
+}
+
+/* Return true if any sublist of @c has its 'capability' equal to @capability. */
+static bool has_cap(struct vcpu_config *c, long capability)
+{
+ struct reg_sublist *s;
+
+ for_each_sublist(c, s)
+ if (s->capability == capability)
+ return true;
+ return false;
+}
static bool filter_reg(__u64 reg)
{
@@ -96,11 +145,13 @@ static const char *str_with_index(const char *template, __u64 index)
return (const char *)str;
}
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
#define CORE_REGS_XX_NR_WORDS 2
#define CORE_SPSR_XX_NR_WORDS 2
#define CORE_FPREGS_XX_NR_WORDS 4
-static const char *core_id_to_str(__u64 id)
+static const char *core_id_to_str(struct vcpu_config *c, __u64 id)
{
__u64 core_off = id & ~REG_MASK, idx;
@@ -111,7 +162,7 @@ static const char *core_id_to_str(__u64 id)
case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
KVM_REG_ARM_CORE_REG(regs.regs[30]):
idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
- TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx);
+ TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx);
return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
case KVM_REG_ARM_CORE_REG(regs.sp):
return "KVM_REG_ARM_CORE_REG(regs.sp)";
@@ -126,12 +177,12 @@ static const char *core_id_to_str(__u64 id)
case KVM_REG_ARM_CORE_REG(spsr[0]) ...
KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
- TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx);
+ TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx);
return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
- TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx);
+ TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx);
return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
@@ -139,11 +190,11 @@ static const char *core_id_to_str(__u64 id)
return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
}
- TEST_FAIL("Unknown core reg id: 0x%llx", id);
+ TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id);
return NULL;
}
-static const char *sve_id_to_str(__u64 id)
+static const char *sve_id_to_str(struct vcpu_config *c, __u64 id)
{
__u64 sve_off, n, i;
@@ -153,37 +204,37 @@ static const char *sve_id_to_str(__u64 id)
sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
- TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id);
+ TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id);
switch (sve_off) {
case KVM_REG_ARM64_SVE_ZREG_BASE ...
KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
- "Unexpected bits set in SVE ZREG id: 0x%llx", id);
+ "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id);
return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
case KVM_REG_ARM64_SVE_PREG_BASE ...
KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
- "Unexpected bits set in SVE PREG id: 0x%llx", id);
+ "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id);
return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
case KVM_REG_ARM64_SVE_FFR_BASE:
TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
- "Unexpected bits set in SVE FFR id: 0x%llx", id);
+ "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id);
return "KVM_REG_ARM64_SVE_FFR(0)";
}
return NULL;
}
-static void print_reg(__u64 id)
+static void print_reg(struct vcpu_config *c, __u64 id)
{
unsigned op0, op1, crn, crm, op2;
const char *reg_size = NULL;
TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
- "KVM_REG_ARM64 missing in reg id: 0x%llx", id);
+ "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id);
switch (id & KVM_REG_SIZE_MASK) {
case KVM_REG_SIZE_U8:
@@ -214,17 +265,17 @@ static void print_reg(__u64 id)
reg_size = "KVM_REG_SIZE_U2048";
break;
default:
- TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx",
- (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+ TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+ config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
}
switch (id & KVM_REG_ARM_COPROC_MASK) {
case KVM_REG_ARM_CORE:
- printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id));
+ printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id));
break;
case KVM_REG_ARM_DEMUX:
TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
- "Unexpected bits set in DEMUX reg id: 0x%llx", id);
+ "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id);
printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
break;
@@ -235,23 +286,23 @@ static void print_reg(__u64 id)
crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
- "Unexpected bits set in SYSREG reg id: 0x%llx", id);
+ "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id);
printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
break;
case KVM_REG_ARM_FW:
TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
- "Unexpected bits set in FW reg id: 0x%llx", id);
+ "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id);
printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
break;
case KVM_REG_ARM64_SVE:
- if (reg_list_sve())
- printf("\t%s,\n", sve_id_to_str(id));
+ if (has_cap(c, KVM_CAP_ARM_SVE))
+ printf("\t%s,\n", sve_id_to_str(c, id));
else
- TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
+ TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id);
break;
default:
- TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx",
- (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+ TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+ config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
}
}
@@ -312,56 +363,58 @@ static void core_reg_fixup(void)
reg_list = tmp;
}
-static void prepare_vcpu_init(struct kvm_vcpu_init *init)
+/*
+ * Set in @init the feature bit of every sublist of @c that names a
+ * capability. Sublists whose 'capability' is 0 contribute no feature bit.
+ */
+static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init)
{
- if (reg_list_sve())
- init->features[0] |= 1 << KVM_ARM_VCPU_SVE;
+ struct reg_sublist *s;
+
+ for_each_sublist(c, s)
+ if (s->capability)
+ /* 1U: shifting a signed 1 into bit 31 would be undefined. */
+ init->features[s->feature / 32] |= 1U << (s->feature % 32);
}
-static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+/*
+ * Issue KVM_ARM_VCPU_FINALIZE on vCPU @vcpuid for the feature of every
+ * sublist of @c that has 'finalize' set.
+ */
+static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid, struct vcpu_config *c)
{
+ struct reg_sublist *s;
int feature;
- if (reg_list_sve()) {
- feature = KVM_ARM_VCPU_SVE;
- vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+ for_each_sublist(c, s) {
+ if (s->finalize) {
+ feature = s->feature;
+ vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+ }
}
}
-static void check_supported(void)
+/*
+ * Exit with KSFT_SKIP if any sublist of @c names a capability that
+ * kvm_check_cap() reports as unavailable; otherwise return normally.
+ */
+static void check_supported(struct vcpu_config *c)
{
- if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) {
- fprintf(stderr, "SVE not available, skipping tests\n");
- exit(KSFT_SKIP);
+ struct reg_sublist *s;
+
+ for_each_sublist(c, s) {
+ if (s->capability && !kvm_check_cap(s->capability)) {
+ fprintf(stderr, "%s: %s not available, skipping tests\n", config_name(c), s->name);
+ exit(KSFT_SKIP);
+ }
}
}
-int main(int ac, char **av)
+static bool print_list;
+static bool print_filtered;
+static bool fixup_core_regs;
+
+static void run_test(struct vcpu_config *c)
{
struct kvm_vcpu_init init = { .target = -1, };
- int new_regs = 0, missing_regs = 0, i;
+ int new_regs = 0, missing_regs = 0, i, n;
int failed_get = 0, failed_set = 0, failed_reject = 0;
- bool print_list = false, print_filtered = false, fixup_core_regs = false;
struct kvm_vm *vm;
- __u64 *vec_regs;
+ struct reg_sublist *s;
- check_supported();
-
- for (i = 1; i < ac; ++i) {
- if (strcmp(av[i], "--core-reg-fixup") == 0)
- fixup_core_regs = true;
- else if (strcmp(av[i], "--list") == 0)
- print_list = true;
- else if (strcmp(av[i], "--list-filtered") == 0)
- print_filtered = true;
- else
- TEST_FAIL("Unknown option: %s\n", av[i]);
- }
+ check_supported(c);
vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- prepare_vcpu_init(&init);
+ prepare_vcpu_init(c, &init);
aarch64_vcpu_add_default(vm, 0, &init, NULL);
- finalize_vcpu(vm, 0);
+ finalize_vcpu(vm, 0, c);
reg_list = vcpu_get_reg_list(vm, 0);
@@ -374,10 +427,10 @@ int main(int ac, char **av)
__u64 id = reg_list->reg[i];
if ((print_list && !filter_reg(id)) ||
(print_filtered && filter_reg(id)))
- print_reg(id);
+ print_reg(c, id);
}
putchar('\n');
- return 0;
+ return;
}
/*
@@ -396,50 +449,52 @@ int main(int ac, char **av)
.id = reg_list->reg[i],
.addr = (__u64)&addr,
};
+ bool reject_reg = false;
int ret;
ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
if (ret) {
- puts("Failed to get ");
- print_reg(reg.id);
+ printf("%s: Failed to get ", config_name(c));
+ print_reg(c, reg.id);
putchar('\n');
++failed_get;
}
/* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
- if (find_reg(rejects_set, rejects_set_n, reg.id)) {
- ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
- if (ret != -1 || errno != EPERM) {
- printf("Failed to reject (ret=%d, errno=%d) ", ret, errno);
- print_reg(reg.id);
- putchar('\n');
- ++failed_reject;
+ for_each_sublist(c, s) {
+ if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
+ reject_reg = true;
+ ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+ if (ret != -1 || errno != EPERM) {
+ printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
+ print_reg(c, reg.id);
+ putchar('\n');
+ ++failed_reject;
+ }
+ break;
}
- continue;
}
- ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
- if (ret) {
- puts("Failed to set ");
- print_reg(reg.id);
- putchar('\n');
- ++failed_set;
+ if (!reject_reg) {
+ ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+ if (ret) {
+ printf("%s: Failed to set ", config_name(c));
+ print_reg(c, reg.id);
+ putchar('\n');
+ ++failed_set;
+ }
}
}
- if (reg_list_sve()) {
- blessed_n = base_regs_n + sve_regs_n;
- vec_regs = sve_regs;
- } else {
- blessed_n = base_regs_n + vregs_n;
- vec_regs = vregs;
- }
-
+ for_each_sublist(c, s)
+ blessed_n += s->regs_n;
blessed_reg = calloc(blessed_n, sizeof(__u64));
- for (i = 0; i < base_regs_n; ++i)
- blessed_reg[i] = base_regs[i];
- for (i = 0; i < blessed_n - base_regs_n; ++i)
- blessed_reg[base_regs_n + i] = vec_regs[i];
+
+ n = 0;
+ for_each_sublist(c, s) {
+ for (i = 0; i < s->regs_n; ++i)
+ blessed_reg[n++] = s->regs[i];
+ }
for_each_new_reg(i)
++new_regs;
@@ -448,40 +503,141 @@ int main(int ac, char **av)
++missing_regs;
if (new_regs || missing_regs) {
- printf("Number blessed registers: %5lld\n", blessed_n);
- printf("Number registers: %5lld\n", reg_list->n);
+ printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
+ printf("%s: Number registers: %5lld\n", config_name(c), reg_list->n);
}
if (new_regs) {
- printf("\nThere are %d new registers.\n"
+ printf("\n%s: There are %d new registers.\n"
"Consider adding them to the blessed reg "
- "list with the following lines:\n\n", new_regs);
+ "list with the following lines:\n\n", config_name(c), new_regs);
for_each_new_reg(i)
- print_reg(reg_list->reg[i]);
+ print_reg(c, reg_list->reg[i]);
putchar('\n');
}
if (missing_regs) {
- printf("\nThere are %d missing registers.\n"
- "The following lines are missing registers:\n\n", missing_regs);
+ printf("\n%s: There are %d missing registers.\n"
+ "The following lines are missing registers:\n\n", config_name(c), missing_regs);
for_each_missing_reg(i)
- print_reg(blessed_reg[i]);
+ print_reg(c, blessed_reg[i]);
putchar('\n');
}
TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
- "There are %d missing registers; "
+ "%s: There are %d missing registers; "
"%d registers failed get; %d registers failed set; %d registers failed reject",
- missing_regs, failed_get, failed_set, failed_reject);
+ config_name(c), missing_regs, failed_get, failed_set, failed_reject);
- return 0;
+ pr_info("%s: PASS\n", config_name(c));
+ blessed_n = 0;
+ free(blessed_reg);
+ free(reg_list);
+ kvm_vm_free(vm);
+}
+
+/*
+ * Print the usage text to stdout, including the name of every entry in
+ * vcpu_configs[] as a valid --config selection.
+ */
+static void help(void)
+{
+ struct vcpu_config *c;
+ int i;
+
+ printf(
+ "\n"
+ "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered] [--core-reg-fixup]\n\n"
+ " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n"
+ " '<selection>' may be\n");
+
+ /* One line per known configuration name. */
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ printf(
+ " '%s'\n", config_name(c));
+ }
+
+ printf(
+ "\n"
+ " --list Print the register list rather than test it (requires --config)\n"
+ " --list-filtered Print registers that would normally be filtered out (requires --config)\n"
+ " --core-reg-fixup Needed when running on old kernels with broken core reg listings\n"
+ "\n"
+ );
+}
+
+/*
+ * Parse a "--config=<name>" argument and return the matching entry of
+ * vcpu_configs[]. Prints the usage text and exits with status 1 when the
+ * '=' is missing or <name> matches no configuration.
+ *
+ * The caller has already matched the 8-character "--config" prefix, so
+ * config[8] is within the string (it is '\0' when nothing follows).
+ */
+static struct vcpu_config *parse_config(const char *config)
+{
+ struct vcpu_config *c;
+ int i;
+
+ if (config[8] != '=')
+ help(), exit(1);
+
+ /* &config[9] is the text after "--config=". */
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ if (strcmp(config_name(c), &config[9]) == 0)
+ break;
+ }
+
+ if (i == vcpu_configs_n)
+ help(), exit(1);
+
+ return c;
+}
+
+/*
+ * Parse the command line, then run run_test() for the selected vcpu
+ * configuration (or for every configuration when none is selected), each in
+ * its own forked child.
+ *
+ * Returns KSFT_FAIL if any child exited with a status other than 0 or
+ * KSFT_SKIP, and 0 otherwise. --list and --list-filtered require --config.
+ */
+int main(int ac, char **av)
+{
+ struct vcpu_config *c, *sel = NULL;
+ int i, ret = 0;
+ pid_t pid;
+
+ for (i = 1; i < ac; ++i) {
+ if (strcmp(av[i], "--core-reg-fixup") == 0)
+ fixup_core_regs = true;
+ else if (strncmp(av[i], "--config", 8) == 0)
+ sel = parse_config(av[i]);
+ else if (strcmp(av[i], "--list") == 0)
+ print_list = true;
+ else if (strcmp(av[i], "--list-filtered") == 0)
+ print_filtered = true;
+ /* Test the current argument, av[i], for both spellings of help. */
+ else if (strcmp(av[i], "--help") == 0 || strcmp(av[i], "-h") == 0)
+ help(), exit(0);
+ else
+ help(), exit(1);
+ }
+
+ if (print_list || print_filtered) {
+ /*
+ * We only want to print the register list of a single config.
+ */
+ if (!sel)
+ help(), exit(1);
+ }
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ if (sel && c != sel)
+ continue;
+
+ pid = fork();
+ /*
+ * Without this check a failed fork() (-1) would match a failed
+ * wait() (-1) below and wstatus would be read uninitialized.
+ */
+ TEST_ASSERT(pid >= 0, "fork: Unexpected failure, errno: %d", errno);
+
+ if (!pid) {
+ run_test(c);
+ exit(0);
+ } else {
+ int wstatus;
+ pid_t wpid = wait(&wstatus);
+ TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
+ if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
+ ret = KSFT_FAIL;
+ }
+ }
+
+ return ret;
}
/*
* The current blessed list was primed with the output of kernel version
* v4.15 with --core-reg-fixup and then later updated with new registers.
*
- * The blessed list is up to date with kernel version v5.10-rc5
+ * The blessed list is up to date with kernel version v5.13-rc3
*/
static __u64 base_regs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
@@ -673,8 +829,6 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */
ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */
ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */
- ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */
- ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */
ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */
ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */
ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */
@@ -683,6 +837,16 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */
ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */
ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
+ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
+ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
+ ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
+};
+
+static __u64 pmu_regs[] = {
+ ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */
+ ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */
ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */
ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */
ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */
@@ -692,8 +856,6 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */
ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */
ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */
- ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
- ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
ARM64_SYS_REG(3, 3, 14, 8, 0),
ARM64_SYS_REG(3, 3, 14, 8, 1),
ARM64_SYS_REG(3, 3, 14, 8, 2),
@@ -757,11 +919,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 3, 14, 15, 5),
ARM64_SYS_REG(3, 3, 14, 15, 6),
ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
- ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
- ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
- ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
};
-static __u64 base_regs_n = ARRAY_SIZE(base_regs);
static __u64 vregs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
@@ -797,7 +955,6 @@ static __u64 vregs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
};
-static __u64 vregs_n = ARRAY_SIZE(vregs);
static __u64 sve_regs[] = {
KVM_REG_ARM64_SVE_VLS,
@@ -852,11 +1009,57 @@ static __u64 sve_regs[] = {
KVM_REG_ARM64_SVE_FFR(0),
ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */
};
-static __u64 sve_regs_n = ARRAY_SIZE(sve_regs);
-static __u64 rejects_set[] = {
-#ifdef REG_LIST_SVE
+static __u64 sve_rejects_set[] = {
KVM_REG_ARM64_SVE_VLS,
-#endif
};
-static __u64 rejects_set_n = ARRAY_SIZE(rejects_set);
+
+/*
+ * Initializers for struct reg_sublist entries. The first, positional value
+ * is the sublist name; regs_n/rejects_set_n are taken from the array sizes.
+ * Only SVE_SUBLIST names a capability, a feature and a rejects_set list, and
+ * asks for the feature to be finalized.
+ *
+ * NOTE(review): PMU_SUBLIST sets no capability/feature, so
+ * prepare_vcpu_init() requests no vcpu feature for it -- confirm intended.
+ */
+#define BASE_SUBLIST \
+ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
+#define VREGS_SUBLIST \
+ { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
+#define PMU_SUBLIST \
+ { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+#define SVE_SUBLIST \
+ { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
+ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
+ .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+
+static struct vcpu_config vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_config vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_config sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_config sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_config *vcpu_configs[] = {
+ &vregs_config,
+ &vregs_pmu_config,
+ &sve_config,
+ &sve_pmu_config,
+};
+static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 5f7a229c3af1..b74704305835 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -9,6 +9,7 @@
#define _GNU_SOURCE /* for pipe2 */
+#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
@@ -38,6 +39,7 @@
static int nr_vcpus = 1;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+static size_t demand_paging_size;
static char *guest_data_prototype;
static void *vcpu_worker(void *data)
@@ -71,36 +73,51 @@ static void *vcpu_worker(void *data)
return NULL;
}
-static int handle_uffd_page_request(int uffd, uint64_t addr)
+/*
+ * Resolve one userfaultfd fault at @addr on @uffd according to @uffd_mode:
+ *
+ *  - UFFDIO_REGISTER_MODE_MISSING: UFFDIO_COPY demand_paging_size bytes from
+ *    guest_data_prototype to @addr.
+ *  - UFFDIO_REGISTER_MODE_MINOR: UFFDIO_CONTINUE over the demand_paging_size
+ *    bytes starting at @addr.
+ *  - anything else: TEST_FAIL.
+ *
+ * Returns 0 on success. If the ioctl fails, logs the errno and returns the
+ * ioctl's return value (-1). The elapsed time is reported via
+ * PER_PAGE_DEBUG.
+ */
+static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
{
- pid_t tid;
+ pid_t tid = syscall(__NR_gettid);
struct timespec start;
struct timespec ts_diff;
- struct uffdio_copy copy;
int r;
- tid = syscall(__NR_gettid);
+ clock_gettime(CLOCK_MONOTONIC, &start);
- copy.src = (uint64_t)guest_data_prototype;
- copy.dst = addr;
- copy.len = perf_test_args.host_page_size;
- copy.mode = 0;
+ if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
+ struct uffdio_copy copy;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ copy.src = (uint64_t)guest_data_prototype;
+ copy.dst = addr;
+ copy.len = demand_paging_size;
+ copy.mode = 0;
- r = ioctl(uffd, UFFDIO_COPY, &copy);
- if (r == -1) {
- pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n",
- addr, tid, errno);
- return r;
+ r = ioctl(uffd, UFFDIO_COPY, &copy);
+ if (r == -1) {
+ pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n",
+ addr, tid, errno);
+ return r;
+ }
+ } else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
+ struct uffdio_continue cont = {0};
+
+ cont.range.start = addr;
+ cont.range.len = demand_paging_size;
+
+ r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
+ if (r == -1) {
+ pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n",
+ addr, tid, errno);
+ return r;
+ }
+ } else {
+ TEST_FAIL("Invalid uffd mode %d", uffd_mode);
}
ts_diff = timespec_elapsed(start);
- PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
+ PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
timespec_to_ns(ts_diff));
PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
- perf_test_args.host_page_size, addr, tid);
+ demand_paging_size, addr, tid);
return 0;
}
@@ -108,6 +125,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
bool quit_uffd_thread;
struct uffd_handler_args {
+ int uffd_mode;
int uffd;
int pipefd;
useconds_t delay;
@@ -169,7 +187,7 @@ static void *uffd_handler_thread_fn(void *arg)
if (r == -1) {
if (errno == EAGAIN)
continue;
- pr_info("Read of uffd gor errno %d", errno);
+ pr_info("Read of uffd got errno %d\n", errno);
return NULL;
}
@@ -184,7 +202,7 @@ static void *uffd_handler_thread_fn(void *arg)
if (delay)
usleep(delay);
addr = msg.arg.pagefault.address;
- r = handle_uffd_page_request(uffd, addr);
+ r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr);
if (r < 0)
return NULL;
pages++;
@@ -198,43 +216,53 @@ static void *uffd_handler_thread_fn(void *arg)
return NULL;
}
-static int setup_demand_paging(struct kvm_vm *vm,
- pthread_t *uffd_handler_thread, int pipefd,
- useconds_t uffd_delay,
- struct uffd_handler_args *uffd_args,
- void *hva, uint64_t len)
+static void setup_demand_paging(struct kvm_vm *vm,
+ pthread_t *uffd_handler_thread, int pipefd,
+ int uffd_mode, useconds_t uffd_delay,
+ struct uffd_handler_args *uffd_args,
+ void *hva, void *alias, uint64_t len)
{
+ bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
int uffd;
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
+ uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
- uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
- if (uffd == -1) {
- pr_info("uffd creation failed\n");
- return -1;
+ /* Name the ioctl used to resolve faults: UFFDIO_CONTINUE for MINOR mode. */
+ PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
+ is_minor ? "MINOR" : "MISSING",
+ is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY");
+
+ /* In order to get minor faults, prefault via the alias. */
+ if (is_minor) {
+ size_t p;
+
+ expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
+
+ TEST_ASSERT(alias != NULL, "Alias required for minor faults");
+ for (p = 0; p < (len / demand_paging_size); ++p) {
+ memcpy(alias + (p * demand_paging_size),
+ guest_data_prototype, demand_paging_size);
+ }
}
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
+
uffdio_api.api = UFFD_API;
uffdio_api.features = 0;
- if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
- pr_info("ioctl uffdio_api failed\n");
- return -1;
- }
+ TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
+ "ioctl UFFDIO_API failed: %" PRIu64,
+ (uint64_t)uffdio_api.api);
uffdio_register.range.start = (uint64_t)hva;
uffdio_register.range.len = len;
- uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
- pr_info("ioctl uffdio_register failed\n");
- return -1;
- }
-
- if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) !=
- UFFD_API_RANGE_IOCTLS) {
- pr_info("unexpected userfaultfd ioctl set\n");
- return -1;
- }
+ uffdio_register.mode = uffd_mode;
+ TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
+ "ioctl UFFDIO_REGISTER failed");
+ TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
+ expected_ioctls, "missing userfaultfd ioctls");
+ uffd_args->uffd_mode = uffd_mode;
uffd_args->uffd = uffd;
uffd_args->pipefd = pipefd;
uffd_args->delay = uffd_delay;
@@ -243,13 +271,12 @@ static int setup_demand_paging(struct kvm_vm *vm,
PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
hva, hva + len);
-
- return 0;
}
+/*
+ * Command-line selected parameters passed to run_test().
+ *
+ * uffd_mode:   0 when userfaultfd is not used, otherwise the UFFD
+ *              registration mode chosen with -u (MISSING or MINOR)
+ * uffd_delay:  delay in usec added to the fault handler (-d)
+ * src_type:    backing memory type for the test memory (-t)
+ * partition_vcpu_memory_access: false when -o asks for overlapping accesses
+ */
struct test_params {
- bool use_uffd;
+ int uffd_mode;
useconds_t uffd_delay;
+ enum vm_mem_backing_src_type src_type;
bool partition_vcpu_memory_access;
};
@@ -267,14 +294,16 @@ static void run_test(enum vm_guest_mode mode, void *arg)
int r;
vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
- VM_MEM_SRC_ANONYMOUS);
+ p->src_type);
perf_test_args.wr_fract = 1;
- guest_data_prototype = malloc(perf_test_args.host_page_size);
+ demand_paging_size = get_backing_src_pagesz(p->src_type);
+
+ guest_data_prototype = malloc(demand_paging_size);
TEST_ASSERT(guest_data_prototype,
"Failed to allocate buffer for guest data pattern");
- memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size);
+ memset(guest_data_prototype, 0xAB, demand_paging_size);
vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
TEST_ASSERT(vcpu_threads, "Memory allocation failed");
@@ -282,7 +311,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
p->partition_vcpu_memory_access);
- if (p->use_uffd) {
+ if (p->uffd_mode) {
uffd_handler_threads =
malloc(nr_vcpus * sizeof(*uffd_handler_threads));
TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
@@ -296,6 +325,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
vm_paddr_t vcpu_gpa;
void *vcpu_hva;
+ void *vcpu_alias;
uint64_t vcpu_mem_size;
@@ -310,8 +340,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size);
- /* Cache the HVA pointer of the region */
+ /* Cache the host addresses of the region */
vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
+ vcpu_alias = addr_gpa2alias(vm, vcpu_gpa);
/*
* Set up user fault fd to handle demand paging
@@ -321,13 +352,11 @@ static void run_test(enum vm_guest_mode mode, void *arg)
O_CLOEXEC | O_NONBLOCK);
TEST_ASSERT(!r, "Failed to set up pipefd");
- r = setup_demand_paging(vm,
- &uffd_handler_threads[vcpu_id],
- pipefds[vcpu_id * 2],
- p->uffd_delay, &uffd_args[vcpu_id],
- vcpu_hva, vcpu_mem_size);
- if (r < 0)
- exit(-r);
+ setup_demand_paging(vm, &uffd_handler_threads[vcpu_id],
+ pipefds[vcpu_id * 2], p->uffd_mode,
+ p->uffd_delay, &uffd_args[vcpu_id],
+ vcpu_hva, vcpu_alias,
+ vcpu_mem_size);
}
}
@@ -355,7 +384,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pr_info("All vCPU threads joined\n");
- if (p->use_uffd) {
+ if (p->uffd_mode) {
char c;
/* Tell the user fault fd handler threads to quit */
@@ -377,7 +406,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
free(guest_data_prototype);
free(vcpu_threads);
- if (p->use_uffd) {
+ if (p->uffd_mode) {
free(uffd_handler_threads);
free(uffd_args);
free(pipefds);
@@ -387,17 +416,19 @@ static void run_test(enum vm_guest_mode mode, void *arg)
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
- " [-b memory] [-v vcpus] [-o]\n", name);
+ printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
+ " [-b memory] [-t type] [-v vcpus] [-o]\n", name);
guest_modes_help();
- printf(" -u: use User Fault FD to handle vCPU page\n"
- " faults.\n");
+ printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
+ " UFFD registration mode: 'MISSING' or 'MINOR'.\n");
printf(" -d: add a delay in usec to the User Fault\n"
" FD handler to simulate demand paging\n"
" overheads. Ignored without -u.\n");
printf(" -b: specify the size of the memory region which should be\n"
" demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n");
+ printf(" -t: The type of backing memory to use. Default: anonymous\n");
+ backing_src_help();
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
@@ -409,19 +440,24 @@ int main(int argc, char *argv[])
{
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
struct test_params p = {
+ .src_type = VM_MEM_SRC_ANONYMOUS,
.partition_vcpu_memory_access = true,
};
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:ud:b:v:o")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
break;
case 'u':
- p.use_uffd = true;
+ if (!strcmp("MISSING", optarg))
+ p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
+ else if (!strcmp("MINOR", optarg))
+ p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
+ TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
break;
case 'd':
p.uffd_delay = strtoul(optarg, NULL, 0);
@@ -430,6 +466,9 @@ int main(int argc, char *argv[])
case 'b':
guest_percpu_mem_size = parse_size(optarg);
break;
+ case 't':
+ p.src_type = parse_backing_src_type(optarg);
+ break;
case 'v':
nr_vcpus = atoi(optarg);
TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
@@ -445,6 +484,11 @@ int main(int argc, char *argv[])
}
}
+ if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
+ !backing_src_is_shared(p.src_type)) {
+ TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t");
+ }
+
for_each_guest_mode(run_test, &p);
return 0;
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 81edbd23d371..5fe0140e407e 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -16,7 +16,6 @@
#include <errno.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>
-#include <asm/barrier.h>
#include <linux/atomic.h>
#include "kvm_util.h"
@@ -681,7 +680,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ kvm_vm_elf_load(vm, program_invocation_name);
#ifdef __x86_64__
vm_create_irqchip(vm);
#endif
@@ -761,7 +760,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
KVM_MEM_LOG_DIRTY_PAGES);
/* Do mapping for the dirty track memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
/* Cache the HVA pointer of the region */
host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
index 5aadf84c91c0..b21c69a56daa 100644
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -105,7 +105,7 @@ static void run_test(uint32_t run)
CPU_SET(i, &cpu_set);
vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ kvm_vm_elf_load(vm, program_invocation_name);
vm_create_irqchip(vm);
pr_debug("%s: [%d] start vcpus\n", __func__, run);
@@ -132,6 +132,36 @@ static void run_test(uint32_t run)
TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run);
}
+#include <time.h>	/* clock_gettime(), CLOCK_REALTIME; harmless if already included */
+
+/*
+ * Block until the child identified by @pid posts to the shared semaphore,
+ * waking up roughly once per second to check whether the child terminated
+ * before finishing its setup.
+ *
+ * Returns once the semaphore has been acquired.  If the child exited first,
+ * this process exits with the child's exit status (so that e.g. KSFT_SKIP is
+ * propagated to the caller of the test); if the child terminated in any other
+ * way, the test fails.
+ */
+void wait_for_child_setup(pid_t pid)
+{
+	for (;;) {
+		struct timespec deadline;
+		int status;
+
+		/*
+		 * sem_timedwait() takes an absolute CLOCK_REALTIME deadline,
+		 * not a relative interval.  Passing a constant one-second
+		 * timespec names a point in the distant past, which makes
+		 * every call time out immediately and turns this loop into a
+		 * busy-poll.  Compute "now + 1s" on each iteration instead.
+		 */
+		TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &deadline),
+			    "clock_gettime(CLOCK_REALTIME) failed");
+		deadline.tv_sec += 1;
+
+		if (!sem_timedwait(sem, &deadline))
+			return;
+
+		/* Child is still running, keep waiting. */
+		if (pid != waitpid(pid, &status, WNOHANG))
+			continue;
+
+		/*
+		 * Child is no longer running, which is not expected.
+		 *
+		 * If it exited normally, forward its exit status verbatim in
+		 * case it exited with KSFT_SKIP.
+		 */
+		if (WIFEXITED(status))
+			exit(WEXITSTATUS(status));
+
+		TEST_ASSERT(false, "Child exited unexpectedly");
+	}
+}
+
int main(int argc, char **argv)
{
uint32_t i;
@@ -148,7 +178,7 @@ int main(int argc, char **argv)
run_test(i); /* This function always exits */
pr_debug("%s: [%d] waiting semaphore\n", __func__, i);
- sem_wait(sem);
+ wait_for_child_setup(pid);
r = (rand() % DELAY_US_MAX) + 1;
pr_debug("%s: [%d] waiting %dus\n", __func__, i, r);
usleep(r);
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index b7fa0c8551db..27dc5c2e56b9 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -8,16 +8,20 @@
#define SELFTEST_KVM_PROCESSOR_H
#include "kvm_util.h"
+#include <linux/stringify.h>
#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-#define CPACR_EL1 3, 0, 1, 0, 2
-#define TCR_EL1 3, 0, 2, 0, 2
-#define MAIR_EL1 3, 0, 10, 2, 0
-#define TTBR0_EL1 3, 0, 2, 0, 0
-#define SCTLR_EL1 3, 0, 1, 0, 0
+#define CPACR_EL1 3, 0, 1, 0, 2
+#define TCR_EL1 3, 0, 2, 0, 2
+#define MAIR_EL1 3, 0, 10, 2, 0
+#define TTBR0_EL1 3, 0, 2, 0, 0
+#define SCTLR_EL1 3, 0, 1, 0, 0
+#define VBAR_EL1 3, 0, 12, 0, 0
+
+#define ID_AA64DFR0_EL1 3, 0, 0, 5, 0
/*
* Default MAIR
@@ -56,4 +60,73 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_init *init, void *guest_code);
+/*
+ * Register snapshot handed by pointer to exception handlers (see handler_fn).
+ * NOTE(review): the layout presumably has to match what the guest exception
+ * entry code saves -- confirm before reordering or resizing any field.
+ */
+struct ex_regs {
+ u64 regs[31]; /* 31 general-purpose slots; presumably x0-x30 -- TODO confirm */
+ u64 sp;
+ u64 pc;
+ u64 pstate;
+};
+
+/* Number of enumerators in the vector enumeration below (4 groups of 4). */
+#define VECTOR_NUM 16
+
+/*
+ * Exception vector indices, implicitly numbered 0..15.  The enumerators come
+ * in four groups (CURRENT_SP0, CURRENT, LOWER_64, LOWER_32), each listing
+ * SYNC, IRQ, FIQ and ERROR in that order.
+ * NOTE(review): the ordering presumably mirrors the AArch64 vector table
+ * layout -- confirm before inserting or reordering entries.
+ */
+enum {
+ VECTOR_SYNC_CURRENT_SP0,
+ VECTOR_IRQ_CURRENT_SP0,
+ VECTOR_FIQ_CURRENT_SP0,
+ VECTOR_ERROR_CURRENT_SP0,
+
+ VECTOR_SYNC_CURRENT,
+ VECTOR_IRQ_CURRENT,
+ VECTOR_FIQ_CURRENT,
+ VECTOR_ERROR_CURRENT,
+
+ VECTOR_SYNC_LOWER_64,
+ VECTOR_IRQ_LOWER_64,
+ VECTOR_FIQ_LOWER_64,
+ VECTOR_ERROR_LOWER_64,
+
+ VECTOR_SYNC_LOWER_32,
+ VECTOR_IRQ_LOWER_32,
+ VECTOR_FIQ_LOWER_32,
+ VECTOR_ERROR_LOWER_32,
+};
+
+/*
+ * Evaluates to non-zero when @v equals one of the four VECTOR_SYNC_* indices.
+ * @v may be evaluated up to four times, so do not pass an expression with
+ * side effects.
+ */
+#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
+ (v) == VECTOR_SYNC_CURRENT || \
+ (v) == VECTOR_SYNC_LOWER_64 || \
+ (v) == VECTOR_SYNC_LOWER_32)
+
+#define ESR_EC_NUM 64
+#define ESR_EC_SHIFT 26
+#define ESR_EC_MASK (ESR_EC_NUM - 1)
+
+#define ESR_EC_SVC64 0x15
+#define ESR_EC_HW_BP_CURRENT 0x31
+#define ESR_EC_SSTEP_CURRENT 0x33
+#define ESR_EC_WP_CURRENT 0x35
+#define ESR_EC_BRK_INS 0x3c
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
+
+typedef void(*handler_fn)(struct ex_regs *);
+void vm_install_exception_handler(struct kvm_vm *vm,
+ int vector, handler_fn handler);
+void vm_install_sync_handler(struct kvm_vm *vm,
+ int vector, int ec, handler_fn handler);
+
+/*
+ * Write @val, converted to u64, to the system register named by @reg using an
+ * "msr" instruction.  @reg is stringified into the instruction text; @val is
+ * evaluated exactly once.  Note the argument order: register first, value
+ * second.
+ */
+#define write_sysreg(reg, val) \
+({ \
+ u64 __val = (u64)(val); \
+ asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val)); \
+})
+
+/*
+ * Read the system register named by @reg with an "mrs" instruction and yield
+ * its value as a u64.  @reg is stringified into the instruction text.
+ */
+#define read_sysreg(reg) \
+({ \
+	u64 __val; \
+	asm volatile("mrs %0, " __stringify(reg) \
+		     : "=r" (__val) : : "memory"); \
+	__val; \
+})
+
+#define isb() asm volatile("isb" : : : "memory")
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index a8f022794ce3..615ab254899d 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -30,6 +30,7 @@ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
/* Minimum allocated guest virtual and physical addresses */
#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
#define DEFAULT_GUEST_PHY_PAGES 512
#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
@@ -43,6 +44,7 @@ enum vm_guest_mode {
VM_MODE_P40V48_4K,
VM_MODE_P40V48_64K,
VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_P47V64_4K,
NUM_VM_MODES,
};
@@ -60,7 +62,7 @@ enum vm_guest_mode {
#elif defined(__s390x__)
-#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
+#define VM_MODE_DEFAULT VM_MODE_P47V64_4K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 16)
@@ -77,6 +79,7 @@ struct vm_guest_mode_params {
};
extern const struct vm_guest_mode_params vm_guest_mode_params[];
+int open_kvm_dev_path_or_exit(void);
int kvm_check_cap(long cap);
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
@@ -96,8 +99,7 @@ uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
size_t len);
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- uint32_t data_memslot, uint32_t pgd_memslot);
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
@@ -139,13 +141,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
+
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages, uint32_t pgd_memslot);
+ unsigned int npages);
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
/*
* Address Guest Virtual to Guest Physical
@@ -234,7 +239,7 @@ int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
const char *exit_reason_str(unsigned int exit_reason);
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+void virt_pgd_alloc(struct kvm_vm *vm);
/*
* VM Virtual Page Map
@@ -252,13 +257,13 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
* Within @vm, creates a virtual translation for the page starting
* at @vaddr to the page starting at @paddr.
*/
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t memslot);
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
uint32_t memslot);
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
vm_paddr_t paddr_min, uint32_t memslot);
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
/*
* Create a VM with reasonable defaults
@@ -283,10 +288,11 @@ struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_me
uint32_t num_percpu_pages, void *guest_code,
uint32_t vcpuids[]);
-/* Like vm_create_default_with_vcpus, but accepts mode as a parameter */
+/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as parameters */
struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
- uint64_t extra_mem_pages, uint32_t num_percpu_pages,
- void *guest_code, uint32_t vcpuids[]);
+ uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
+ uint32_t num_percpu_pages, void *guest_code,
+ uint32_t vcpuids[]);
/*
* Adds a vCPU with reasonable defaults (e.g. a stack)
@@ -302,7 +308,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm);
unsigned int vm_get_page_size(struct kvm_vm *vm);
unsigned int vm_get_page_shift(struct kvm_vm *vm);
-unsigned int vm_get_max_gfn(struct kvm_vm *vm);
+uint64_t vm_get_max_gfn(struct kvm_vm *vm);
int vm_get_fd(struct kvm_vm *vm);
unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
@@ -347,6 +353,7 @@ enum {
UCALL_SYNC,
UCALL_ABORT,
UCALL_DONE,
+ UCALL_UNHANDLED,
};
#define UCALL_MAX_ARGS 6
@@ -365,26 +372,31 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
#define GUEST_DONE() ucall(UCALL_DONE, 0)
-#define __GUEST_ASSERT(_condition, _nargs, _args...) do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, 2 + _nargs, \
- "Failed guest assert: " \
- #_condition, __LINE__, _args); \
+#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \
+ if (!(_condition)) \
+ ucall(UCALL_ABORT, 2 + _nargs, \
+ "Failed guest assert: " \
+ _condstr, __LINE__, _args); \
} while (0)
#define GUEST_ASSERT(_condition) \
- __GUEST_ASSERT((_condition), 0, 0)
+ __GUEST_ASSERT(_condition, #_condition, 0, 0)
#define GUEST_ASSERT_1(_condition, arg1) \
- __GUEST_ASSERT((_condition), 1, (arg1))
+ __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
#define GUEST_ASSERT_2(_condition, arg1, arg2) \
- __GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+ __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
- __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+ __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
- __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+ __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
+
+#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
+
+int vm_get_stats_fd(struct kvm_vm *vm);
+int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index fade3130eb01..d79be15dd3d2 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -17,6 +17,7 @@
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
+#include <sys/mman.h>
#include "kselftest.h"
static inline int _no_printf(const char *format, ...) { return 0; }
@@ -84,6 +85,8 @@ enum vm_mem_backing_src_type {
VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB,
VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB,
VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB,
+ VM_MEM_SRC_SHMEM,
+ VM_MEM_SRC_SHARED_HUGETLB,
NUM_SRC_TYPES,
};
@@ -100,4 +103,13 @@ size_t get_backing_src_pagesz(uint32_t i);
void backing_src_help(void);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
+/*
+ * Report whether backing source type @t is mapped with MAP_SHARED, i.e. is
+ * shared memory rather than anonymous memory.
+ */
+static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
+{
+	return (vm_mem_backing_src_alias(t)->flag & MAP_SHARED) != 0;
+}
+
#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
new file mode 100644
index 000000000000..0be4757f1f20
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/apic.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/apic.h
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_APIC_H
+#define SELFTEST_KVM_APIC_H
+
+#include <stdint.h>
+
+#include "processor.h"
+
+#define APIC_DEFAULT_GPA 0xfee00000ULL
+
+/* APIC base address MSR and fields */
+#define MSR_IA32_APICBASE 0x0000001b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_EXTD (1<<10)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+#define GET_APIC_BASE(x) (((x) >> 12) << 12)
+
+#define APIC_BASE_MSR 0x800
+#define X2APIC_ENABLE (1UL << 10)
+#define APIC_ID 0x20
+#define APIC_LVR 0x30
+#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
+#define APIC_TASKPRI 0x80
+#define APIC_PROCPRI 0xA0
+#define APIC_EOI 0xB0
+#define APIC_SPIV 0xF0
+#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
+#define APIC_SPIV_APIC_ENABLED (1 << 8)
+#define APIC_ICR 0x300
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_FIXED_MASK 0x00700
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+#define SET_APIC_DEST_FIELD(x) ((x) << 24)
+
+void apic_disable(void);
+void xapic_enable(void);
+void x2apic_enable(void);
+
+/*
+ * Read MSR_IA32_APICBASE and return it masked with MSR_IA32_APICBASE_BSP:
+ * non-zero when that bit is set, 0 otherwise.
+ */
+static inline uint32_t get_bsp_flag(void)
+{
+	const uint64_t apic_base = rdmsr(MSR_IA32_APICBASE);
+
+	return apic_base & MSR_IA32_APICBASE_BSP;
+}
+
+/*
+ * Read a 32-bit register through the memory window at APIC_DEFAULT_GPA.
+ * @reg is a byte offset; it is divided by four to index the 32-bit array.
+ */
+static inline uint32_t xapic_read_reg(unsigned int reg)
+{
+	volatile uint32_t *apic = (volatile uint32_t *)APIC_DEFAULT_GPA;
+
+	return apic[reg >> 2];
+}
+
+/*
+ * Store @val to a 32-bit register through the memory window at
+ * APIC_DEFAULT_GPA.  @reg is a byte offset; it is divided by four to index
+ * the 32-bit array.
+ */
+static inline void xapic_write_reg(unsigned int reg, uint32_t val)
+{
+	volatile uint32_t *apic = (volatile uint32_t *)APIC_DEFAULT_GPA;
+
+	apic[reg >> 2] = val;
+}
+
+/*
+ * Read an APIC register through its MSR: the MSR index is APIC_BASE_MSR plus
+ * the register offset @reg shifted right by four.
+ */
+static inline uint64_t x2apic_read_reg(unsigned int reg)
+{
+	const unsigned int msr_index = APIC_BASE_MSR + (reg >> 4);
+
+	return rdmsr(msr_index);
+}
+
+/*
+ * Write @value to an APIC register through its MSR: the MSR index is
+ * APIC_BASE_MSR plus the register offset @reg shifted right by four.
+ */
+static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
+{
+	const unsigned int msr_index = APIC_BASE_MSR + (reg >> 4);
+
+	wrmsr(msr_index, value);
+}
+
+#endif /* SELFTEST_KVM_APIC_H */
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index a034438b6266..c9af97abd622 100644
--- a/tools/testing/selftests/kvm/include/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * tools/testing/selftests/kvm/include/vmx.h
+ * tools/testing/selftests/kvm/include/x86_64/evmcs.h
*
* Copyright (C) 2018, Red Hat, Inc.
*
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
new file mode 100644
index 000000000000..412eaee7884a
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/hyperv.h
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+
+#ifndef SELFTEST_KVM_HYPERV_H
+#define SELFTEST_KVM_HYPERV_H
+
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
+#define HYPERV_CPUID_INTERFACE 0x40000001
+#define HYPERV_CPUID_VERSION 0x40000002
+#define HYPERV_CPUID_FEATURES 0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
+#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007
+#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A
+#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080
+#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081
+#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082
+
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+#define HV_X64_MSR_HYPERCALL 0x40000001
+#define HV_X64_MSR_VP_INDEX 0x40000002
+#define HV_X64_MSR_RESET 0x40000003
+#define HV_X64_MSR_VP_RUNTIME 0x40000010
+#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
+#define HV_X64_MSR_REFERENCE_TSC 0x40000021
+#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
+#define HV_X64_MSR_APIC_FREQUENCY 0x40000023
+#define HV_X64_MSR_EOI 0x40000070
+#define HV_X64_MSR_ICR 0x40000071
+#define HV_X64_MSR_TPR 0x40000072
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
+#define HV_X64_MSR_SCONTROL 0x40000080
+#define HV_X64_MSR_SVERSION 0x40000081
+#define HV_X64_MSR_SIEFP 0x40000082
+#define HV_X64_MSR_SIMP 0x40000083
+#define HV_X64_MSR_EOM 0x40000084
+#define HV_X64_MSR_SINT0 0x40000090
+#define HV_X64_MSR_SINT1 0x40000091
+#define HV_X64_MSR_SINT2 0x40000092
+#define HV_X64_MSR_SINT3 0x40000093
+#define HV_X64_MSR_SINT4 0x40000094
+#define HV_X64_MSR_SINT5 0x40000095
+#define HV_X64_MSR_SINT6 0x40000096
+#define HV_X64_MSR_SINT7 0x40000097
+#define HV_X64_MSR_SINT8 0x40000098
+#define HV_X64_MSR_SINT9 0x40000099
+#define HV_X64_MSR_SINT10 0x4000009A
+#define HV_X64_MSR_SINT11 0x4000009B
+#define HV_X64_MSR_SINT12 0x4000009C
+#define HV_X64_MSR_SINT13 0x4000009D
+#define HV_X64_MSR_SINT14 0x4000009E
+#define HV_X64_MSR_SINT15 0x4000009F
+#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0
+#define HV_X64_MSR_STIMER0_COUNT 0x400000B1
+#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2
+#define HV_X64_MSR_STIMER1_COUNT 0x400000B3
+#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4
+#define HV_X64_MSR_STIMER2_COUNT 0x400000B5
+#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
+#define HV_X64_MSR_STIMER3_COUNT 0x400000B7
+#define HV_X64_MSR_GUEST_IDLE 0x400000F0
+#define HV_X64_MSR_CRASH_P0 0x40000100
+#define HV_X64_MSR_CRASH_P1 0x40000101
+#define HV_X64_MSR_CRASH_P2 0x40000102
+#define HV_X64_MSR_CRASH_P3 0x40000103
+#define HV_X64_MSR_CRASH_P4 0x40000104
+#define HV_X64_MSR_CRASH_CTL 0x40000105
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
+
+#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1
+#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2
+#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3
+#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4
+#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5
+#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF
+
+/* HYPERV_CPUID_FEATURES.EAX */
+#define HV_MSR_VP_RUNTIME_AVAILABLE BIT(0)
+#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1)
+#define HV_MSR_SYNIC_AVAILABLE BIT(2)
+#define HV_MSR_SYNTIMER_AVAILABLE BIT(3)
+#define HV_MSR_APIC_ACCESS_AVAILABLE BIT(4)
+#define HV_MSR_HYPERCALL_AVAILABLE BIT(5)
+#define HV_MSR_VP_INDEX_AVAILABLE BIT(6)
+#define HV_MSR_RESET_AVAILABLE BIT(7)
+#define HV_MSR_STAT_PAGES_AVAILABLE BIT(8)
+#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9)
+#define HV_MSR_GUEST_IDLE_AVAILABLE BIT(10)
+#define HV_ACCESS_FREQUENCY_MSRS BIT(11)
+#define HV_ACCESS_REENLIGHTENMENT BIT(13)
+#define HV_ACCESS_TSC_INVARIANT BIT(15)
+
+/* HYPERV_CPUID_FEATURES.EBX */
+#define HV_CREATE_PARTITIONS BIT(0)
+#define HV_ACCESS_PARTITION_ID BIT(1)
+#define HV_ACCESS_MEMORY_POOL BIT(2)
+#define HV_ADJUST_MESSAGE_BUFFERS BIT(3)
+#define HV_POST_MESSAGES BIT(4)
+#define HV_SIGNAL_EVENTS BIT(5)
+#define HV_CREATE_PORT BIT(6)
+#define HV_CONNECT_PORT BIT(7)
+#define HV_ACCESS_STATS BIT(8)
+#define HV_DEBUGGING BIT(11)
+#define HV_CPU_MANAGEMENT BIT(12)
+#define HV_ISOLATION BIT(22)
+
+/* HYPERV_CPUID_FEATURES.EDX */
+#define HV_X64_MWAIT_AVAILABLE BIT(0)
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1)
+#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2)
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3)
+#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4)
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5)
+#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8)
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
+#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11)
+#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19)
+
+/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0)
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1)
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2)
+#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3)
+#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4)
+#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5)
+#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9)
+#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10)
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
+#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
+
+/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING BIT(1)
+
+/* Hypercalls */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_SEND_IPI 0x000b
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
+#define HVCALL_SEND_IPI_EX 0x0015
+#define HVCALL_GET_PARTITION_ID 0x0046
+#define HVCALL_DEPOSIT_MEMORY 0x0048
+#define HVCALL_CREATE_VP 0x004e
+#define HVCALL_GET_VP_REGISTERS 0x0050
+#define HVCALL_SET_VP_REGISTERS 0x0051
+#define HVCALL_POST_MESSAGE 0x005c
+#define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_POST_DEBUG_DATA 0x0069
+#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a
+#define HVCALL_RESET_DEBUG_SESSION 0x006b
+#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076
+#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
+#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
+#define HVCALL_RETARGET_INTERRUPT 0x007e
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS 0
+#define HV_STATUS_INVALID_HYPERCALL_CODE 2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
+#define HV_STATUS_INVALID_ALIGNMENT 4
+#define HV_STATUS_INVALID_PARAMETER 5
+#define HV_STATUS_ACCESS_DENIED 6
+#define HV_STATUS_OPERATION_DENIED 8
+#define HV_STATUS_INSUFFICIENT_MEMORY 11
+#define HV_STATUS_INVALID_PORT_ID 17
+#define HV_STATUS_INVALID_CONNECTION_ID 18
+#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+
+#endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 0b30b4e15c38..242ae8e09a65 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -13,6 +13,8 @@
#include <asm/msr-index.h>
+#include "../kvm_util.h"
+
#define X86_EFLAGS_FIXED (1u << 1)
#define X86_CR4_VME (1ul << 0)
@@ -53,7 +55,8 @@
#define CPUID_PKU (1ul << 3)
#define CPUID_LA57 (1ul << 16)
-#define UNEXPECTED_VECTOR_PORT 0xfff0u
+/* CPUID.0x8000_0001.EDX */
+#define CPUID_GBPAGES (1ul << 26)
/* General Registers in 64-Bit Mode */
struct gpr64_regs {
@@ -391,9 +394,13 @@ struct ex_regs {
void vm_init_descriptor_tables(struct kvm_vm *vm);
void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
-void vm_handle_exception(struct kvm_vm *vm, int vector,
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *));
+uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr);
+void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
+ uint64_t pte);
+
/*
* set_cpuid() - overwrites a matching cpuid entry with the provided value.
* matches based on ent->function && ent->index. returns true
@@ -410,6 +417,14 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+/*
+ * Page size selector passed to __virt_pg_map().  Enumerators count up from 0
+ * in increasing page size.
+ * NOTE(review): the numeric values may be relied on by the implementation
+ * (e.g. as a level offset) -- confirm before renumbering.
+ */
+enum x86_page_size {
+ X86_PAGE_SIZE_4K = 0,
+ X86_PAGE_SIZE_2M,
+ X86_PAGE_SIZE_1G,
+};
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ enum x86_page_size page_size);
+
/*
* Basic CPU control in CR0
*/
@@ -425,53 +440,6 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
-#define APIC_DEFAULT_GPA 0xfee00000ULL
-
-/* APIC base address MSR and fields */
-#define MSR_IA32_APICBASE 0x0000001b
-#define MSR_IA32_APICBASE_BSP (1<<8)
-#define MSR_IA32_APICBASE_EXTD (1<<10)
-#define MSR_IA32_APICBASE_ENABLE (1<<11)
-#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-#define GET_APIC_BASE(x) (((x) >> 12) << 12)
-
-#define APIC_BASE_MSR 0x800
-#define X2APIC_ENABLE (1UL << 10)
-#define APIC_ID 0x20
-#define APIC_LVR 0x30
-#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
-#define APIC_TASKPRI 0x80
-#define APIC_PROCPRI 0xA0
-#define APIC_EOI 0xB0
-#define APIC_SPIV 0xF0
-#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
-#define APIC_SPIV_APIC_ENABLED (1 << 8)
-#define APIC_ICR 0x300
-#define APIC_DEST_SELF 0x40000
-#define APIC_DEST_ALLINC 0x80000
-#define APIC_DEST_ALLBUT 0xC0000
-#define APIC_ICR_RR_MASK 0x30000
-#define APIC_ICR_RR_INVALID 0x00000
-#define APIC_ICR_RR_INPROG 0x10000
-#define APIC_ICR_RR_VALID 0x20000
-#define APIC_INT_LEVELTRIG 0x08000
-#define APIC_INT_ASSERT 0x04000
-#define APIC_ICR_BUSY 0x01000
-#define APIC_DEST_LOGICAL 0x00800
-#define APIC_DEST_PHYSICAL 0x00000
-#define APIC_DM_FIXED 0x00000
-#define APIC_DM_FIXED_MASK 0x00700
-#define APIC_DM_LOWEST 0x00100
-#define APIC_DM_SMI 0x00200
-#define APIC_DM_REMRD 0x00300
-#define APIC_DM_NMI 0x00400
-#define APIC_DM_INIT 0x00500
-#define APIC_DM_STARTUP 0x00600
-#define APIC_DM_EXTINT 0x00700
-#define APIC_VECTOR_MASK 0x000FF
-#define APIC_ICR2 0x310
-#define SET_APIC_DEST_FIELD(x) ((x) << 24)
-
/* VMX_EPT_VPID_CAP bits */
#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 65eb1079a161..583ceb0d1457 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -10,6 +10,7 @@
#include <stdint.h>
#include "processor.h"
+#include "apic.h"
/*
* Definitions of Primary Processor-Based VM-Execution Controls.
@@ -607,15 +608,13 @@ bool nested_vmx_supported(void);
void nested_vmx_check_supported(void);
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
+ uint64_t nested_paddr, uint64_t paddr);
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- uint32_t eptp_memslot);
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size);
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot, uint32_t eptp_memslot);
+ uint32_t memslot);
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot);
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot);
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
new file mode 100644
index 000000000000..5906bbc08483
--- /dev/null
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kvm_binary_stats_test
+ *
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Test the fd-based interface for KVM statistics.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+
+/*
+ * Validate the binary stats exposed through @stats_fd.
+ *
+ * The header and the id string are consumed with sequential read() calls;
+ * descriptors and data are then fetched with pread() at the offsets the
+ * header advertises.  Every field is sanity checked along the way, and all
+ * buffers are released on both return paths.
+ */
+static void stats_test(int stats_fd)
+{
+	ssize_t ret;
+	int i;
+	size_t size_desc;
+	size_t size_data = 0;
+	struct kvm_stats_header *header;
+	char *id;
+	struct kvm_stats_desc *stats_desc;
+	u64 *stats_data;
+	struct kvm_stats_desc *pdesc;
+
+	/* Read kvm stats header */
+	header = malloc(sizeof(*header));
+	TEST_ASSERT(header, "Allocate memory for stats header");
+
+	ret = read(stats_fd, header, sizeof(*header));
+	TEST_ASSERT(ret == sizeof(*header), "Read stats header");
+	size_desc = sizeof(*stats_desc) + header->name_size;
+
+	/*
+	 * Read kvm stats id string.  The buffer carries one extra zeroed byte
+	 * so the strlen() check below stays in bounds even when the id that
+	 * was read back has no terminator.
+	 */
+	id = calloc(1, (size_t)header->name_size + 1);
+	TEST_ASSERT(id, "Allocate memory for id string");
+	ret = read(stats_fd, id, header->name_size);
+	TEST_ASSERT(ret == header->name_size, "Read id string");
+
+	/* Check id string, that should start with "kvm" */
+	TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header->name_size,
+		    "Invalid KVM stats type, id: %s", id);
+
+	/* Sanity check for other fields in header */
+	if (header->num_desc == 0) {
+		printf("No KVM stats defined!");
+		/* Nothing else was allocated yet; drop what we hold. */
+		free(id);
+		free(header);
+		return;
+	}
+	/* Check overlap */
+	TEST_ASSERT(header->desc_offset > 0 && header->data_offset > 0
+		    && header->desc_offset >= sizeof(*header)
+		    && header->data_offset >= sizeof(*header),
+		    "Invalid offset fields in header");
+	TEST_ASSERT(header->desc_offset > header->data_offset ||
+		    (header->desc_offset + size_desc * header->num_desc <=
+		     header->data_offset),
+		    "Descriptor block is overlapped with data block");
+
+	/*
+	 * Allocate memory for stats descriptors.  One extra zeroed slot acts
+	 * as a sentinel so that strlen() on an unterminated name cannot run
+	 * past the end of the allocation.
+	 */
+	stats_desc = calloc((size_t)header->num_desc + 1, size_desc);
+	TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");
+	/* Read kvm stats descriptors */
+	ret = pread(stats_fd, stats_desc,
+		    size_desc * header->num_desc, header->desc_offset);
+	TEST_ASSERT(ret == size_desc * header->num_desc,
+		    "Read KVM stats descriptors");
+
+	/* Sanity check for fields in descriptors */
+	for (i = 0; i < header->num_desc; ++i) {
+		pdesc = (void *)stats_desc + i * size_desc;
+		/* Check type,unit,base boundaries */
+		TEST_ASSERT((pdesc->flags & KVM_STATS_TYPE_MASK)
+			    <= KVM_STATS_TYPE_MAX, "Unknown KVM stats type");
+		TEST_ASSERT((pdesc->flags & KVM_STATS_UNIT_MASK)
+			    <= KVM_STATS_UNIT_MAX, "Unknown KVM stats unit");
+		TEST_ASSERT((pdesc->flags & KVM_STATS_BASE_MASK)
+			    <= KVM_STATS_BASE_MAX, "Unknown KVM stats base");
+		/* Check exponent for stats unit
+		 * Exponent for counter should be greater than or equal to 0
+		 * Exponent for unit bytes should be greater than or equal to 0
+		 * Exponent for unit seconds should be less than or equal to 0
+		 * Exponent for unit clock cycles should be greater than or
+		 * equal to 0
+		 */
+		switch (pdesc->flags & KVM_STATS_UNIT_MASK) {
+		case KVM_STATS_UNIT_NONE:
+		case KVM_STATS_UNIT_BYTES:
+		case KVM_STATS_UNIT_CYCLES:
+			TEST_ASSERT(pdesc->exponent >= 0,
+				    "Unsupported KVM stats unit");
+			break;
+		case KVM_STATS_UNIT_SECONDS:
+			TEST_ASSERT(pdesc->exponent <= 0,
+				    "Unsupported KVM stats unit");
+			break;
+		}
+		/* Check name string */
+		TEST_ASSERT(strlen(pdesc->name) < header->name_size,
+			    "KVM stats name(%s) too long", pdesc->name);
+		/* Check size field, which should not be zero */
+		TEST_ASSERT(pdesc->size, "KVM descriptor(%s) with size of 0",
+			    pdesc->name);
+		size_data += pdesc->size * sizeof(*stats_data);
+	}
+	/* Check overlap */
+	TEST_ASSERT(header->data_offset >= header->desc_offset
+		    || header->data_offset + size_data <= header->desc_offset,
+		    "Data block is overlapped with Descriptor block");
+	/* Check validity of all stats data size */
+	TEST_ASSERT(size_data >= header->num_desc * sizeof(*stats_data),
+		    "Data size is not correct");
+	/* Check stats offset */
+	for (i = 0; i < header->num_desc; ++i) {
+		pdesc = (void *)stats_desc + i * size_desc;
+		TEST_ASSERT(pdesc->offset < size_data,
+			    "Invalid offset (%u) for stats: %s",
+			    pdesc->offset, pdesc->name);
+	}
+
+	/* Allocate memory for stats data */
+	stats_data = malloc(size_data);
+	TEST_ASSERT(stats_data, "Allocate memory for stats data");
+	/* Read kvm stats data as a bulk */
+	ret = pread(stats_fd, stats_data, size_data, header->data_offset);
+	TEST_ASSERT(ret == size_data, "Read KVM stats data");
+	/* Read kvm stats data one by one; size_data is reused as the running offset */
+	size_data = 0;
+	for (i = 0; i < header->num_desc; ++i) {
+		pdesc = (void *)stats_desc + i * size_desc;
+		ret = pread(stats_fd, stats_data,
+			    pdesc->size * sizeof(*stats_data),
+			    header->data_offset + size_data);
+		TEST_ASSERT(ret == pdesc->size * sizeof(*stats_data),
+			    "Read data of KVM stats: %s", pdesc->name);
+		size_data += pdesc->size * sizeof(*stats_data);
+	}
+
+	free(stats_data);
+	free(stats_desc);
+	free(id);
+	free(header);
+}
+
+
+/*
+ * Run the stats checks against the VM-scoped stats fd of @vm, then close
+ * the fd and confirm that it is no longer a valid descriptor.
+ */
+static void vm_stats_test(struct kvm_vm *vm)
+{
+	/* Get fd for VM stats */
+	int fd = vm_get_stats_fd(vm);
+
+	TEST_ASSERT(fd >= 0, "Get VM stats fd");
+	stats_test(fd);
+
+	close(fd);
+	TEST_ASSERT(fcntl(fd, F_GETFD) == -1, "Stats fd not freed");
+}
+
+/*
+ * Run the stats checks against the stats fd of vCPU @vcpu_id in @vm, then
+ * close the fd and confirm that it is no longer a valid descriptor.
+ */
+static void vcpu_stats_test(struct kvm_vm *vm, int vcpu_id)
+{
+	/* Get fd for VCPU stats */
+	int fd = vcpu_get_stats_fd(vm, vcpu_id);
+
+	TEST_ASSERT(fd >= 0, "Get VCPU stats fd");
+	stats_test(fd);
+
+	close(fd);
+	TEST_ASSERT(fcntl(fd, F_GETFD) == -1, "Stats fd not freed");
+}
+
+#define DEFAULT_NUM_VM 4
+#define DEFAULT_NUM_VCPU 4
+
+/*
+ * Usage: kvm_binary_stats_test [#vm] [#vcpu]
+ * The first parameter #vm sets the number of VMs to create.
+ * The second parameter #vcpu sets the number of VCPUs to create per VM.
+ * By default, DEFAULT_NUM_VM VMs with DEFAULT_NUM_VCPU VCPUs each are
+ * created for testing.
+ */
+
+/* Convert @arg with strtol(); a result <= 0 yields @fallback instead. */
+static int parse_count(const char *arg, int fallback)
+{
+	int count = strtol(arg, NULL, 0);
+
+	return count > 0 ? count : fallback;
+}
+
+/*
+ * Create the requested number of VMs and vCPUs, run the stats checks on
+ * every VM and every vCPU, then tear everything down.
+ */
+int main(int argc, char *argv[])
+{
+	/* Get the number of VMs and VCPUs that would be created for testing. */
+	int nr_vms = argc > 1 ? parse_count(argv[1], DEFAULT_NUM_VM)
+			      : DEFAULT_NUM_VM;
+	int nr_vcpus = argc > 2 ? parse_count(argv[2], DEFAULT_NUM_VCPU)
+				: DEFAULT_NUM_VCPU;
+	struct kvm_vm **vms;
+	int vm_idx, vcpu_idx;
+
+	/* Check the extension for binary stats */
+	if (kvm_check_cap(KVM_CAP_BINARY_STATS_FD) <= 0) {
+		print_skip("Binary form statistics interface is not supported");
+		exit(KSFT_SKIP);
+	}
+
+	/* Create VMs and VCPUs */
+	vms = malloc(nr_vms * sizeof(*vms));
+	TEST_ASSERT(vms, "Allocate memory for storing VM pointers");
+	for (vm_idx = 0; vm_idx < nr_vms; vm_idx++) {
+		vms[vm_idx] = vm_create(VM_MODE_DEFAULT,
+					DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+		for (vcpu_idx = 0; vcpu_idx < nr_vcpus; vcpu_idx++)
+			vm_vcpu_add(vms[vm_idx], vcpu_idx);
+	}
+
+	/* Check stats read for every VM and VCPU */
+	for (vm_idx = 0; vm_idx < nr_vms; vm_idx++) {
+		vm_stats_test(vms[vm_idx]);
+		for (vcpu_idx = 0; vcpu_idx < nr_vcpus; vcpu_idx++)
+			vcpu_stats_test(vms[vm_idx], vcpu_idx);
+	}
+
+	/* Release every VM before dropping the pointer array. */
+	for (vm_idx = 0; vm_idx < nr_vms; vm_idx++)
+		kvm_vm_free(vms[vm_idx]);
+	free(vms);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 1c4753fff19e..0d04a7db7f24 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -268,7 +268,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
/* Create a VM with enough guest pages */
guest_num_pages = test_mem_size / guest_page_size;
- vm = vm_create_with_vcpus(mode, nr_vcpus,
+ vm = vm_create_with_vcpus(mode, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
guest_num_pages, 0, guest_code, NULL);
/* Align down GPA of the testing memslot */
@@ -303,7 +303,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
TEST_MEM_SLOT_INDEX, guest_num_pages, 0);
/* Do mapping(GVA->GPA) for the testing memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
/* Cache the HVA pointer of the region */
host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
new file mode 100644
index 000000000000..0e443eadfac6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * save_registers: push a 17-slot (16 bytes per slot) frame on the current
+ * stack and fill it in.
+ *
+ *   slots 0-14 : x0..x29, stored in pairs
+ *   slot  15   : x30 and the value sp had before the frame was pushed
+ *   slot  16   : elr_el1 and spsr_el1
+ *
+ * x1 and x2 are reused as scratch once their original values are stored.
+ */
+.macro save_registers
+ add sp, sp, #-16 * 17
+
+ stp x0, x1, [sp, #16 * 0]
+ stp x2, x3, [sp, #16 * 1]
+ stp x4, x5, [sp, #16 * 2]
+ stp x6, x7, [sp, #16 * 3]
+ stp x8, x9, [sp, #16 * 4]
+ stp x10, x11, [sp, #16 * 5]
+ stp x12, x13, [sp, #16 * 6]
+ stp x14, x15, [sp, #16 * 7]
+ stp x16, x17, [sp, #16 * 8]
+ stp x18, x19, [sp, #16 * 9]
+ stp x20, x21, [sp, #16 * 10]
+ stp x22, x23, [sp, #16 * 11]
+ stp x24, x25, [sp, #16 * 12]
+ stp x26, x27, [sp, #16 * 13]
+ stp x28, x29, [sp, #16 * 14]
+
+ /*
+ * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
+ * at it. It will _not_ be used to restore the sp on return from the
+ * exception so handlers can not update it.
+ */
+ add x1, sp, #16 * 17
+ stp x30, x1, [sp, #16 * 15] /* x30, SP */
+
+ mrs x1, elr_el1
+ mrs x2, spsr_el1
+ stp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+.endm
+
+/*
+ * restore_registers: reload state from the frame written by save_registers,
+ * pop the frame and return from the exception with eret.
+ *
+ * elr_el1 and spsr_el1 are written back first, while x1/x2 are still free
+ * to be used as scratch; x0..x30 are reloaded afterwards.  The saved SP
+ * value is loaded into xzr, i.e. discarded.
+ */
+.macro restore_registers
+ ldp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+ msr elr_el1, x1
+ msr spsr_el1, x2
+
+ /* sp is not restored */
+ ldp x30, xzr, [sp, #16 * 15] /* x30, SP */
+
+ ldp x28, x29, [sp, #16 * 14]
+ ldp x26, x27, [sp, #16 * 13]
+ ldp x24, x25, [sp, #16 * 12]
+ ldp x22, x23, [sp, #16 * 11]
+ ldp x20, x21, [sp, #16 * 10]
+ ldp x18, x19, [sp, #16 * 9]
+ ldp x16, x17, [sp, #16 * 8]
+ ldp x14, x15, [sp, #16 * 7]
+ ldp x12, x13, [sp, #16 * 6]
+ ldp x10, x11, [sp, #16 * 5]
+ ldp x8, x9, [sp, #16 * 4]
+ ldp x6, x7, [sp, #16 * 3]
+ ldp x4, x5, [sp, #16 * 2]
+ ldp x2, x3, [sp, #16 * 1]
+ ldp x0, x1, [sp, #16 * 0]
+
+ /* Drop the 17-slot frame. */
+ add sp, sp, #16 * 17
+
+ eret
+.endm
+
+.pushsection ".entry.text", "ax"
+.balign 0x800
+.global vectors
+vectors:
+.popsection
+
+.set vector, 0
+
+/*
+ * Build an exception handler for vector and append a jump to it into
+ * vectors (while making sure that it's 0x80 aligned).
+ *
+ * The handler body (handler_<label>) saves the registers, calls
+ * route_exception with x0 = sp (the saved register frame) and
+ * x1 = the current value of the assembler symbol "vector", then restores
+ * the registers; restore_registers ends with eret.  The symbol "vector" is
+ * incremented after each expansion.
+ */
+.macro HANDLER, label
+handler_\label:
+ save_registers
+ mov x0, sp
+ mov x1, #vector
+ bl route_exception
+ restore_registers
+
+/* Emit this vector's table entry: a branch to the handler body above. */
+.pushsection ".entry.text", "ax"
+.balign 0x80
+ b handler_\label
+.popsection
+
+.set vector, vector + 1
+.endm
+
+/*
+ * Emit a 0x80-aligned entry in ".entry.text" for a vector that is not
+ * expected to be taken: it branches to kvm_exit_unexpected_exception with
+ * x0 = the current value of "vector", x1 = 0 (ec) and x2 = 0 (valid_ec).
+ * The symbol "vector" is incremented after each expansion.
+ */
+.macro HANDLER_INVALID
+.pushsection ".entry.text", "ax"
+.balign 0x80
+/* This will abort so no need to save and restore registers. */
+ mov x0, #vector
+ mov x1, #0 /* ec */
+ mov x2, #0 /* valid_ec */
+ b kvm_exit_unexpected_exception
+.popsection
+
+.set vector, vector + 1
+.endm
+
+/*
+ * Caution: be sure to not add anything between the declaration of vectors
+ * above and these macro calls that will build the vectors table below it.
+ */
+ HANDLER_INVALID // Synchronous EL1t
+ HANDLER_INVALID // IRQ EL1t
+ HANDLER_INVALID // FIQ EL1t
+ HANDLER_INVALID // Error EL1t
+
+ HANDLER el1h_sync // Synchronous EL1h
+ HANDLER el1h_irq // IRQ EL1h
+ HANDLER el1h_fiq // FIQ EL1h
+ HANDLER el1h_error // Error EL1h
+
+ HANDLER el0_sync_64 // Synchronous 64-bit EL0
+ HANDLER el0_irq_64 // IRQ 64-bit EL0
+ HANDLER el0_fiq_64 // FIQ 64-bit EL0
+ HANDLER el0_error_64 // Error 64-bit EL0
+
+ HANDLER el0_sync_32 // Synchronous 32-bit EL0
+ HANDLER el0_irq_32 // IRQ 32-bit EL0
+ HANDLER el0_fiq_32 // FIQ 32-bit EL0
+ HANDLER el0_error_32 // Error 32-bit EL0
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index cee92d477dc0..9f49f6caafe5 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -6,14 +6,16 @@
*/
#include <linux/compiler.h>
+#include <assert.h>
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
+static vm_vaddr_t exception_handlers;
+
static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
{
return (v + vm->page_size) & ~(vm->page_size - 1);
@@ -72,19 +74,19 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
return 1 << (vm->page_shift - 3);
}
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
{
if (!vm->pgd_created) {
vm_paddr_t paddr = vm_phy_pages_alloc(vm,
page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
vm->pgd = paddr;
vm->pgd_created = true;
}
}
-void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot, uint64_t flags)
+static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t flags)
{
uint8_t attr_idx = flags & 7;
uint64_t *ptep;
@@ -104,25 +106,19 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
paddr, vm->max_gfn, vm->page_size);
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = vm_alloc_page_table(vm) | 3;
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = vm_alloc_page_table(vm) | 3;
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = vm_alloc_page_table(vm) | 3;
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -135,12 +131,11 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
*ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot)
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
- _virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx);
+ _virt_pg_map(vm, vaddr, paddr, attr_idx);
}
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
@@ -302,7 +297,7 @@ void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
DEFAULT_STACK_PGS * vm->page_size :
vm->page_size;
uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
- DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
+ DEFAULT_ARM64_GUEST_STACK_VADDR_MIN);
vm_vcpu_add(vm, vcpuid);
aarch64_vcpu_setup(vm, vcpuid, init);
@@ -334,6 +329,100 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
va_end(ap);
}
+/*
+ * Report an unexpected exception through a UCALL_UNHANDLED ucall carrying
+ * three arguments (vector, ec, valid_ec), then spin forever; this function
+ * never returns to its caller.
+ */
+void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+{
+ ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
+ while (1)
+ ;
+}
+
+/*
+ * Fail the test if the pending ucall of vCPU @vcpuid is UCALL_UNHANDLED;
+ * return silently for any other ucall.
+ *
+ * The ucall arguments follow kvm_exit_unexpected_exception():
+ * args[0] = vector, args[1] = ec, args[2] = valid_ec.  The ec is only
+ * reported when valid_ec is set.
+ */
void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
{
+ struct ucall uc;
+
+ if (get_ucall(vm, vcpuid, &uc) != UCALL_UNHANDLED)
+ return;
+
+ if (uc.args[2]) /* valid_ec */ {
+ assert(VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+ uc.args[0], uc.args[1]);
+ } else {
+ assert(!VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx)",
+ uc.args[0]);
+ }
+}
+
+/*
+ * Handler table indexed by [vector][ec].  Handlers installed for non-sync
+ * vectors are stored in, and looked up from, column 0.
+ */
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM];
+};
+
+/* Set VBAR_EL1 of vCPU @vcpuid to the address of the "vectors" symbol. */
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ extern char vectors;
+
+ set_reg(vm, vcpuid, ARM64_SYS_REG(VBAR_EL1), (uint64_t)&vectors);
+}
+
+/*
+ * Dispatch an exception taken on @vector to the handler registered for it.
+ *
+ * For the synchronous vectors the exception class is read from esr_el1 and
+ * used as the second table index; IRQ/FIQ/Error vectors use index 0.  When
+ * the vector is not one of the known ones, no table has been published, or
+ * no handler is registered, the exception is reported as unexpected.
+ */
+void route_exception(struct ex_regs *regs, int vector)
+{
+	struct handlers *table = (struct handlers *)exception_handlers;
+	bool known_vector = true;
+	bool valid_ec = false;
+	int ec = 0;
+
+	switch (vector) {
+	case VECTOR_SYNC_CURRENT:
+	case VECTOR_SYNC_LOWER_64:
+		ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK;
+		valid_ec = true;
+		break;
+	case VECTOR_IRQ_CURRENT:
+	case VECTOR_IRQ_LOWER_64:
+	case VECTOR_FIQ_CURRENT:
+	case VECTOR_FIQ_LOWER_64:
+	case VECTOR_ERROR_CURRENT:
+	case VECTOR_ERROR_LOWER_64:
+		/* No exception class for these; ec stays 0. */
+		break;
+	default:
+		known_vector = false;
+		break;
+	}
+
+	if (known_vector && table && table->exception_handlers[vector][ec]) {
+		table->exception_handlers[vector][ec](regs);
+		return;
+	}
+
+	kvm_exit_unexpected_exception(vector, ec, valid_ec);
+}
+
+/*
+ * Allocate the handler table in guest virtual memory and publish it to the
+ * guest.
+ *
+ * The table's guest virtual address is recorded in vm->handlers and also
+ * written into the guest's copy of the exception_handlers global, which is
+ * what route_exception() dereferences.
+ */
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+	/*
+	 * vm_vaddr_alloc() takes (vm, size, vaddr_min); the memslot
+	 * arguments were dropped from its signature.
+	 */
+	vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers),
+			vm->page_size);
+
+	*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+/*
+ * Register @handler for exception class @ec on the synchronous vector
+ * @vector.  The table lives in guest memory and is edited through its host
+ * mapping.
+ */
+void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
+			     void (*handler)(struct ex_regs *))
+{
+	struct handlers *table;
+
+	table = addr_gva2hva(vm, vm->handlers);
+
+	assert(VECTOR_IS_SYNC(vector));
+	assert(vector < VECTOR_NUM);
+	assert(ec < ESR_EC_NUM);
+
+	table->exception_handlers[vector][ec] = handler;
+}
+
+/*
+ * Register @handler for the non-synchronous vector @vector.  Such vectors
+ * have no exception class, so the handler goes into column 0 of the table.
+ */
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+				  void (*handler)(struct ex_regs *))
+{
+	struct handlers *table;
+
+	table = addr_gva2hva(vm, vm->handlers);
+
+	assert(!VECTOR_IS_SYNC(vector));
+	assert(vector < VECTOR_NUM);
+
+	table->exception_handlers[vector][0] = handler;
}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index 2f37b90ee1a9..e0b0164e9af8 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -14,7 +14,7 @@ static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
return false;
- virt_pg_map(vm, gpa, gpa, 0);
+ virt_pg_map(vm, gpa, gpa);
ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
sync_global_to_guest(vm, ucall_exit_mmio_addr);
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index bc75a91e00a6..eac44f5d0db0 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -111,8 +111,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
* by the image and it needs to have sufficient available physical pages, to
* back the virtual pages used to load the image.
*/
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- uint32_t data_memslot, uint32_t pgd_memslot)
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
{
off_t offset, offset_rv;
Elf64_Ehdr hdr;
@@ -164,8 +163,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
seg_vend |= vm->page_size - 1;
size_t seg_size = seg_vend - seg_vstart + 1;
- vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
- data_memslot, pgd_memslot);
+ vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart);
TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
"virtual memory for segment at requested min addr,\n"
" segment idx: %u\n"
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index fc83f6c5902d..5b56b57b3c20 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -32,6 +32,34 @@ static void *align(void *x, size_t size)
}
/*
+ * Open KVM_DEV_PATH if available, otherwise exit the entire program.
+ *
+ * Input Args:
+ * flags - The flags to pass when opening KVM_DEV_PATH.
+ *
+ * Return:
+ * The opened file descriptor of /dev/kvm.
+ */
+static int _open_kvm_dev_path_or_exit(int flags)
+{
+	int kvm_fd = open(KVM_DEV_PATH, flags);
+
+	/* Common case: the device node opened fine. */
+	if (kvm_fd >= 0)
+		return kvm_fd;
+
+	/* Otherwise report why the test is skipped and leave the program. */
+	print_skip("%s not available, is KVM loaded? (errno: %d)",
+		   KVM_DEV_PATH, errno);
+	exit(KSFT_SKIP);
+}
+
+/*
+ * Read-only variant of _open_kvm_dev_path_or_exit(): returns the opened
+ * file descriptor, or exits the program if KVM_DEV_PATH cannot be opened.
+ */
+int open_kvm_dev_path_or_exit(void)
+{
+ return _open_kvm_dev_path_or_exit(O_RDONLY);
+}
+
+/*
* Capability
*
* Input Args:
@@ -52,12 +80,9 @@ int kvm_check_cap(long cap)
int ret;
int kvm_fd;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
-
+ kvm_fd = open_kvm_dev_path_or_exit();
ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
- TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+ TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
" rc: %i errno: %i", ret, errno);
close(kvm_fd);
@@ -128,9 +153,7 @@ void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
static void vm_open(struct kvm_vm *vm, int perm)
{
- vm->kvm_fd = open(KVM_DEV_PATH, perm);
- if (vm->kvm_fd < 0)
- exit(KSFT_SKIP);
+ vm->kvm_fd = _open_kvm_dev_path_or_exit(perm);
if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
print_skip("immediate_exit not available");
@@ -152,6 +175,7 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
[VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
+ [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
"Missing new mode strings?");
@@ -169,6 +193,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
{ 40, 48, 0x1000, 12 },
{ 40, 48, 0x10000, 16 },
{ 0, 0, 0x1000, 12 },
+ { 47, 64, 0x1000, 12 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
@@ -203,7 +228,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
TEST_ASSERT(vm != NULL, "Insufficient Memory");
INIT_LIST_HEAD(&vm->vcpus);
- INIT_LIST_HEAD(&vm->userspace_mem_regions);
+ vm->regions.gpa_tree = RB_ROOT;
+ vm->regions.hva_tree = RB_ROOT;
+ hash_init(vm->regions.slot_hash);
vm->mode = mode;
vm->type = 0;
@@ -252,6 +279,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
break;
+ case VM_MODE_P47V64_4K:
+ vm->pgtable_levels = 5;
+ break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
}
@@ -283,21 +313,50 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
return vm;
}
+/*
+ * VM Create with customized parameters
+ *
+ * Input Args:
+ * mode - VM Mode (e.g. VM_MODE_P52V48_4K)
+ * nr_vcpus - VCPU count
+ * slot0_mem_pages - Slot0 physical memory size
+ * extra_mem_pages - Non-slot0 physical memory total size
+ * num_percpu_pages - Per-cpu physical memory pages
+ * guest_code - Guest entry point
+ * vcpuids - VCPU IDs
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K),
+ * with customized slot0 memory size, at least 512 pages currently.
+ * extra_mem_pages is only used to calculate the maximum page table size,
+ * no real memory allocation for non-slot0 memory in this function.
+ */
struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
- uint64_t extra_mem_pages, uint32_t num_percpu_pages,
- void *guest_code, uint32_t vcpuids[])
+ uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
+ uint32_t num_percpu_pages, void *guest_code,
+ uint32_t vcpuids[])
{
+ uint64_t vcpu_pages, extra_pg_pages, pages;
+ struct kvm_vm *vm;
+ int i;
+
+ /* Force slot0 memory size to be no smaller than DEFAULT_GUEST_PHY_PAGES */
+ if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
+ slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
+
/* The maximum page table size for a memory region will be when the
* smallest pages are used. Considering each page contains x page
* table descriptors, the total extra size for page tables (for extra
* N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
* than N/x*2.
*/
- uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
- uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
- uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
- struct kvm_vm *vm;
- int i;
+ vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
+ extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
+ pages = slot0_mem_pages + vcpu_pages + extra_pg_pages;
TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
"nr_vcpus = %d too large for host, max-vcpus = %d",
@@ -306,7 +365,7 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
pages = vm_adjust_num_guest_pages(mode, pages);
vm = vm_create(mode, pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ kvm_vm_elf_load(vm, program_invocation_name);
#ifdef __x86_64__
vm_create_irqchip(vm);
@@ -316,10 +375,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-#ifdef __x86_64__
- vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
-#endif
}
return vm;
@@ -329,8 +384,8 @@ struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_me
uint32_t num_percpu_pages, void *guest_code,
uint32_t vcpuids[])
{
- return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, extra_mem_pages,
- num_percpu_pages, guest_code, vcpuids);
+ return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
+ extra_mem_pages, num_percpu_pages, guest_code, vcpuids);
}
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
@@ -355,13 +410,14 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
*/
void kvm_vm_restart(struct kvm_vm *vmp, int perm)
{
+ int ctr;
struct userspace_mem_region *region;
vm_open(vmp, perm);
if (vmp->has_irqchip)
vm_create_irqchip(vmp);
- list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
+ hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
" rc: %i errno: %i\n"
@@ -424,14 +480,21 @@ uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
- struct userspace_mem_region *region;
+ struct rb_node *node;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ for (node = vm->regions.gpa_tree.rb_node; node; ) {
+ struct userspace_mem_region *region =
+ container_of(node, struct userspace_mem_region, gpa_node);
uint64_t existing_start = region->region.guest_phys_addr;
uint64_t existing_end = region->region.guest_phys_addr
+ region->region.memory_size - 1;
if (start <= existing_end && end >= existing_start)
return region;
+
+ if (start < existing_start)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
}
return NULL;
@@ -546,11 +609,16 @@ void kvm_vm_release(struct kvm_vm *vmp)
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
- struct userspace_mem_region *region)
+ struct userspace_mem_region *region,
+ bool unlink)
{
int ret;
- list_del(&region->list);
+ if (unlink) {
+ rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
+ rb_erase(&region->hva_node, &vm->regions.hva_tree);
+ hash_del(&region->slot_node);
+ }
region->region.memory_size = 0;
ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
@@ -569,14 +637,16 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
*/
void kvm_vm_free(struct kvm_vm *vmp)
{
- struct userspace_mem_region *region, *tmp;
+ int ctr;
+ struct hlist_node *node;
+ struct userspace_mem_region *region;
if (vmp == NULL)
return;
/* Free userspace_mem_regions. */
- list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
- __vm_mem_region_delete(vmp, region);
+ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
+ __vm_mem_region_delete(vmp, region, false);
/* Free sparsebit arrays. */
sparsebit_free(&vmp->vpages_valid);
@@ -658,13 +728,64 @@ int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
return 0;
}
+/*
+ * Link @region into @gpa_tree, which is ordered by guest physical address.
+ * Inserting a region whose guest_phys_addr equals that of a region already
+ * in the tree trips a test assertion.
+ */
+static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
+					       struct userspace_mem_region *region)
+{
+	struct rb_node **link = &gpa_tree->rb_node;
+	struct rb_node *parent = NULL;
+
+	/* Walk down to the empty slot where the new node belongs. */
+	while (*link) {
+		struct userspace_mem_region *other =
+			container_of(*link, struct userspace_mem_region, gpa_node);
+
+		parent = *link;
+		if (region->region.guest_phys_addr <
+		    other->region.guest_phys_addr) {
+			link = &parent->rb_left;
+			continue;
+		}
+
+		TEST_ASSERT(region->region.guest_phys_addr !=
+			    other->region.guest_phys_addr,
+			    "Duplicate GPA in region tree");
+		link = &parent->rb_right;
+	}
+
+	rb_link_node(&region->gpa_node, parent, link);
+	rb_insert_color(&region->gpa_node, gpa_tree);
+}
+
+/*
+ * Link @region into @hva_tree, which is ordered by the host_mem pointer.
+ * Inserting a region whose host_mem equals that of a region already in the
+ * tree trips a test assertion.
+ */
+static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
+					       struct userspace_mem_region *region)
+{
+	struct rb_node **link = &hva_tree->rb_node;
+	struct rb_node *parent = NULL;
+
+	/* Walk down to the empty slot where the new node belongs. */
+	while (*link) {
+		struct userspace_mem_region *other =
+			container_of(*link, struct userspace_mem_region, hva_node);
+
+		parent = *link;
+		if (region->host_mem < other->host_mem) {
+			link = &parent->rb_left;
+			continue;
+		}
+
+		TEST_ASSERT(region->host_mem !=
+			    other->host_mem,
+			    "Duplicate HVA in region tree");
+		link = &parent->rb_right;
+	}
+
+	rb_link_node(&region->hva_node, parent, link);
+	rb_insert_color(&region->hva_node, hva_tree);
+}
+
/*
* VM Userspace Memory Region Add
*
* Input Args:
* vm - Virtual Machine
- * backing_src - Storage source for this region.
- * NULL to use anonymous memory.
+ * src_type - Storage source for this region.
+ * NULL to use anonymous memory.
* guest_paddr - Starting guest physical address
* slot - KVM region slot
* npages - Number of physical pages
@@ -722,7 +843,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
(uint64_t) region->region.memory_size);
/* Confirm no region with the requested slot already exists. */
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
+ slot) {
if (region->region.slot != slot)
continue;
@@ -755,11 +877,30 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
if (alignment > 1)
region->mmap_size += alignment;
+ region->fd = -1;
+ if (backing_src_is_shared(src_type)) {
+ int memfd_flags = MFD_CLOEXEC;
+
+ if (src_type == VM_MEM_SRC_SHARED_HUGETLB)
+ memfd_flags |= MFD_HUGETLB;
+
+ region->fd = memfd_create("kvm_selftest", memfd_flags);
+ TEST_ASSERT(region->fd != -1,
+ "memfd_create failed, errno: %i", errno);
+
+ ret = ftruncate(region->fd, region->mmap_size);
+ TEST_ASSERT(ret == 0, "ftruncate failed, errno: %i", errno);
+
+ ret = fallocate(region->fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
+ region->mmap_size);
+ TEST_ASSERT(ret == 0, "fallocate failed, errno: %i", errno);
+ }
+
region->mmap_start = mmap(NULL, region->mmap_size,
PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS
- | vm_mem_backing_src_alias(src_type)->flag,
- -1, 0);
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd, 0);
TEST_ASSERT(region->mmap_start != MAP_FAILED,
"test_malloc failed, mmap_start: %p errno: %i",
region->mmap_start, errno);
@@ -793,8 +934,23 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
ret, errno, slot, flags,
guest_paddr, (uint64_t) region->region.memory_size);
- /* Add to linked-list of memory regions. */
- list_add(&region->list, &vm->userspace_mem_regions);
+ /* Add to quick lookup data structures */
+ vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
+ vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
+ hash_add(vm->regions.slot_hash, &region->slot_node, slot);
+
+ /* If shared memory, create an alias. */
+ if (region->fd >= 0) {
+ region->mmap_alias = mmap(NULL, region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd, 0);
+ TEST_ASSERT(region->mmap_alias != MAP_FAILED,
+ "mmap of alias failed, errno: %i", errno);
+
+ /* Align host alias address */
+ region->host_alias = align(region->mmap_alias, alignment);
+ }
}
/*
@@ -817,10 +973,10 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
struct userspace_mem_region *region;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
+ memslot)
if (region->region.slot == memslot)
return region;
- }
fprintf(stderr, "No mem region with the requested slot found,\n"
" requested slot: %u\n", memslot);
@@ -905,7 +1061,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
*/
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
- __vm_mem_region_delete(vm, memslot2region(vm, slot));
+ __vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}
/*
@@ -925,9 +1081,7 @@ static int vcpu_mmap_sz(void)
{
int dev_fd, ret;
- dev_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (dev_fd < 0)
- exit(KSFT_SKIP);
+ dev_fd = open_kvm_dev_path_or_exit();
ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
TEST_ASSERT(ret >= sizeof(struct kvm_run),
@@ -1093,12 +1247,13 @@ va_found:
* a unique set of pages, with the minimum real allocation being at least
* a page.
*/
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot)
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
- virt_pgd_alloc(vm, pgd_memslot);
+ virt_pgd_alloc(vm);
+ vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
+ KVM_UTIL_MIN_PFN * vm->page_size, 0);
/*
* Find an unused range of virtual page addresses of at least
@@ -1108,13 +1263,9 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
/* Map the virtual pages. */
for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
- pages--, vaddr += vm->page_size) {
- vm_paddr_t paddr;
-
- paddr = vm_phy_page_alloc(vm,
- KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
+ pages--, vaddr += vm->page_size, paddr += vm->page_size) {
- virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ virt_pg_map(vm, vaddr, paddr);
sparsebit_set(vm->vpages_mapped,
vaddr >> vm->page_shift);
@@ -1124,6 +1275,44 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
}
/*
+ * VM Virtual Address Allocate Pages
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least @nr_pages system pages worth of bytes within the virtual
+ * address space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
+{
+ return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
+}
+
+/*
+ * VM Virtual Address Allocate Page
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least one system page worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
+{
+ return vm_vaddr_alloc_pages(vm, 1);
+}
+
+/*
* Map a range of VM virtual address to the VM's physical address
*
* Input Args:
@@ -1141,7 +1330,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
* @npages starting at @vaddr to the page range starting at @paddr.
*/
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages, uint32_t pgd_memslot)
+ unsigned int npages)
{
size_t page_size = vm->page_size;
size_t size = npages * page_size;
@@ -1150,7 +1339,7 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ virt_pg_map(vm, vaddr, paddr);
vaddr += page_size;
paddr += page_size;
}
@@ -1177,16 +1366,14 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
struct userspace_mem_region *region;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
- if ((gpa >= region->region.guest_phys_addr)
- && (gpa <= (region->region.guest_phys_addr
- + region->region.memory_size - 1)))
- return (void *) ((uintptr_t) region->host_mem
- + (gpa - region->region.guest_phys_addr));
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ if (!region) {
+ TEST_FAIL("No vm physical memory at 0x%lx", gpa);
+ return NULL;
}
- TEST_FAIL("No vm physical memory at 0x%lx", gpa);
- return NULL;
+ return (void *)((uintptr_t)region->host_mem
+ + (gpa - region->region.guest_phys_addr));
}
/*
@@ -1208,15 +1395,22 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
*/
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
- struct userspace_mem_region *region;
+ struct rb_node *node;
+
+ for (node = vm->regions.hva_tree.rb_node; node; ) {
+ struct userspace_mem_region *region =
+ container_of(node, struct userspace_mem_region, hva_node);
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
- if ((hva >= region->host_mem)
- && (hva <= (region->host_mem
- + region->region.memory_size - 1)))
- return (vm_paddr_t) ((uintptr_t)
- region->region.guest_phys_addr
- + (hva - (uintptr_t) region->host_mem));
+ if (hva >= region->host_mem) {
+ if (hva <= (region->host_mem
+ + region->region.memory_size - 1))
+ return (vm_paddr_t)((uintptr_t)
+ region->region.guest_phys_addr
+ + (hva - (uintptr_t)region->host_mem));
+
+ node = node->rb_right;
+ } else
+ node = node->rb_left;
}
TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
@@ -1224,6 +1418,42 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
}
/*
+ * Address VM physical to Host Virtual *alias*.
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gpa - VM physical address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent address within the host virtual *alias* area, or NULL
+ * (without failing the test) if the guest memory is not shared (so
+ * no alias exists).
+ *
+ * When vm_create() and related functions are called with a shared memory
+ * src_type, we also create a writable, shared alias mapping of the
+ * underlying guest memory. This allows the host to manipulate guest memory
+ * without mapping that memory in the guest's address space. And, for
+ * userfaultfd-based demand paging, we can do so without triggering userfaults.
+ */
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+ struct userspace_mem_region *region;
+ uintptr_t offset;
+
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ if (!region)
+ return NULL;
+
+ if (!region->host_alias)
+ return NULL;
+
+ offset = gpa - region->region.guest_phys_addr;
+ return (void *) ((uintptr_t) region->host_alias + offset);
+}
+
+/*
* VM Create IRQ Chip
*
* Input Args:
@@ -1822,6 +2052,7 @@ int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
*/
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
+ int ctr;
struct userspace_mem_region *region;
struct vcpu *vcpu;
@@ -1829,7 +2060,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
fprintf(stream, "%*sMem Regions:\n", indent, "");
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
"host_virt: %p\n", indent + 2, "",
(uint64_t) region->region.guest_phys_addr,
@@ -1978,6 +2209,14 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}
+/* Arbitrary minimum physical address used for virtual translation tables. */
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
+{
+ return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+}
+
/*
* Address Guest Virtual to Host Virtual
*
@@ -2015,10 +2254,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm)
if (vm == NULL) {
/* Ensure that the KVM vendor-specific module is loaded. */
- f = fopen(KVM_DEV_PATH, "r");
- TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d",
- errno);
- fclose(f);
+ close(open_kvm_dev_path_or_exit());
}
f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
@@ -2041,7 +2277,7 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm)
return vm->page_shift;
}
-unsigned int vm_get_max_gfn(struct kvm_vm *vm)
+uint64_t vm_get_max_gfn(struct kvm_vm *vm)
{
return vm->max_gfn;
}
@@ -2090,3 +2326,15 @@ unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
return vm_adjust_num_guest_pages(mode, n);
}
+
+int vm_get_stats_fd(struct kvm_vm *vm)
+{
+ return ioctl(vm->fd, KVM_GET_STATS_FD, NULL);
+}
+
+int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index 91ce1b5d480b..a03febc24ba6 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -8,6 +8,9 @@
#ifndef SELFTEST_KVM_UTIL_INTERNAL_H
#define SELFTEST_KVM_UTIL_INTERNAL_H
+#include "linux/hashtable.h"
+#include "linux/rbtree.h"
+
#include "sparsebit.h"
struct userspace_mem_region {
@@ -16,9 +19,13 @@ struct userspace_mem_region {
int fd;
off_t offset;
void *host_mem;
+ void *host_alias;
void *mmap_start;
+ void *mmap_alias;
size_t mmap_size;
- struct list_head list;
+ struct rb_node gpa_node;
+ struct rb_node hva_node;
+ struct hlist_node slot_node;
};
struct vcpu {
@@ -31,6 +38,12 @@ struct vcpu {
uint32_t dirty_gfns_count;
};
+struct userspace_mem_regions {
+ struct rb_root gpa_tree;
+ struct rb_root hva_tree;
+ DECLARE_HASHTABLE(slot_hash, 9);
+};
+
struct kvm_vm {
int mode;
unsigned long type;
@@ -43,7 +56,7 @@ struct kvm_vm {
unsigned int va_bits;
uint64_t max_gfn;
struct list_head vcpus;
- struct list_head userspace_mem_regions;
+ struct userspace_mem_regions regions;
struct sparsebit *vpages_valid;
struct sparsebit *vpages_mapped;
bool has_irqchip;
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 81490b9b4e32..b488f4aefea8 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -2,6 +2,7 @@
/*
* Copyright (C) 2020, Google LLC.
*/
+#include <inttypes.h>
#include "kvm_util.h"
#include "perf_test_util.h"
@@ -68,7 +69,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
"Guest memory size is not guest page size aligned.");
- vm = vm_create_with_vcpus(mode, vcpus,
+ vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
(vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
0, guest_code, NULL);
@@ -80,7 +81,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
*/
TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
"Requested more guest memory than address space allows.\n"
- " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
+ " guest pages: %" PRIx64 " max gfn: %" PRIx64
+ " vcpus: %d wss: %" PRIx64 "]\n",
guest_num_pages, vm_get_max_gfn(vm), vcpus,
vcpu_memory_bytes);
@@ -99,7 +101,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
guest_num_pages, 0);
/* Do mapping for the demand paging memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
ucall_init(vm, NULL);
diff --git a/tools/testing/selftests/kvm/lib/rbtree.c b/tools/testing/selftests/kvm/lib/rbtree.c
new file mode 100644
index 000000000000..a703f0194ea3
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/rbtree.c
@@ -0,0 +1 @@
+#include "../../../../lib/rbtree.c"
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 0152f356c099..f87c7137598e 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -9,11 +9,9 @@
#include "kvm_util.h"
#include "../kvm_util_internal.h"
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
#define PAGES_PER_REGION 4
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
{
vm_paddr_t paddr;
@@ -24,7 +22,7 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
return;
paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
vm->pgd = paddr;
@@ -36,12 +34,12 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
* a page table (ri == 4). Returns a suitable region/segment table entry
* which points to the freshly allocated pages.
*/
-static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
{
uint64_t taddr;
taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
return (taddr & REGION_ENTRY_ORIGIN)
@@ -49,8 +47,7 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
- uint32_t memslot)
+void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
{
int ri, idx;
uint64_t *entry;
@@ -77,7 +74,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
if (entry[idx] & REGION_ENTRY_INVALID)
- entry[idx] = virt_alloc_region(vm, ri, memslot);
+ entry[idx] = virt_alloc_region(vm, ri);
entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
}
@@ -170,7 +167,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
vm->page_size);
stack_vaddr = vm_vaddr_alloc(vm, stack_size,
- DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+ DEFAULT_GUEST_STACK_VADDR_MIN);
vm_vcpu_add(vm, vcpuid);
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 63d2bc7d757b..af1031fed97f 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -166,72 +166,89 @@ size_t get_def_hugetlb_pagesz(void)
return 0;
}
+#define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
+#define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB)
+
const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
{
static const struct vm_mem_backing_src_alias aliases[] = {
[VM_MEM_SRC_ANONYMOUS] = {
.name = "anonymous",
- .flag = 0,
+ .flag = ANON_FLAGS,
},
[VM_MEM_SRC_ANONYMOUS_THP] = {
.name = "anonymous_thp",
- .flag = 0,
+ .flag = ANON_FLAGS,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB] = {
.name = "anonymous_hugetlb",
- .flag = MAP_HUGETLB,
+ .flag = ANON_HUGE_FLAGS,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = {
.name = "anonymous_hugetlb_16kb",
- .flag = MAP_HUGETLB | MAP_HUGE_16KB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16KB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = {
.name = "anonymous_hugetlb_64kb",
- .flag = MAP_HUGETLB | MAP_HUGE_64KB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_64KB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = {
.name = "anonymous_hugetlb_512kb",
- .flag = MAP_HUGETLB | MAP_HUGE_512KB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_512KB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = {
.name = "anonymous_hugetlb_1mb",
- .flag = MAP_HUGETLB | MAP_HUGE_1MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_1MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = {
.name = "anonymous_hugetlb_2mb",
- .flag = MAP_HUGETLB | MAP_HUGE_2MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_2MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = {
.name = "anonymous_hugetlb_8mb",
- .flag = MAP_HUGETLB | MAP_HUGE_8MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_8MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = {
.name = "anonymous_hugetlb_16mb",
- .flag = MAP_HUGETLB | MAP_HUGE_16MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = {
.name = "anonymous_hugetlb_32mb",
- .flag = MAP_HUGETLB | MAP_HUGE_32MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_32MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = {
.name = "anonymous_hugetlb_256mb",
- .flag = MAP_HUGETLB | MAP_HUGE_256MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_256MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = {
.name = "anonymous_hugetlb_512mb",
- .flag = MAP_HUGETLB | MAP_HUGE_512MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_512MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = {
.name = "anonymous_hugetlb_1gb",
- .flag = MAP_HUGETLB | MAP_HUGE_1GB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_1GB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = {
.name = "anonymous_hugetlb_2gb",
- .flag = MAP_HUGETLB | MAP_HUGE_2GB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_2GB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = {
.name = "anonymous_hugetlb_16gb",
- .flag = MAP_HUGETLB | MAP_HUGE_16GB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16GB,
+ },
+ [VM_MEM_SRC_SHMEM] = {
+ .name = "shmem",
+ .flag = MAP_SHARED,
+ },
+ [VM_MEM_SRC_SHARED_HUGETLB] = {
+ .name = "shared_hugetlb",
+ /*
+ * No MAP_HUGETLB, we use MFD_HUGETLB instead. Since
+ * we're using "file backed" memory, we need to specify
+ * this when the FD is created, not when the area is
+ * mapped.
+ */
+ .flag = MAP_SHARED,
},
};
_Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES,
@@ -250,10 +267,12 @@ size_t get_backing_src_pagesz(uint32_t i)
switch (i) {
case VM_MEM_SRC_ANONYMOUS:
+ case VM_MEM_SRC_SHMEM:
return getpagesize();
case VM_MEM_SRC_ANONYMOUS_THP:
return get_trans_hugepagesz();
case VM_MEM_SRC_ANONYMOUS_HUGETLB:
+ case VM_MEM_SRC_SHARED_HUGETLB:
return get_def_hugetlb_pagesz();
default:
return MAP_HUGE_PAGE_SIZE(flag);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c
new file mode 100644
index 000000000000..7168e25c194e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/x86_64/apic.c
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include "apic.h"
+
+void apic_disable(void)
+{
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) &
+ ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
+}
+
+void xapic_enable(void)
+{
+ uint64_t val = rdmsr(MSR_IA32_APICBASE);
+
+ /* Per SDM: to enable xAPIC from x2APIC mode, first disable the APIC */
+ if (val & MSR_IA32_APICBASE_EXTD) {
+ apic_disable();
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
+ } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+ wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
+ }
+
+ /*
+ * Per SDM: reset value of spurious interrupt vector register has the
+ * APIC software enabled bit=0. It must be enabled in addition to the
+ * enable bit in the MSR.
+ */
+ val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
+ xapic_write_reg(APIC_SPIV, val);
+}
+
+void x2apic_enable(void)
+{
+ wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+ MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+ x2apic_write_reg(APIC_SPIV,
+ x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index a8906e60a108..28cb881f440d 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -17,13 +17,10 @@
#define DEFAULT_CODE_SELECTOR 0x8
#define DEFAULT_DATA_SELECTOR 0x10
-/* Minimum physical address used for virtual translation tables. */
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
vm_vaddr_t exception_handlers;
/* Virtual translation table structure declarations */
-struct pageMapL4Entry {
+struct pageUpperEntry {
uint64_t present:1;
uint64_t writable:1;
uint64_t user:1;
@@ -33,37 +30,7 @@ struct pageMapL4Entry {
uint64_t ignored_06:1;
uint64_t page_size:1;
uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageDirectoryPointerEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageDirectoryEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
+ uint64_t pfn:40;
uint64_t ignored_62_52:11;
uint64_t execute_disable:1;
};
@@ -79,7 +46,7 @@ struct pageTableEntry {
uint64_t reserved_07:1;
uint64_t global:1;
uint64_t ignored_11_09:3;
- uint64_t address:40;
+ uint64_t pfn:40;
uint64_t ignored_62_52:11;
uint64_t execute_disable:1;
};
@@ -207,96 +174,211 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
}
}
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
{
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
/* If needed, create page map l4 table. */
if (!vm->pgd_created) {
- vm_paddr_t paddr = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- vm->pgd = paddr;
+ vm->pgd = vm_alloc_page_table(vm);
vm->pgd_created = true;
}
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot)
+static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
+ int level)
+{
+ uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
+ int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu;
+
+ return &page_table[index];
+}
+
+static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
+ uint64_t pt_pfn,
+ uint64_t vaddr,
+ uint64_t paddr,
+ int level,
+ enum x86_page_size page_size)
+{
+ struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+
+ if (!pte->present) {
+ pte->writable = true;
+ pte->present = true;
+ pte->page_size = (level == page_size);
+ if (pte->page_size)
+ pte->pfn = paddr >> vm->page_shift;
+ else
+ pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(level != page_size,
+ "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
+ page_size, vaddr);
+ TEST_ASSERT(!pte->page_size,
+ "Cannot create page table at level: %u, vaddr: 0x%lx\n",
+ level, vaddr);
+ }
+ return pte;
+}
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ enum x86_page_size page_size)
+{
+ const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
+ struct pageUpperEntry *pml4e, *pdpe, *pde;
+ struct pageTableEntry *pte;
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+ "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((vaddr % pg_size) == 0,
+ "Virtual address not aligned,\n"
+ "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % pg_size) == 0,
+ "Physical address not aligned,\n"
+ " paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ /*
+ * Allocate upper level page tables, if not already present. Return
+ * early if a hugepage was created.
+ */
+ pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
+ vaddr, paddr, 3, page_size);
+ if (pml4e->page_size)
+ return;
+
+ pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
+ if (pdpe->page_size)
+ return;
+
+ pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
+ if (pde->page_size)
+ return;
+
+ /* Fill in page table entry. */
+ pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
+ TEST_ASSERT(!pte->present,
+ "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
+ pte->pfn = paddr >> vm->page_shift;
+ pte->writable = true;
+ pte->present = 1;
+}
+
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
+}
+
+static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
+ uint64_t vaddr)
{
uint16_t index[4];
- struct pageMapL4Entry *pml4e;
+ struct pageUpperEntry *pml4e, *pdpe, *pde;
+ struct pageTableEntry *pte;
+ struct kvm_cpuid_entry2 *entry;
+ struct kvm_sregs sregs;
+ int max_phy_addr;
+ /* Set the bottom 52 bits. */
+ uint64_t rsvd_mask = 0x000fffffffffffff;
+
+ entry = kvm_get_supported_cpuid_index(0x80000008, 0);
+ max_phy_addr = entry->eax & 0x000000ff;
+ /* Clear the bottom bits of the reserved mask. */
+ rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr;
+
+ /*
+ * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
+ * with 4-Level Paging and 5-Level Paging".
+ * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1,
+ * the XD flag (bit 63) is reserved.
+ */
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+ if ((sregs.efer & EFER_NX) == 0) {
+ rsvd_mask |= (1ull << 63);
+ }
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- TEST_ASSERT((vaddr % vm->page_size) == 0,
- "Virtual address not on page boundary,\n"
- " vaddr: 0x%lx vm->page_size: 0x%x",
- vaddr, vm->page_size);
TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
(vaddr >> vm->page_shift)),
"Invalid virtual address, vaddr: 0x%lx",
vaddr);
- TEST_ASSERT((paddr % vm->page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- paddr, vm->page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
+ /*
+ * Based on the mode check above there are 48 bits in the vaddr, so
+ * shift 16 to sign extend the last bit (bit-47).
+ */
+ TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
+ "Canonical check failed. The virtual address is invalid.");
index[0] = (vaddr >> 12) & 0x1ffu;
index[1] = (vaddr >> 21) & 0x1ffu;
index[2] = (vaddr >> 30) & 0x1ffu;
index[3] = (vaddr >> 39) & 0x1ffu;
- /* Allocate page directory pointer table if not present. */
pml4e = addr_gpa2hva(vm, vm->pgd);
- if (!pml4e[index[3]].present) {
- pml4e[index[3]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pml4e[index[3]].writable = true;
- pml4e[index[3]].present = true;
- }
+ TEST_ASSERT(pml4e[index[3]].present,
+ "Expected pml4e to be present for gva: 0x%08lx", vaddr);
+ TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
+ (rsvd_mask | (1ull << 7))) == 0,
+ "Unexpected reserved bits set.");
+
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
+ TEST_ASSERT(pdpe[index[2]].present,
+ "Expected pdpe to be present for gva: 0x%08lx", vaddr);
+ TEST_ASSERT(pdpe[index[2]].page_size == 0,
+ "Expected pdpe to map a pde not a 1-GByte page.");
+ TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
+ "Unexpected reserved bits set.");
+
+ pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
+ TEST_ASSERT(pde[index[1]].present,
+ "Expected pde to be present for gva: 0x%08lx", vaddr);
+ TEST_ASSERT(pde[index[1]].page_size == 0,
+ "Expected pde to map a pte not a 2-MByte page.");
+ TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
+ "Unexpected reserved bits set.");
+
+ pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
+ TEST_ASSERT(pte[index[0]].present,
+ "Expected pte to be present for gva: 0x%08lx", vaddr);
+
+ return &pte[index[0]];
+}
- /* Allocate page directory table if not present. */
- struct pageDirectoryPointerEntry *pdpe;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].present) {
- pdpe[index[2]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pdpe[index[2]].writable = true;
- pdpe[index[2]].present = true;
- }
+uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
+{
+ struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
- /* Allocate page table if not present. */
- struct pageDirectoryEntry *pde;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].present) {
- pde[index[1]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pde[index[1]].writable = true;
- pde[index[1]].present = true;
- }
+ return *(uint64_t *)pte;
+}
- /* Fill in page table entry. */
- struct pageTableEntry *pte;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- pte[index[0]].address = paddr >> vm->page_shift;
- pte[index[0]].writable = true;
- pte[index[0]].present = 1;
+void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
+ uint64_t pte)
+{
+ struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
+ vaddr);
+
+ *(uint64_t *)new_pte = pte;
}
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
- struct pageMapL4Entry *pml4e, *pml4e_start;
- struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
- struct pageDirectoryEntry *pde, *pde_start;
+ struct pageUpperEntry *pml4e, *pml4e_start;
+ struct pageUpperEntry *pdpe, *pdpe_start;
+ struct pageUpperEntry *pde, *pde_start;
struct pageTableEntry *pte, *pte_start;
if (!vm->pgd_created)
@@ -307,8 +389,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*s index hvaddr gpaddr "
"addr w exec dirty\n",
indent, "");
- pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
- vm->pgd);
+ pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
pml4e = &pml4e_start[n1];
if (!pml4e->present)
@@ -317,11 +398,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
" %u\n",
indent, "",
pml4e - pml4e_start, pml4e,
- addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
+ addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
pml4e->writable, pml4e->execute_disable);
- pdpe_start = addr_gpa2hva(vm, pml4e->address
- * vm->page_size);
+ pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
pdpe = &pdpe_start[n2];
if (!pdpe->present)
@@ -331,11 +411,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
indent, "",
pdpe - pdpe_start, pdpe,
addr_hva2gpa(vm, pdpe),
- (uint64_t) pdpe->address, pdpe->writable,
+ (uint64_t) pdpe->pfn, pdpe->writable,
pdpe->execute_disable);
- pde_start = addr_gpa2hva(vm,
- pdpe->address * vm->page_size);
+ pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
pde = &pde_start[n3];
if (!pde->present)
@@ -344,11 +423,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
"0x%-12lx 0x%-10lx %u %u\n",
indent, "", pde - pde_start, pde,
addr_hva2gpa(vm, pde),
- (uint64_t) pde->address, pde->writable,
+ (uint64_t) pde->pfn, pde->writable,
pde->execute_disable);
- pte_start = addr_gpa2hva(vm,
- pde->address * vm->page_size);
+ pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
pte = &pte_start[n4];
if (!pte->present)
@@ -359,7 +437,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
indent, "",
pte - pte_start, pte,
addr_hva2gpa(vm, pte),
- (uint64_t) pte->address,
+ (uint64_t) pte->pfn,
pte->writable,
pte->execute_disable,
pte->dirty,
@@ -480,9 +558,7 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint16_t index[4];
- struct pageMapL4Entry *pml4e;
- struct pageDirectoryPointerEntry *pdpe;
- struct pageDirectoryEntry *pde;
+ struct pageUpperEntry *pml4e, *pdpe, *pde;
struct pageTableEntry *pte;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
@@ -499,43 +575,39 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
if (!pml4e[index[3]].present)
goto unmapped_gva;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
if (!pdpe[index[2]].present)
goto unmapped_gva;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+ pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
if (!pde[index[1]].present)
goto unmapped_gva;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+ pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
if (!pte[index[0]].present)
goto unmapped_gva;
- return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
+ return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
unmapped_gva:
TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
exit(EXIT_FAILURE);
}
-static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
- int pgd_memslot)
+static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
{
if (!vm->gdt)
- vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
- KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+ vm->gdt = vm_vaddr_alloc_page(vm);
dt->base = vm->gdt;
dt->limit = getpagesize();
}
static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
- int selector, int gdt_memslot,
- int pgd_memslot)
+ int selector)
{
if (!vm->tss)
- vm->tss = vm_vaddr_alloc(vm, getpagesize(),
- KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+ vm->tss = vm_vaddr_alloc_page(vm);
memset(segp, 0, sizeof(*segp));
segp->base = vm->tss;
@@ -546,7 +618,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
kvm_seg_fill_gdt_64bit(vm, segp);
}
-static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+static void vcpu_setup(struct kvm_vm *vm, int vcpuid)
{
struct kvm_sregs sregs;
@@ -555,7 +627,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
sregs.idt.limit = 0;
- kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
+ kvm_setup_gdt(vm, &sregs.gdt);
switch (vm->mode) {
case VM_MODE_PXXV48_4K:
@@ -567,7 +639,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
- kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
+ kvm_setup_tss_64bit(vm, &sregs.tr, 0x18);
break;
default:
@@ -584,11 +656,11 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
struct kvm_regs regs;
vm_vaddr_t stack_vaddr;
stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
- DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+ DEFAULT_GUEST_STACK_VADDR_MIN);
/* Create VCPU */
vm_vcpu_add(vm, vcpuid);
- vcpu_setup(vm, vcpuid, 0, 0);
+ vcpu_setup(vm, vcpuid);
/* Setup guest general purpose registers */
vcpu_regs_get(vm, vcpuid, &regs);
@@ -600,6 +672,9 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
/* Setup the MP state */
mp_state.mp_state = 0;
vcpu_set_mp_state(vm, vcpuid, &mp_state);
+
+ /* Setup supported CPUIDs */
+ vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
}
/*
@@ -657,9 +732,7 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
return cpuid;
cpuid = allocate_kvm_cpuid2();
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_fd = open_kvm_dev_path_or_exit();
ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
@@ -691,9 +764,7 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index)
buffer.header.nmsrs = 1;
buffer.entry.index = msr_index;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_fd = open_kvm_dev_path_or_exit();
r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
@@ -986,9 +1057,7 @@ struct kvm_msr_list *kvm_get_msr_index_list(void)
struct kvm_msr_list *list;
int nmsrs, r, kvm_fd;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_fd = open_kvm_dev_path_or_exit();
nmsrs = kvm_get_num_msrs_fd(kvm_fd);
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
@@ -1207,7 +1276,7 @@ static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
void kvm_exit_unexpected_vector(uint32_t value)
{
- outl(UNEXPECTED_VECTOR_PORT, value);
+ ucall(UCALL_UNHANDLED, 1, value);
}
void route_exception(struct ex_regs *regs)
@@ -1228,8 +1297,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm)
extern void *idt_handlers;
int i;
- vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0);
- vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0);
+ vm->idt = vm_vaddr_alloc_page(vm);
+ vm->handlers = vm_vaddr_alloc_page(vm);
/* Handlers have the same address in both address spaces.*/
for (i = 0; i < NUM_INTERRUPTS; i++)
set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
@@ -1250,8 +1319,8 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
}
-void vm_handle_exception(struct kvm_vm *vm, int vector,
- void (*handler)(struct ex_regs *))
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
{
vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
@@ -1260,16 +1329,13 @@ void vm_handle_exception(struct kvm_vm *vm, int vector,
void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
{
- if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
- && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
- && vcpu_state(vm, vcpuid)->io.size == 4) {
- /* Grab pointer to io data */
- uint32_t *data = (void *)vcpu_state(vm, vcpuid)
- + vcpu_state(vm, vcpuid)->io.data_offset;
-
- TEST_ASSERT(false,
- "Unexpected vectored event in guest (vector:0x%x)",
- *data);
+ struct ucall uc;
+
+ if (get_ucall(vm, vcpuid, &uc) == UCALL_UNHANDLED) {
+ uint64_t vector = uc.args[0];
+
+ TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
+ vector);
}
}
@@ -1312,9 +1378,7 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
return cpuid;
cpuid = allocate_kvm_cpuid2();
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_fd = open_kvm_dev_path_or_exit();
ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_HV_CPUID failed %d %d\n",
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 827fe6028dd4..2ac98d70d02b 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -30,17 +30,14 @@ u64 rflags;
struct svm_test_data *
vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
{
- vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
- svm->vmcb = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
- svm->save_area = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ svm->save_area = (void *)vm_vaddr_alloc_page(vm);
svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 2448b30e8efa..d089d8b850b5 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -77,50 +77,48 @@ int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
struct vmx_pages *
vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
{
- vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
/* Setup of a region of guest memory for the vmxon region. */
- vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
/* Setup of a region of guest memory for a vmcs. */
- vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
/* Setup of a region of guest memory for the MSR bitmap. */
- vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->msr = (void *)vm_vaddr_alloc_page(vm);
vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
memset(vmx->msr_hva, 0, getpagesize());
/* Setup of a region of guest memory for the shadow VMCS. */
- vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
- vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
memset(vmx->vmread_hva, 0, getpagesize());
- vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
memset(vmx->vmwrite_hva, 0, getpagesize());
/* Setup of a region of guest memory for the VP Assist page. */
- vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vmx->vp_assist = (void *)vm_vaddr_alloc_page(vm);
vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
/* Setup of a region of guest memory for the enlightened VMCS. */
- vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vmx->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->enlightened_vmcs_hva =
addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
vmx->enlightened_vmcs_gpa =
@@ -395,7 +393,7 @@ void nested_vmx_check_supported(void)
}
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
+ uint64_t nested_paddr, uint64_t paddr)
{
uint16_t index[4];
struct eptPageTableEntry *pml4e;
@@ -428,9 +426,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
/* Allocate page directory pointer table if not present. */
pml4e = vmx->eptp_hva;
if (!pml4e[index[3]].readable) {
- pml4e[index[3]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
+ pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift;
pml4e[index[3]].writable = true;
pml4e[index[3]].readable = true;
pml4e[index[3]].executable = true;
@@ -440,9 +436,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
struct eptPageTableEntry *pdpe;
pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
if (!pdpe[index[2]].readable) {
- pdpe[index[2]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
+ pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift;
pdpe[index[2]].writable = true;
pdpe[index[2]].readable = true;
pdpe[index[2]].executable = true;
@@ -452,9 +446,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
struct eptPageTableEntry *pde;
pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
if (!pde[index[1]].readable) {
- pde[index[1]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
+ pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift;
pde[index[1]].writable = true;
pde[index[1]].readable = true;
pde[index[1]].executable = true;
@@ -494,8 +486,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* page range starting at nested_paddr to the page range starting at paddr.
*/
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- uint32_t eptp_memslot)
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size)
{
size_t page_size = vm->page_size;
size_t npages = size / page_size;
@@ -504,7 +495,7 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot);
+ nested_pg_map(vmx, vm, nested_paddr, paddr);
nested_paddr += page_size;
paddr += page_size;
}
@@ -514,7 +505,7 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* physical pages in VM.
*/
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot, uint32_t eptp_memslot)
+ uint32_t memslot)
{
sparsebit_idx_t i, last;
struct userspace_mem_region *region =
@@ -530,24 +521,21 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
nested_map(vmx, vm,
(uint64_t)i << vm->page_shift,
(uint64_t)i << vm->page_shift,
- 1 << vm->page_shift,
- eptp_memslot);
+ 1 << vm->page_shift);
}
}
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot)
{
- vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
}
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot)
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
{
- vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
}
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 6096bf0a5b34..98351ba0933c 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -71,14 +71,22 @@ struct memslot_antagonist_args {
};
static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
- uint64_t nr_modifications, uint64_t gpa)
+ uint64_t nr_modifications)
{
+ const uint64_t pages = 1;
+ uint64_t gpa;
int i;
+ /*
+ * Add the dummy memslot just below the perf_test_util memslot, which is
+ * at the top of the guest physical address space.
+ */
+ gpa = guest_test_phys_mem - pages * vm_get_page_size(vm);
+
for (i = 0; i < nr_modifications; i++) {
usleep(delay);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa,
- DUMMY_MEMSLOT_INDEX, 1, 0);
+ DUMMY_MEMSLOT_INDEX, pages, 0);
vm_mem_region_delete(vm, DUMMY_MEMSLOT_INDEX);
}
@@ -120,11 +128,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pr_info("Started all vCPUs\n");
add_remove_memslot(vm, p->memslot_modification_delay,
- p->nr_memslot_modifications,
- guest_test_phys_mem +
- (guest_percpu_mem_size * nr_vcpus) +
- perf_test_args.host_page_size +
- perf_test_args.guest_page_size);
+ p->nr_memslot_modifications);
run_vcpus = false;
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
new file mode 100644
index 000000000000..d6e381e01db7
--- /dev/null
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -0,0 +1,1037 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A memslot-related performance benchmark.
+ *
+ * Copyright (C) 2021 Oracle and/or its affiliates.
+ *
+ * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
+ */
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/compiler.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+#define MEM_SIZE ((512U << 20) + 4096) /* 512 MiB plus one extra 4 KiB page */
+#define MEM_SIZE_PAGES (MEM_SIZE / 4096)
+#define MEM_GPA 0x10000000UL /* guest physical base of the memory handled by this test */
+#define MEM_AUX_GPA MEM_GPA
+#define MEM_SYNC_GPA MEM_AUX_GPA /* struct sync_area is accessed at this address */
+#define MEM_TEST_GPA (MEM_AUX_GPA + 4096) /* test data starts one page above the base */
+#define MEM_TEST_SIZE (MEM_SIZE - 4096) /* everything except that first page */
+static_assert(MEM_SIZE % 4096 == 0, "invalid mem size");
+static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");
+
+/*
+ * 32 MiB is the maximum size that still gets well over 100 iterations on
+ * 509 slots.
+ * Since each slot needs to have at least one page, up to 8194 slots in use
+ * can then be tested (although with slightly limited resolution).
+ */
+#define MEM_SIZE_MAP ((32U << 20) + 4096)
+#define MEM_SIZE_MAP_PAGES (MEM_SIZE_MAP / 4096)
+#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - 4096)
+#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096)
+static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size");
+static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size");
+static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size");
+static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size");
+
+/*
+ * 128 MiB is the minimum size that fills 32k slots with at least one page in
+ * each while at the same time still getting 100+ iterations in such a test.
+ */
+#define MEM_TEST_UNMAP_SIZE (128U << 20)
+#define MEM_TEST_UNMAP_SIZE_PAGES (MEM_TEST_UNMAP_SIZE / 4096)
+/* 2 MiB chunk size like a typical huge page */
+#define MEM_TEST_UNMAP_CHUNK_PAGES (2U << (20 - 12))
+static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE,
+ "invalid unmap test region size");
+static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0,
+ "invalid unmap test region size");
+static_assert(MEM_TEST_UNMAP_SIZE_PAGES %
+ (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0,
+ "invalid unmap test region size");
+
+/*
+ * For the move active test the middle of the test area is placed on
+ * a memslot boundary: half lies in the memslot being moved, half in
+ * other memslot(s).
+ *
+ * When running this test with 32k memslots (32764, really) each memslot
+ * contains 4 pages.
+ * The last one additionally contains the remaining 21 pages of memory,
+ * for the total size of 25 pages.
+ * Hence, the maximum size here is 50 pages.
+ */
+#define MEM_TEST_MOVE_SIZE_PAGES (50)
+#define MEM_TEST_MOVE_SIZE (MEM_TEST_MOVE_SIZE_PAGES * 4096)
+#define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE)
+static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
+ "invalid move test region size");
+
+#define MEM_TEST_VAL_1 0x1122334455667788
+#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00
+
+struct vm_data { /* host-side state of one benchmark VM */
+ struct kvm_vm *vm; /* VM created by prepare_vm(); NULL after alloc_vm() */
+ pthread_t vcpu_thread; /* thread running vcpu_worker(), started by launch_vm() */
+ uint32_t nslots; /* number of test memslots (memslot ids 1..nslots) */
+ uint64_t npages; /* total number of 4 KiB pages in the test area */
+ uint64_t pages_per_slot; /* npages / nslots; the last slot also holds the remainder */
+ void **hva_slots; /* host address of the start of each test memslot */
+ bool mmio_ok; /* whether vcpu_worker() accepts MMIO exits */
+ uint64_t mmio_gpa_min; /* lowest MMIO address accepted (inclusive) */
+ uint64_t mmio_gpa_max; /* highest MMIO address accepted (inclusive) */
+};
+
+struct sync_area { /* host/guest mailbox, accessed by the guest at MEM_SYNC_GPA */
+ atomic_bool start_flag; /* set by let_guest_run(), polled by guest_spin_until_start() */
+ atomic_bool exit_flag; /* set by make_guest_exit(), polled by _guest_should_exit() */
+ atomic_bool sync_flag; /* set by host_perform_sync(), cleared by guest_perform_sync() */
+ void *move_area_ptr; /* guest address written by guest_code_test_memslot_move() */
+};
+
+/*
+ * Technically, we also need the atomic bool to be address-free, which
+ * is recommended, but not strictly required, by C11 for lockless
+ * implementations.
+ * However, in practice both GCC and Clang fulfill this requirement on
+ * all KVM-supported platforms.
+ */
+static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
+
+static sem_t vcpu_ready; /* posted by vcpu_worker() on UCALL_SYNC, awaited by wait_for_vcpu() */
+
+static bool map_unmap_verify; /* when false, test_memslot_map_unmap_check() does nothing */
+
+static bool verbose; /* gates the pr_info_v() output below */
+#define pr_info_v(...) \
+ do { \
+ if (verbose) \
+ pr_info(__VA_ARGS__); \
+ } while (0)
+
+/*
+ * Body of the vCPU thread.
+ *
+ * Keeps re-entering the guest.  A UCALL_SYNC exit posts vcpu_ready and the
+ * guest is resumed; an MMIO exit is checked against the expectations kept in
+ * struct vm_data and the guest is resumed as well.  Any other exit ends the
+ * loop, and a UCALL_ABORT is turned into a test failure.
+ *
+ * @data: the struct vm_data of the VM to run
+ *
+ * Always returns NULL.
+ */
+static void *vcpu_worker(void *data)
+{
+	struct vm_data *vm = data;
+	struct kvm_run *run = vcpu_state(vm->vm, VCPU_ID);
+	struct ucall uc;
+	uint64_t cmd;
+
+	for (;;) {
+		vcpu_run(vm->vm, VCPU_ID);
+
+		if (run->exit_reason == KVM_EXIT_MMIO) {
+			/* Only 8-byte writes inside the announced window are tolerated. */
+			TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
+			TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
+			TEST_ASSERT(run->mmio.len == 8,
+				    "Unexpected exit mmio size = %u", run->mmio.len);
+			TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
+				    run->mmio.phys_addr <= vm->mmio_gpa_max,
+				    "Unexpected exit mmio address = 0x%llx",
+				    run->mmio.phys_addr);
+			continue;
+		}
+
+		if (run->exit_reason != KVM_EXIT_IO)
+			break;
+
+		cmd = get_ucall(vm->vm, VCPU_ID, &uc);
+		if (cmd != UCALL_SYNC)
+			break;
+
+		/* Let the host know that the guest reached its sync point. */
+		sem_post(&vcpu_ready);
+	}
+
+	/* cmd is only valid (and only examined) after an I/O exit. */
+	if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
+		TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
+			  __FILE__, uc.args[1], uc.args[2]);
+
+	return NULL;
+}
+
+/*
+ * Wait for vcpu_worker() to post vcpu_ready, failing the test if that does
+ * not happen within two seconds.
+ */
+static void wait_for_vcpu(void)
+{
+	struct timespec ts;
+
+	/* sem_timedwait() takes an absolute deadline, so start from "now". */
+	TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
+		    "clock_gettime() failed: %d\n", errno);
+	ts.tv_sec += 2;
+
+	TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
+		    "sem_timedwait() failed: %d\n", errno);
+}
+
+/*
+ * Translate a guest physical address inside the test area into a host
+ * pointer.
+ *
+ * @data: VM state whose hva_slots[] array is consulted
+ * @gpa: address in the range [MEM_GPA, MEM_GPA + npages * 4096)
+ * @rempages: if non-NULL, receives the number of pages from @gpa up to the
+ *            end of the slot containing it; @gpa must then be page aligned
+ *
+ * Every slot holds pages_per_slot pages, except for the last one, which
+ * additionally holds the pages left over by the division.
+ */
+static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
+{
+	uint64_t gpage, pgoffs;
+	uint32_t slot;
+	/*
+	 * Kept 64-bit wide so that neither the in-slot page index nor the
+	 * byte offset derived from it below can be truncated for large slots.
+	 */
+	uint64_t slotoffs;
+	void *base;
+
+	TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
+	TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096,
+		    "Too high gpa to translate");
+	gpa -= MEM_GPA;
+
+	gpage = gpa / 4096;
+	pgoffs = gpa % 4096;
+	/* Pages beyond nslots * pages_per_slot belong to the last slot. */
+	slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
+	slotoffs = gpage - (slot * data->pages_per_slot);
+
+	if (rempages) {
+		uint64_t slotpages;
+
+		if (slot == data->nslots - 1)
+			slotpages = data->npages - slot * data->pages_per_slot;
+		else
+			slotpages = data->pages_per_slot;
+
+		TEST_ASSERT(!pgoffs,
+			    "Asking for remaining pages in slot but gpa not page aligned");
+		*rempages = slotpages - slotoffs;
+	}
+
+	base = data->hva_slots[slot];
+	return (uint8_t *)base + slotoffs * 4096 + pgoffs;
+}
+
+/*
+ * Return the guest physical address at which test slot @slot (0-based,
+ * below data->nslots) starts.
+ */
+static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
+{
+	uint64_t slot_bytes = data->pages_per_slot * 4096;
+
+	TEST_ASSERT(slot < data->nslots, "Too high slot number");
+
+	return MEM_GPA + slot * slot_bytes;
+}
+
+/*
+ * Allocate an empty struct vm_data.
+ *
+ * Only the members that free_vm() releases are given a defined value here;
+ * the rest are filled in by prepare_vm().
+ */
+static struct vm_data *alloc_vm(void)
+{
+	struct vm_data *data = malloc(sizeof(*data));
+
+	TEST_ASSERT(data, "malloc(vmdata) failed");
+
+	data->hva_slots = NULL;
+	data->vm = NULL;
+
+	return data;
+}
+
+/*
+ * Create the test VM and back the test area with memslots.
+ *
+ * @data: VM state to fill in
+ * @nslots: cap on the total number of memslots to use, or -1 for no cap
+ * @maxslots: set to the largest usable total slot count when this function
+ *            returns false
+ * @guest_code: guest entry point handed to vm_create_default()
+ * @mempages: size of the test area in 4 KiB pages
+ * @slot_runtime: receives the time spent adding the memslots
+ *
+ * Memslot ids 1..max_mem_slots-1 are used for the test area.  They share
+ * @mempages equally, with the last one also taking the remainder.
+ *
+ * Returns false (with *@maxslots set) if there are more slots than pages,
+ * true once the VM is ready to be launched.
+ */
+static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
+		       void *guest_code, uint64_t mempages,
+		       struct timespec *slot_runtime)
+{
+	uint32_t max_mem_slots;
+	uint64_t rempages;
+	uint64_t guest_addr;
+	uint32_t slot;
+	struct timespec tstart;
+	struct sync_area *sync;
+
+	max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+	TEST_ASSERT(max_mem_slots > 1,
+		    "KVM_CAP_NR_MEMSLOTS should be greater than 1");
+	TEST_ASSERT(nslots > 1 || nslots == -1,
+		    "Slot count cap should be greater than 1");
+	if (nslots != -1)
+		max_mem_slots = min(max_mem_slots, (uint32_t)nslots);
+	pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots);
+
+	TEST_ASSERT(mempages > 1,
+		    "Can't test without any memory");
+
+	data->npages = mempages;
+	data->nslots = max_mem_slots - 1;
+	data->pages_per_slot = mempages / data->nslots;
+	if (!data->pages_per_slot) {
+		/* One page per test slot, plus the slot that is not a test slot. */
+		*maxslots = mempages + 1;
+		return false;
+	}
+
+	rempages = mempages % data->nslots;
+	data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
+	TEST_ASSERT(data->hva_slots, "malloc() fail");
+
+	data->vm = vm_create_default(VCPU_ID, mempages, guest_code);
+
+	/* max_mem_slots is a uint32_t, so print it as one. */
+	pr_info_v("Adding slots 1..%"PRIu32", each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
+		  max_mem_slots - 1, data->pages_per_slot, rempages);
+
+	/* Only the memslot additions themselves are timed. */
+	clock_gettime(CLOCK_MONOTONIC, &tstart);
+	for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) {
+		uint64_t npages;
+
+		npages = data->pages_per_slot;
+		if (slot == max_mem_slots - 1)
+			npages += rempages;
+
+		vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
+					    guest_addr, slot, npages,
+					    0);
+		guest_addr += npages * 4096;
+	}
+	*slot_runtime = timespec_elapsed(tstart);
+
+	/*
+	 * Second pass, indexed from 0: allocate the pages of memslot id
+	 * slot + 1, record where they live in host memory and zero them.
+	 */
+	for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) {
+		uint64_t npages;
+		uint64_t gpa;
+
+		npages = data->pages_per_slot;
+		if (slot == max_mem_slots - 2)
+			npages += rempages;
+
+		gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr,
+					 slot + 1);
+		TEST_ASSERT(gpa == guest_addr,
+			    "vm_phy_pages_alloc() failed\n");
+
+		data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr);
+		memset(data->hva_slots[slot], 0, npages * 4096);
+
+		guest_addr += npages * 4096;
+	}
+
+	/* The guest accesses the test area at virtual == physical addresses. */
+	virt_map(data->vm, MEM_GPA, MEM_GPA, mempages);
+
+	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
+	atomic_init(&sync->start_flag, false);
+	atomic_init(&sync->exit_flag, false);
+	atomic_init(&sync->sync_flag, false);
+
+	data->mmio_ok = false;
+
+	return true;
+}
+
+/*
+ * Start the vCPU thread for @data and wait until the guest has signalled
+ * that it is running.
+ */
+static void launch_vm(struct vm_data *data)
+{
+	int ret;
+
+	pr_info_v("Launching the test VM\n");
+
+	/*
+	 * Fail right away if the thread could not be created, rather than
+	 * timing out in wait_for_vcpu() with a misleading message.
+	 */
+	ret = pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);
+	TEST_ASSERT(!ret, "pthread_create() failed: %d\n", ret);
+
+	/* Ensure the guest thread is spun up. */
+	wait_for_vcpu();
+}
+
+/* Release the VM, the host address table and @data itself. */
+static void free_vm(struct vm_data *data)
+{
+	struct kvm_vm *vm = data->vm;
+	void **slots = data->hva_slots;
+
+	kvm_vm_free(vm);
+	free(slots);
+	free(data);
+}
+
+/* Block until the vCPU thread of @data has returned from vcpu_worker(). */
+static void wait_guest_exit(struct vm_data *data)
+{
+	pthread_t worker = data->vcpu_thread;
+
+	pthread_join(worker, NULL);
+}
+
+/* Host side: raise start_flag so that guest_spin_until_start() returns. */
+static void let_guest_run(struct sync_area *sync)
+{
+	atomic_bool *flag = &sync->start_flag;
+
+	atomic_store_explicit(flag, true, memory_order_release);
+}
+
+/* Guest side: busy-wait until the host has raised start_flag. */
+static void guest_spin_until_start(void)
+{
+	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+	for (;;) {
+		if (atomic_load_explicit(&sync->start_flag,
+					 memory_order_acquire))
+			return;
+	}
+}
+
+/* Host side: raise exit_flag, which the guest polls via guest_should_exit(). */
+static void make_guest_exit(struct sync_area *sync)
+{
+	atomic_bool *flag = &sync->exit_flag;
+
+	atomic_store_explicit(flag, true, memory_order_release);
+}
+
+/* Guest side: report whether the host has raised exit_flag. */
+static bool _guest_should_exit(void)
+{
+	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+	bool leave;
+
+	leave = atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
+	return leave;
+}
+
+#define guest_should_exit() unlikely(_guest_should_exit())
+
+/*
+ * noinline so we can easily see how much time the host spends waiting
+ * for the guest.
+ * For the same reason use alarm() instead of polling clock_gettime()
+ * to implement a wait timeout.
+ */
+static noinline void host_perform_sync(struct sync_area *sync)
+{
+	/* Arm the wait timeout. */
+	alarm(2);
+
+	/* Raise the flag, then spin until the guest has lowered it again. */
+	atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
+	for (;;) {
+		if (!atomic_load_explicit(&sync->sync_flag,
+					  memory_order_acquire))
+			break;
+	}
+
+	/* The guest answered in time: disarm the timeout. */
+	alarm(0);
+}
+
+/*
+ * Guest side of host_perform_sync(): wait until the host has raised
+ * sync_flag and lower it again.
+ *
+ * Returns true once the flag has been consumed, false if the host asked the
+ * guest to exit while it was waiting.
+ */
+static bool guest_perform_sync(void)
+{
+	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+	for (;;) {
+		/* The exchange overwrites this on failure, so reset it each time. */
+		bool expected = true;
+
+		if (guest_should_exit())
+			return false;
+
+		if (atomic_compare_exchange_weak_explicit(&sync->sync_flag,
+							  &expected, false,
+							  memory_order_acq_rel,
+							  memory_order_relaxed))
+			return true;
+	}
+}
+
+/*
+ * Guest side of the memslot move tests: until told to exit, keep writing
+ * MEM_TEST_VAL_1 to every page of the MEM_TEST_MOVE_SIZE bytes starting at
+ * sync->move_area_ptr.
+ */
+static void guest_code_test_memslot_move(void)
+{
+	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+	uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);
+
+	GUEST_SYNC(0);
+
+	guest_spin_until_start();
+
+	while (!guest_should_exit()) {
+		uintptr_t ptr = base;
+
+		while (ptr < base + MEM_TEST_MOVE_SIZE) {
+			*(uint64_t *)ptr = MEM_TEST_VAL_1;
+			ptr += 4096;
+		}
+
+		/*
+		 * No host sync here since the MMIO exits are so expensive
+		 * that the host would spend most of its time waiting for
+		 * the guest and so instead of measuring memslot move
+		 * performance we would measure the performance and
+		 * likelihood of MMIO exits
+		 */
+	}
+
+	GUEST_DONE();
+}
+
+/*
+ * Guest side of the map test: alternately write to every page of the first
+ * and of the second half of the map test area, synchronizing with the host
+ * after each half.  Ends once guest_perform_sync() reports an exit request.
+ */
+static void guest_code_test_memslot_map(void)
+{
+	GUEST_SYNC(0);
+
+	guest_spin_until_start();
+
+	while (1) {
+		uintptr_t ptr;
+
+		/* First half of the area. */
+		for (ptr = MEM_TEST_GPA;
+		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096)
+			*(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+		if (!guest_perform_sync())
+			break;
+
+		/* Second half of the area. */
+		for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
+		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096)
+			*(uint64_t *)ptr = MEM_TEST_VAL_2;
+
+		if (!guest_perform_sync())
+			break;
+	}
+
+	GUEST_DONE();
+}
+
+/*
+ * Guest side of the unmap test: alternately touch the first page of the
+ * first and of the second half of the unmap test area, synchronizing with
+ * the host after each access.  Ends once guest_perform_sync() reports an
+ * exit request.
+ */
+static void guest_code_test_memslot_unmap(void)
+{
+	GUEST_SYNC(0);
+
+	guest_spin_until_start();
+
+	while (1) {
+		uintptr_t ptr = MEM_TEST_GPA;
+
+		/*
+		 * We can afford to access (map) just a small number of pages
+		 * per host sync as otherwise the host will spend
+		 * a significant amount of its time waiting for the guest
+		 * (instead of doing unmap operations), so this will
+		 * effectively turn this test into a map performance test.
+		 *
+		 * Just access a single page to be on the safe side.
+		 */
+		*(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+		if (!guest_perform_sync())
+			break;
+
+		ptr += MEM_TEST_UNMAP_SIZE / 2;
+		*(uint64_t *)ptr = MEM_TEST_VAL_2;
+
+		if (!guest_perform_sync())
+			break;
+	}
+
+	GUEST_DONE();
+}
+
+/*
+ * Guest side of the read/write test.  Each round first writes
+ * MEM_TEST_VAL_1 at the start of every page of the test area and, after a
+ * host sync, expects to find MEM_TEST_VAL_2 in the middle of every page,
+ * which it then clears.
+ */
+static void guest_code_test_memslot_rw(void)
+{
+	GUEST_SYNC(0);
+
+	guest_spin_until_start();
+
+	while (1) {
+		uintptr_t ptr = MEM_TEST_GPA;
+
+		while (ptr < MEM_TEST_GPA + MEM_TEST_SIZE) {
+			*(uint64_t *)ptr = MEM_TEST_VAL_1;
+			ptr += 4096;
+		}
+
+		if (!guest_perform_sync())
+			break;
+
+		ptr = MEM_TEST_GPA + 4096 / 2;
+		while (ptr < MEM_TEST_GPA + MEM_TEST_SIZE) {
+			uint64_t val = *(uint64_t *)ptr;
+
+			GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
+			*(uint64_t *)ptr = 0;
+			ptr += 4096;
+		}
+
+		if (!guest_perform_sync())
+			break;
+	}
+
+	GUEST_DONE();
+}
+
+/*
+ * Choose the area that guest_code_test_memslot_move() writes to, relative
+ * to the last test slot (the one test_memslot_move_loop() moves).
+ *
+ * Inactive case: the whole area lies directly below the moved slot.
+ * Active case: the area straddles the start of the moved slot, half below
+ * it and half inside it, and MMIO exits for the inner half are allowed.
+ *
+ * Returns false, with *@maxslots set to 0, if the moved slot is too small
+ * to hold half of the area in the active case; true otherwise.
+ */
+static bool test_memslot_move_prepare(struct vm_data *data,
+				      struct sync_area *sync,
+				      uint64_t *maxslots, bool isactive)
+{
+	uint64_t movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
+	uint64_t lastpages;
+
+	if (!isactive) {
+		sync->move_area_ptr = (void *)(movesrcgpa - MEM_TEST_MOVE_SIZE);
+		return true;
+	}
+
+	/* Only the page count of the moved slot is wanted here. */
+	vm_gpa2hva(data, movesrcgpa, &lastpages);
+	if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) {
+		*maxslots = 0;
+		return false;
+	}
+
+	sync->move_area_ptr = (void *)(movesrcgpa - MEM_TEST_MOVE_SIZE / 2);
+
+	data->mmio_ok = true;
+	data->mmio_gpa_min = movesrcgpa;
+	data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
+
+	return true;
+}
+
+/* test_memslot_move_prepare() with the test area reaching into the moved slot. */
+static bool test_memslot_move_prepare_active(struct vm_data *data,
+					     struct sync_area *sync,
+					     uint64_t *maxslots)
+{
+	const bool isactive = true;
+
+	return test_memslot_move_prepare(data, sync, maxslots, isactive);
+}
+
+/* test_memslot_move_prepare() with the test area entirely below the moved slot. */
+static bool test_memslot_move_prepare_inactive(struct vm_data *data,
+					       struct sync_area *sync,
+					       uint64_t *maxslots)
+{
+	const bool isactive = false;
+
+	return test_memslot_move_prepare(data, sync, maxslots, isactive);
+}
+
+/*
+ * One iteration of the move test: move the last test memslot to
+ * MEM_TEST_MOVE_GPA_DEST and then back to where it started.
+ */
+static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
+{
+	uint32_t lastslot = data->nslots - 1;
+	/* Memslot ids are one above the 0-based test slot index. */
+	uint32_t slotid = lastslot + 1;
+	uint64_t movesrcgpa = vm_slot2gpa(data, lastslot);
+
+	vm_mem_region_move(data->vm, slotid, MEM_TEST_MOVE_GPA_DEST);
+	vm_mem_region_move(data->vm, slotid, movesrcgpa);
+}
+
+/*
+ * madvise(MADV_DONTNEED) @count pages of the test area, starting @offsp
+ * pages into it.  The range is processed slot by slot, since each slot has
+ * its own host mapping.
+ */
+static void test_memslot_do_unmap(struct vm_data *data,
+				  uint64_t offsp, uint64_t count)
+{
+	uint64_t gpa = MEM_TEST_GPA + offsp * 4096;
+	uint64_t ctr = 0;
+
+	while (ctr < count) {
+		uint64_t npages;
+		void *hva;
+		int ret;
+
+		/* npages: pages left in the slot that contains gpa. */
+		hva = vm_gpa2hva(data, gpa, &npages);
+		TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
+		npages = min(npages, count - ctr);
+		ret = madvise(hva, npages * 4096, MADV_DONTNEED);
+		TEST_ASSERT(!ret,
+			    "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
+			    hva, gpa);
+
+		gpa += npages * 4096;
+		ctr += npages;
+	}
+	TEST_ASSERT(ctr == count,
+		    "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
+}
+
+/*
+ * Debug check, active only when map_unmap_verify is set: assert that the
+ * first 64-bit word of the page @offsp pages into the test area holds
+ * @valexp, then zero that word.
+ */
+static void test_memslot_map_unmap_check(struct vm_data *data,
+					 uint64_t offsp, uint64_t valexp)
+{
+	if (map_unmap_verify) {
+		uint64_t gpa = MEM_TEST_GPA + offsp * 4096;
+		uint64_t *val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
+
+		TEST_ASSERT(*val == valexp,
+			    "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
+			    *val, valexp, gpa);
+		/* Clear the marker so a stale value cannot satisfy a later check. */
+		*val = 0;
+	}
+}
+
+/*
+ * One host-side iteration of the "map" test. The test area is treated as
+ * two halves; the host unmaps one half while the guest writes to the other,
+ * with host_perform_sync() marking the hand-over points.
+ */
+static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
+{
+	const uint64_t half = MEM_TEST_MAP_SIZE_PAGES / 2;
+
+	/* Phase 1: drop the second half while the guest fills the first. */
+	test_memslot_do_unmap(data, half, half);
+
+	/*
+	 * Phase 2: once the guest reports the first half written, spot-check
+	 * its first and last page and then drop it. The guest is filling the
+	 * second half in the meantime.
+	 */
+	host_perform_sync(sync);
+	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
+	test_memslot_map_unmap_check(data, half - 1, MEM_TEST_VAL_1);
+	test_memslot_do_unmap(data, 0, half);
+
+	/*
+	 * Phase 3: once the guest reports the second half written, spot-check
+	 * its first and last page. It is only unmapped at the start of the
+	 * next iteration, while the guest is back on the first half.
+	 */
+	host_perform_sync(sync);
+	test_memslot_map_unmap_check(data, half, MEM_TEST_VAL_2);
+	test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1,
+				     MEM_TEST_VAL_2);
+}
+
+/*
+ * One host-side iteration shared by the "unmap" tests: after each sync with
+ * the guest, optionally verify the first page of one half of the test area
+ * and then unmap that half in steps of @chunk pages, while the guest works
+ * on the other half.
+ */
+static void test_memslot_unmap_loop_common(struct vm_data *data,
+					   struct sync_area *sync,
+					   uint64_t chunk)
+{
+	uint64_t ctr;
+
+	/* First half: guest has finished it and moves on to the second. */
+	host_perform_sync(sync);
+	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
+	ctr = 0;
+	while (ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2) {
+		test_memslot_do_unmap(data, ctr, chunk);
+		ctr += chunk;
+	}
+
+	/* Second half: same procedure with the roles of the halves swapped. */
+	host_perform_sync(sync);
+	test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2,
+				     MEM_TEST_VAL_2);
+	ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2;
+	while (ctr < MEM_TEST_UNMAP_SIZE_PAGES) {
+		test_memslot_do_unmap(data, ctr, chunk);
+		ctr += chunk;
+	}
+}
+
+/* ->loop() hook of the "unmap" test: unmap in steps of a single page. */
+static void test_memslot_unmap_loop(struct vm_data *data,
+				    struct sync_area *sync)
+{
+	const uint64_t chunk = 1;
+
+	test_memslot_unmap_loop_common(data, sync, chunk);
+}
+
+/*
+ * ->loop() hook of the "unmap chunked" test: unmap in steps of
+ * MEM_TEST_UNMAP_CHUNK_PAGES pages.
+ */
+static void test_memslot_unmap_loop_chunked(struct vm_data *data,
+					    struct sync_area *sync)
+{
+	const uint64_t chunk = MEM_TEST_UNMAP_CHUNK_PAGES;
+
+	test_memslot_unmap_loop_common(data, sync, chunk);
+}
+
+/*
+ * One host-side iteration of the "RW" test.
+ *
+ * The host writes MEM_TEST_VAL_2 into the middle (offset 4096 / 2) of every
+ * page of the test area, syncs with the guest, then checks that the start
+ * of every page holds MEM_TEST_VAL_1 and clears it, and syncs again.
+ */
+static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
+{
+	const uint64_t area_end = MEM_TEST_GPA + MEM_TEST_SIZE;
+	uint64_t gptr;
+
+	/* Host -> guest direction: one marker per page, at mid-page. */
+	gptr = MEM_TEST_GPA + 4096 / 2;
+	while (gptr < area_end) {
+		uint64_t *hptr = (uint64_t *)vm_gpa2hva(data, gptr, NULL);
+
+		*hptr = MEM_TEST_VAL_2;
+		gptr += 4096;
+	}
+
+	host_perform_sync(sync);
+
+	/* Guest -> host direction: verify and reset the page-start markers. */
+	gptr = MEM_TEST_GPA;
+	while (gptr < area_end) {
+		uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
+		uint64_t val = *vptr;
+
+		TEST_ASSERT(val == MEM_TEST_VAL_1,
+			    "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
+			    val, gptr);
+		*vptr = 0;
+		gptr += 4096;
+	}
+
+	host_perform_sync(sync);
+}
+
+/* Description of one benchmark; instances live in the tests[] table. */
+struct test_data {
+ /* Name printed by help() and in the per-test banner of main(). */
+ const char *name;
+ /*
+ * Memory size passed to prepare_vm() (presumably in pages - confirm);
+ * 0 makes test_execute() fall back to MEM_SIZE_PAGES.
+ */
+ uint64_t mem_size;
+ /* Guest entry point handed to prepare_vm(). */
+ void (*guest_code)(void);
+ /*
+ * Optional hook run after the VM is prepared and before it is launched;
+ * may be NULL. Returning false aborts the run.
+ */
+ bool (*prepare)(struct vm_data *data, struct sync_area *sync,
+ uint64_t *maxslots);
+ /* Host-side body executed repeatedly until the time budget is used up. */
+ void (*loop)(struct vm_data *data, struct sync_area *sync);
+};
+
+/*
+ * Run one benchmark once.
+ *
+ * @nslots, @maxslots and @slot_runtime are forwarded to prepare_vm();
+ * @maxslots is also forwarded to the optional tdata->prepare() hook.
+ * tdata->loop() is called repeatedly until at least @maxtime seconds have
+ * elapsed since the guest was let run; each call increments *@nloops (the
+ * caller is expected to have zeroed it). *@guest_runtime receives the
+ * elapsed time sampled at the top of the last loop check.
+ *
+ * Returns false when prepare_vm() or the prepare hook fails, true otherwise.
+ * The VM is freed on every path.
+ */
+static bool test_execute(int nslots, uint64_t *maxslots,
+ unsigned int maxtime,
+ const struct test_data *tdata,
+ uint64_t *nloops,
+ struct timespec *slot_runtime,
+ struct timespec *guest_runtime)
+{
+ /* A zero mem_size in the test description selects the default size. */
+ uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES;
+ struct vm_data *data;
+ struct sync_area *sync;
+ struct timespec tstart;
+ bool ret = true;
+
+ data = alloc_vm();
+ if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
+ mem_size, slot_runtime)) {
+ ret = false;
+ goto exit_free;
+ }
+
+ /* Host view of the sync area located at MEM_SYNC_GPA in the guest. */
+ sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
+
+ if (tdata->prepare &&
+ !tdata->prepare(data, sync, maxslots)) {
+ ret = false;
+ goto exit_free;
+ }
+
+ launch_vm(data);
+
+ clock_gettime(CLOCK_MONOTONIC, &tstart);
+ let_guest_run(sync);
+
+ while (1) {
+ /* Sample the elapsed time first so a zero budget runs no loops. */
+ *guest_runtime = timespec_elapsed(tstart);
+ if (guest_runtime->tv_sec >= maxtime)
+ break;
+
+ tdata->loop(data, sync);
+
+ (*nloops)++;
+ }
+
+ make_guest_exit(sync);
+ wait_guest_exit(data);
+
+exit_free:
+ free_vm(data);
+
+ return ret;
+}
+
+/*
+ * Table of available benchmarks. The array index is the test number accepted
+ * by the -f / -e options and listed by help(). Entries that leave .mem_size
+ * unset use the default size (see test_execute()) and are the only ones
+ * ranked for slot setup time in test_loop().
+ */
+static const struct test_data tests[] = {
+ {
+ .name = "map",
+ .mem_size = MEM_SIZE_MAP_PAGES,
+ .guest_code = guest_code_test_memslot_map,
+ .loop = test_memslot_map_loop,
+ },
+ {
+ .name = "unmap",
+ .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
+ .guest_code = guest_code_test_memslot_unmap,
+ .loop = test_memslot_unmap_loop,
+ },
+ {
+ /* Same guest code as "unmap"; only the host-side chunk size differs. */
+ .name = "unmap chunked",
+ .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
+ .guest_code = guest_code_test_memslot_unmap,
+ .loop = test_memslot_unmap_loop_chunked,
+ },
+ {
+ .name = "move active area",
+ .guest_code = guest_code_test_memslot_move,
+ .prepare = test_memslot_move_prepare_active,
+ .loop = test_memslot_move_loop,
+ },
+ {
+ .name = "move inactive area",
+ .guest_code = guest_code_test_memslot_move,
+ .prepare = test_memslot_move_prepare_inactive,
+ .loop = test_memslot_move_loop,
+ },
+ {
+ .name = "RW",
+ .guest_code = guest_code_test_memslot_rw,
+ .loop = test_memslot_rw_loop
+ },
+};
+
+/* Number of entries in tests[]. */
+#define NTESTS ARRAY_SIZE(tests)
+
+/* Command line settings; defaults are set in main(), parsed by parse_args(). */
+struct test_args {
+ /* Index into tests[] of the first test to run (-f). */
+ int tfirst;
+ /* Index into tests[] of the last test to run, inclusive (-e). */
+ int tlast;
+ /* Memslot count cap, or -1 for no cap (-s). */
+ int nslots;
+ /* Length of each test run in seconds (-l). */
+ int seconds;
+ /* Number of runs per test (-r). */
+ int runs;
+};
+
+/*
+ * Print the usage text for program @name, showing the current values from
+ * @targs, followed by the numbered list of available tests.
+ */
+static void help(char *name, struct test_args *targs)
+{
+	int ctr = 0;
+
+	pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
+		name);
+
+	/* Flag-only options. */
+	pr_info(" -h: print this help screen.\n");
+	pr_info(" -v: enable verbose mode (not for benchmarking).\n");
+	pr_info(" -d: enable extra debug checks.\n");
+
+	/* Options taking a value, each shown with its current setting. */
+	pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
+		targs->nslots);
+	pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
+		targs->tfirst, NTESTS - 1);
+	pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
+		targs->tlast, NTESTS - 1);
+	pr_info(" -l: specify the test length in seconds (currently: %i)\n",
+		targs->seconds);
+	pr_info(" -r: specify the number of runs per test (currently: %i)\n",
+		targs->runs);
+
+	pr_info("\nAvailable tests:\n");
+	while (ctr < NTESTS) {
+		pr_info("%d: %s\n", ctr, tests[ctr].name);
+		ctr++;
+	}
+}
+
+/*
+ * Parse the command line into @targs (which must already hold the defaults)
+ * and the verbose / map_unmap_verify globals.
+ *
+ * Returns true when the program should go on to run tests. Returns false
+ * after printing either the help screen (-h, unknown option, stray
+ * positional argument) or a message naming the offending value.
+ *
+ * Both -f and -e are range-checked against the tests[] table as they are
+ * parsed, so an out-of-range first test is reported as such rather than
+ * through the later "first > last" consistency check.
+ */
+static bool parse_args(int argc, char *argv[],
+		       struct test_args *targs)
+{
+	int opt;
+
+	while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
+		switch (opt) {
+		case 'h':
+		default:
+			help(argv[0], targs);
+			return false;
+		case 'v':
+			verbose = true;
+			break;
+		case 'd':
+			map_unmap_verify = true;
+			break;
+		case 's':
+			targs->nslots = atoi(optarg);
+			if (targs->nslots <= 0 && targs->nslots != -1) {
+				pr_info("Slot count cap has to be positive or -1 for no cap\n");
+				return false;
+			}
+			break;
+		case 'f':
+			targs->tfirst = atoi(optarg);
+			/* Negative values are rejected before the unsigned compare. */
+			if (targs->tfirst < 0 || targs->tfirst >= NTESTS) {
+				pr_info("First test to run has to be non-negative and less than %zu\n",
+					NTESTS);
+				return false;
+			}
+			break;
+		case 'e':
+			targs->tlast = atoi(optarg);
+			if (targs->tlast < 0 || targs->tlast >= NTESTS) {
+				pr_info("Last test to run has to be non-negative and less than %zu\n",
+					NTESTS);
+				return false;
+			}
+			break;
+		case 'l':
+			targs->seconds = atoi(optarg);
+			if (targs->seconds < 0) {
+				pr_info("Test length in seconds has to be non-negative\n");
+				return false;
+			}
+			break;
+		case 'r':
+			targs->runs = atoi(optarg);
+			if (targs->runs <= 0) {
+				pr_info("Runs per test has to be positive\n");
+				return false;
+			}
+			break;
+		}
+	}
+
+	/* No positional arguments are accepted. */
+	if (optind < argc) {
+		help(argv[0], targs);
+		return false;
+	}
+
+	/* Checked last because -f and -e may appear in either order. */
+	if (targs->tfirst > targs->tlast) {
+		pr_info("First test to run cannot be greater than the last test to run\n");
+		return false;
+	}
+
+	return true;
+}
+
+/* Outcome of one test run, also used to remember the best run so far. */
+struct test_result {
+ /*
+ * slot_runtime and guest_runtime are filled in by test_execute();
+ * iter_runtime is guest_runtime divided by nloops.
+ */
+ struct timespec slot_runtime, guest_runtime, iter_runtime;
+ /*
+ * slot_runtime and iter_runtime converted to nanoseconds. In the "best"
+ * records a value of 0 means that no result has been stored yet.
+ */
+ int64_t slottimens, runtimens;
+ /* Number of completed loop iterations. */
+ uint64_t nloops;
+};
+
+/*
+ * Execute one run of test @data with the settings in @targs, print its
+ * timings and update the best-so-far records.
+ *
+ * @rbestslottime is only updated for tests using the default memory size
+ * (data->mem_size == 0); @rbestruntime is updated for every test. In both
+ * records a stored time of 0 is treated as "no result yet".
+ *
+ * Returns false when the run could not be executed, true otherwise (also
+ * when the run completed no full loop, in which case nothing is ranked).
+ */
+static bool test_loop(const struct test_data *data,
+		      const struct test_args *targs,
+		      struct test_result *rbestslottime,
+		      struct test_result *rbestruntime)
+{
+	struct test_result result;
+	uint64_t maxslots;
+	bool executed;
+
+	result.nloops = 0;
+	executed = test_execute(targs->nslots, &maxslots, targs->seconds, data,
+				&result.nloops,
+				&result.slot_runtime, &result.guest_runtime);
+	if (!executed) {
+		if (maxslots)
+			pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
+				maxslots);
+		else
+			pr_info("Memslot count may be too high for this test, try adjusting the cap\n");
+
+		return false;
+	}
+
+	pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
+		result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
+		result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
+
+	/* Without a completed loop there is no per-iteration time to rank. */
+	if (result.nloops == 0) {
+		pr_info("No full loops done - too short test time or system too loaded?\n");
+		return true;
+	}
+
+	result.iter_runtime = timespec_div(result.guest_runtime,
+					   result.nloops);
+	pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
+		result.nloops,
+		result.iter_runtime.tv_sec,
+		result.iter_runtime.tv_nsec);
+	result.slottimens = timespec_to_ns(result.slot_runtime);
+	result.runtimens = timespec_to_ns(result.iter_runtime);
+
+	/*
+	 * Slot setup times are only comparable between tests that use the
+	 * whole test memory area, so rank only those.
+	 */
+	if (!data->mem_size) {
+		if (!rbestslottime->slottimens ||
+		    result.slottimens < rbestslottime->slottimens)
+			*rbestslottime = result;
+	}
+
+	if (!rbestruntime->runtimens ||
+	    result.runtimens < rbestruntime->runtimens)
+		*rbestruntime = result;
+
+	return true;
+}
+
+/*
+ * Entry point: parse the options, run every selected test targs.runs times
+ * and report the best per-iteration time of each test plus the best slot
+ * setup time seen overall. Returns -1 when parse_args() fails, 0 otherwise.
+ */
+int main(int argc, char *argv[])
+{
+ struct test_args targs = {
+ .tfirst = 0,
+ .tlast = NTESTS - 1,
+ .nslots = -1,
+ .seconds = 5,
+ .runs = 1,
+ };
+ struct test_result rbestslottime;
+ int tctr;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ if (!parse_args(argc, argv, &targs))
+ return -1;
+
+ /* 0 marks "no result yet"; the record is shared across all tests. */
+ rbestslottime.slottimens = 0;
+ for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
+ const struct test_data *data = &tests[tctr];
+ unsigned int runctr;
+ struct test_result rbestruntime;
+
+ /* Blank line between the output of consecutive tests. */
+ if (tctr > targs.tfirst)
+ pr_info("\n");
+
+ pr_info("Testing %s performance with %i runs, %d seconds each\n",
+ data->name, targs.runs, targs.seconds);
+
+ /* The best-runtime record is per test, so reset it here. */
+ rbestruntime.runtimens = 0;
+ /* Stop repeating this test as soon as one run fails. */
+ for (runctr = 0; runctr < targs.runs; runctr++)
+ if (!test_loop(data, &targs,
+ &rbestslottime, &rbestruntime))
+ break;
+
+ if (rbestruntime.runtimens)
+ pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
+ rbestruntime.iter_runtime.tv_sec,
+ rbestruntime.iter_runtime.tv_nsec,
+ rbestruntime.nloops);
+ }
+
+ if (rbestslottime.slottimens)
+ pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
+ rbestslottime.slot_runtime.tv_sec,
+ rbestslottime.slot_runtime.tv_nsec);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 978f5b5f4dc0..85b18bb8f762 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -132,7 +132,7 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
- virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+ virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2);
/* Ditto for the host mapping so that both pages can be zeroed. */
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
@@ -376,7 +376,7 @@ static void test_add_max_memory_regions(void)
pr_info("Adding slots 0..%i, each memory region with %dK size\n",
(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
- mem = mmap(NULL, MEM_REGION_SIZE * max_mem_slots + alignment,
+ mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
@@ -401,7 +401,7 @@ static void test_add_max_memory_regions(void)
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Adding one more memory slot should fail with EINVAL");
- munmap(mem, MEM_REGION_SIZE * max_mem_slots + alignment);
+ munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
munmap(mem_extra, MEM_REGION_SIZE);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index fcc840088c91..b0031f2d38fd 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -73,8 +73,6 @@ static void steal_time_init(struct kvm_vm *vm)
for (i = 0; i < NR_VCPUS; ++i) {
int ret;
- vcpu_set_cpuid(vm, i, kvm_get_supported_cpuid());
-
/* ST_GPA_BASE is identity mapped */
st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
sync_global_to_guest(vm, st_gva[i]);
@@ -295,7 +293,7 @@ int main(int ac, char **av)
vm = vm_create_default(0, 0, guest_code);
gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
- virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages, 0);
+ virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages);
ucall_init(vm, NULL);
/* Add the rest of the VCPUs */
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
new file mode 100644
index 000000000000..f070ff0224fa
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM_CAP_EXIT_ON_EMULATION_FAILURE capability.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define VCPU_ID 1
+#define PAGE_SIZE 4096
+#define MAXPHYADDR 36
+
+#define MEM_REGION_GVA 0x0000123456789000
+#define MEM_REGION_GPA 0x0000000700000000
+#define MEM_REGION_SLOT 10
+#define MEM_REGION_SIZE PAGE_SIZE
+
+/*
+ * Guest entry point: execute a single x87 'flds' load from MEM_REGION_GVA
+ * (address passed in a general purpose register) and then signal completion
+ * to the host with GUEST_DONE().
+ */
+static void guest_code(void)
+{
+ __asm__ __volatile__("flds (%[addr])"
+ :: [addr]"r"(MEM_REGION_GVA));
+
+ GUEST_DONE();
+}
+
+/* Enter the guest on VCPU_ID and fail the test unless _vcpu_run() returns 0. */
+static void run_guest(struct kvm_vm *vm)
+{
+	const int rc = _vcpu_run(vm, VCPU_ID);
+
+	TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+}
+
+/*
+ * Accessors to get R/M, REG, and Mod bits described in the SDM vol 2,
+ * figure 2-2 "Table Interpretation of ModR/M Byte (C8H)".
+ *
+ * ModR/M layout: Mod = bits 7:6, REG = bits 5:3, R/M = bits 2:0. The Mod
+ * mask therefore has to be 0xc0; masking with 0xc and shifting right by 6
+ * would always yield 0. The argument is parenthesised so that expressions
+ * can be passed safely.
+ */
+#define GET_RM(insn_byte) ((insn_byte) & 0x7)
+#define GET_REG(insn_byte) (((insn_byte) & 0x38) >> 3)
+#define GET_MOD(insn_byte) (((insn_byte) & 0xc0) >> 6)
+
+/*
+ * Return true only if @insn_bytes (of which @insn_size are valid) starts
+ * with a plain 2-byte flds: opcode 0xd9, ModR/M with REG == 0 and Mod == 0,
+ * and an R/M value that implies neither a SIB byte nor a displacement.
+ */
+static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size)
+{
+	uint8_t modrm;
+
+	/* Both the opcode and the ModR/M byte have to be present. */
+	if (insn_size < 2)
+		return false;
+
+	if (insn_bytes[0] != 0xd9)
+		return false;
+
+	modrm = insn_bytes[1];
+	if (GET_REG(modrm) != 0x0)
+		return false;
+	if (GET_MOD(modrm) != 0x0)
+		return false;
+
+	/* Ensure there is no SIB byte. */
+	if (GET_RM(modrm) == 0x4)
+		return false;
+
+	/* Ensure there is no displacement byte. */
+	return GET_RM(modrm) != 0x5;
+}
+
+/*
+ * Validate that the last exit of VCPU_ID is an emulation-failure internal
+ * error. If the exit data carries the instruction bytes (flag set and
+ * ndata >= 3), additionally check that they describe the expected flds and
+ * advance the guest RIP past it; otherwise the registers are left untouched.
+ */
+static void process_exit_on_emulation_error(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct kvm_regs regs;
+	uint8_t *insn_bytes;
+	uint8_t insn_size;
+	uint64_t flags;
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+		    "Unexpected suberror: %u",
+		    run->emulation_failure.suberror);
+
+	/* The flags field is only valid when at least one data word exists. */
+	if (run->emulation_failure.ndata < 1)
+		return;
+
+	flags = run->emulation_failure.flags;
+	if (!(flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES))
+		return;
+	if (run->emulation_failure.ndata < 3)
+		return;
+
+	insn_size = run->emulation_failure.insn_size;
+	insn_bytes = run->emulation_failure.insn_bytes;
+
+	TEST_ASSERT(insn_size <= 15 && insn_size > 0,
+		    "Unexpected instruction size: %u",
+		    insn_size);
+
+	TEST_ASSERT(is_flds(insn_bytes, insn_size),
+		    "Unexpected instruction. Expected 'flds' (0xd9 /0)");
+
+	/*
+	 * is_flds() only accepts the 2-byte encoding (no prefix, no SIB, no
+	 * displacement), so skipping two bytes lands on the next instruction.
+	 */
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	regs.rip += 2;
+	vcpu_regs_set(vm, VCPU_ID, &regs);
+}
+
+/*
+ * Fail the test with the message carried by a guest abort ucall: args[0] is
+ * used as the message string and args[1] as the number printed after the
+ * file name. @vm is unused.
+ */
+static void do_guest_assert(struct kvm_vm *vm, struct ucall *uc)
+{
+	const char *guest_msg = (const char *)uc->args[0];
+
+	TEST_FAIL("%s at %s:%ld", guest_msg, __FILE__,
+		  uc->args[1]);
+}
+
+/*
+ * If the last exit of VCPU_ID is an I/O exit carrying UCALL_ABORT, report
+ * the guest assertion; otherwise do nothing.
+ */
+static void check_for_guest_assert(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct ucall uc;
+
+	/* get_ucall() is only consulted for I/O exits. */
+	if (run->exit_reason != KVM_EXIT_IO)
+		return;
+
+	if (get_ucall(vm, VCPU_ID, &uc) != UCALL_ABORT)
+		return;
+
+	do_guest_assert(vm, &uc);
+}
+
+/*
+ * Assert that the last exit of VCPU_ID is an I/O exit carrying UCALL_DONE.
+ * A pending guest assertion is reported first so that it is not masked by
+ * the less specific failures below.
+ */
+static void process_ucall_done(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ check_for_guest_assert(vm);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE,
+ "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+ uc.cmd, UCALL_DONE);
+}
+
+/*
+ * Decode the ucall behind the last exit of VCPU_ID, which must be an I/O
+ * exit, dispatch on it and return its command code (uc.cmd).
+ */
+static uint64_t process_ucall(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct ucall uc;
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	switch (get_ucall(vm, VCPU_ID, &uc)) {
+	case UCALL_DONE:
+		process_ucall_done(vm);
+		break;
+	case UCALL_ABORT:
+		do_guest_assert(vm, &uc);
+		break;
+	case UCALL_SYNC:
+		/* Nothing to do; the caller only looks at the command. */
+		break;
+	default:
+		TEST_ASSERT(false, "Unexpected ucall");
+	}
+
+	return uc.cmd;
+}
+
+/*
+ * Test flow: create a VM whose CPUID advertises MAXPHYADDR physical address
+ * bits, enable KVM_CAP_EXIT_ON_EMULATION_FAILURE, map one page at
+ * MEM_REGION_GVA with bit 36 set in its page table entry, run the guest
+ * until the emulation failure exit, step over the flds and run again
+ * expecting UCALL_DONE.
+ */
+int main(int argc, char *argv[])
+{
+ struct kvm_enable_cap emul_failure_cap = {
+ .cap = KVM_CAP_EXIT_ON_EMULATION_FAILURE,
+ .args[0] = 1,
+ };
+ struct kvm_cpuid_entry2 *entry;
+ struct kvm_cpuid2 *cpuid;
+ struct kvm_vm *vm;
+ uint64_t gpa, pte;
+ uint64_t *hva;
+ int rc;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ /* NOTE(review): returns 0 (not a skip code) and does not free vm here. */
+ if (!kvm_check_cap(KVM_CAP_SMALLER_MAXPHYADDR)) {
+ printf("module parameter 'allow_smaller_maxphyaddr' is not set. Skipping test.\n");
+ return 0;
+ }
+
+ cpuid = kvm_get_supported_cpuid();
+
+ /* Replace the low byte of CPUID.0x80000008:EAX with MAXPHYADDR. */
+ entry = kvm_get_supported_cpuid_index(0x80000008, 0);
+ entry->eax = (entry->eax & 0xffffff00) | MAXPHYADDR;
+ set_cpuid(cpuid, entry);
+
+ vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+
+ rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
+ TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
+ vm_enable_cap(vm, &emul_failure_cap);
+
+ /* Back MEM_REGION_GPA with one zeroed page mapped at MEM_REGION_GVA. */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / PAGE_SIZE, 0);
+ gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
+ MEM_REGION_GPA, MEM_REGION_SLOT);
+ TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+ virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+ memset(hva, 0, PAGE_SIZE);
+ /*
+ * Set bit 36 (the value of MAXPHYADDR) in the PTE. NOTE(review):
+ * presumably this makes the guest access fault so that KVM has to
+ * emulate the flds - confirm.
+ */
+ pte = vm_get_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA);
+ vm_set_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA, pte | (1ull << 36));
+
+ /* First run ends in the emulation failure; second one finishes the guest. */
+ run_guest(vm);
+ process_exit_on_emulation_error(vm);
+ run_guest(vm);
+
+ TEST_ASSERT(process_ucall(vm) == UCALL_DONE, "Expected UCALL_DONE");
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 63096cea26c6..2b46dcca86a8 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -22,15 +22,6 @@
static int ud_count;
-void enable_x2apic(void)
-{
- uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4);
-
- wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
- MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
- wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED);
-}
-
static void guest_ud_handler(struct ex_regs *regs)
{
ud_count++;
@@ -59,7 +50,7 @@ void guest_code(struct vmx_pages *vmx_pages)
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- enable_x2apic();
+ x2apic_enable();
GUEST_SYNC(1);
GUEST_SYNC(2);
@@ -121,14 +112,38 @@ void inject_nmi(struct kvm_vm *vm)
vcpu_events_set(vm, VCPU_ID, &events);
}
+/*
+ * Save the state of VCPU_ID, release and restart the VM, re-create the vCPU
+ * with Hyper-V CPUID and eVMCS enabled, load the saved state into it and
+ * assert that the general purpose registers read back unchanged.
+ */
+static void save_restore_vm(struct kvm_vm *vm)
+{
+ struct kvm_regs regs1, regs2;
+ struct kvm_x86_state *state;
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ /* Zero first so the later memcmp() covers well-defined bytes only. */
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID);
+ /* Hyper-V CPUID and eVMCS are set up again before loading the state. */
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+ vcpu_enable_evmcs(vm, VCPU_ID);
+ vcpu_load_state(vm, VCPU_ID, state);
+ free(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vm, VCPU_ID, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+}
+
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva = 0;
- struct kvm_regs regs1, regs2;
struct kvm_vm *vm;
struct kvm_run *run;
- struct kvm_x86_state *state;
struct ucall uc;
int stage;
@@ -145,21 +160,18 @@ int main(int argc, char *argv[])
vcpu_set_hv_cpuid(vm, VCPU_ID);
vcpu_enable_evmcs(vm, VCPU_ID);
- run = vcpu_state(vm, VCPU_ID);
-
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
vcpu_alloc_vmx(vm, &vmx_pages_gva);
vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
- vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler);
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+ vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
pr_info("Running L1 which uses EVMCS to run L2\n");
for (stage = 1;; stage++) {
+ run = vcpu_state(vm, VCPU_ID);
_vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Stage %d: unexpected exit reason: %u (%s),\n",
@@ -184,32 +196,23 @@ int main(int argc, char *argv[])
uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
stage, (ulong)uc.args[1]);
- state = vcpu_save_state(vm, VCPU_ID);
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_hv_cpuid(vm, VCPU_ID);
- vcpu_enable_evmcs(vm, VCPU_ID);
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
-
- memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
- TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
+ save_restore_vm(vm);
/* Force immediate L2->L1 exit before resuming */
if (stage == 8) {
pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
inject_nmi(vm);
}
+
+ /*
+ * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
+ * restored VM (before the first KVM_RUN) to check that
+ * KVM_STATE_NESTED_EVMCS is not lost.
+ */
+ if (stage == 9) {
+ pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
+ save_restore_vm(vm);
+ }
}
done:
diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
index 9b78e8889638..a711f83749ea 100644
--- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
@@ -19,7 +19,12 @@ struct {
u32 function;
u32 index;
} mangled_cpuids[] = {
+ /*
+ * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR,
+ * which are not controlled for by this test.
+ */
{.function = 0xd, .index = 0},
+ {.function = 0xd, .index = 1},
};
static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
@@ -140,8 +145,7 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
{
int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
- vm_vaddr_t gva = vm_vaddr_alloc(vm, size,
- getpagesize(), 0, 0);
+ vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
memcpy(guest_cpuids, cpuid, size);
diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
index cb953df4d7d0..8aed0db1331d 100644
--- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
+++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
@@ -37,9 +37,7 @@ static void test_get_msr_index(void)
int old_res, res, kvm_fd, r;
struct kvm_msr_list *list;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_fd = open_kvm_dev_path_or_exit();
old_res = kvm_num_index_msrs(kvm_fd, 0);
TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
@@ -101,9 +99,7 @@ static void test_get_msr_feature(void)
int res, old_res, i, kvm_fd;
struct kvm_msr_list *feature_list;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_fd = open_kvm_dev_path_or_exit();
old_res = kvm_num_feature_msrs(kvm_fd, 0);
TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
index 7f1d2765572c..bab10ae787b6 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -7,6 +7,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "hyperv.h"
struct ms_hyperv_tsc_page {
volatile u32 tsc_sequence;
@@ -15,13 +16,6 @@ struct ms_hyperv_tsc_page {
volatile s64 tsc_offset;
} __packed;
-#define HV_X64_MSR_GUEST_OS_ID 0x40000000
-#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
-#define HV_X64_MSR_REFERENCE_TSC 0x40000021
-#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
-#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
-#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
-
/* Simplified mul_u64_u64_shr() */
static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
{
@@ -220,7 +214,7 @@ int main(void)
vcpu_set_hv_cpuid(vm, VCPU_ID);
- tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ tsc_page_gva = vm_vaddr_alloc_page(vm);
memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
"TSC page has to be page aligned\n");
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
new file mode 100644
index 000000000000..42bd658f52a8
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -0,0 +1,649 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V features enablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+#define VCPU_ID 0
+#define LINUX_OS_ID ((u64)0x8100 << 48)
+
+extern unsigned char rdmsr_start;
+extern unsigned char rdmsr_end;
+
+/*
+ * Execute RDMSR for MSR @idx and return the 64-bit value read.
+ *
+ * The instruction is bracketed by the asm labels rdmsr_start and rdmsr_end
+ * so that guest_gp_handler() can recognise a fault raised by it and resume
+ * execution right behind it.
+ */
+static u64 do_rdmsr(u32 idx)
+{
+	u32 lo, hi;
+
+	/*
+	 * RDMSR takes the MSR index in ECX and returns the value in EDX:EAX,
+	 * so the high half has to be bound to "d"; ECX is input only.
+	 */
+	asm volatile("rdmsr_start: rdmsr;"
+		     "rdmsr_end:"
+		     : "=a"(lo), "=d"(hi)
+		     : "c"(idx));
+
+	return (((u64) hi) << 32) | lo;
+}
+
+extern unsigned char wrmsr_start;
+extern unsigned char wrmsr_end;
+
+/*
+ * Execute WRMSR, writing @val to MSR @idx. The instruction is bracketed by
+ * the asm labels wrmsr_start and wrmsr_end, which guest_gp_handler() uses
+ * to recognise and skip a faulting write.
+ */
+static void do_wrmsr(u32 idx, u64 val)
+{
+	/* Split the value into the low (EAX) and high (EDX) halves. */
+	u32 lo = val;
+	u32 hi = val >> 32;
+
+	asm volatile("wrmsr_start: wrmsr;"
+		     "wrmsr_end:"
+		     : : "a"(lo), "c"(idx), "d"(hi));
+}
+
+static int nr_gp;
+
+/*
+ * Issue a hypercall with VMCALL and return the value left in RAX.
+ *
+ * @control is passed in RCX, @input_address in RDX and @output_address is
+ * copied into R8 before the instruction. RCX and RDX are declared
+ * read-write and R8-R11, the flags and memory are declared clobbered.
+ */
+static inline u64 hypercall(u64 control, vm_vaddr_t input_address,
+ vm_vaddr_t output_address)
+{
+ u64 hv_status;
+
+ asm volatile("mov %3, %%r8\n"
+ "vmcall"
+ : "=a" (hv_status),
+ "+c" (control), "+d" (input_address)
+ : "r" (output_address)
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+
+ return hv_status;
+}
+
+/*
+ * Guest exception handler: the faulting RIP must be exactly the rdmsr or
+ * the wrmsr inside do_rdmsr()/do_wrmsr(). Counts the fault in nr_gp and
+ * resumes execution at the label following the faulting instruction.
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+	unsigned char *rip = (unsigned char *)regs->rip;
+	bool r = rip == &rdmsr_start;
+	bool w = rip == &wrmsr_start;
+
+	GUEST_ASSERT(r || w);
+
+	nr_gp++;
+
+	/* Skip the faulting instruction. */
+	regs->rip = r ? (uint64_t)&rdmsr_end : (uint64_t)&wrmsr_end;
+}
+
+/* Description of one MSR access for guest_msr() to perform. */
+struct msr_data {
+ /* MSR index; a value of 0 ends the guest_msr() loop. */
+ uint32_t idx;
+ /* Expected outcome: true = no fault counted, false = exactly one. */
+ bool available;
+ /* true = write write_val with do_wrmsr(), false = read with do_rdmsr(). */
+ bool write;
+ /* Value to write when write is set. */
+ u64 write_val;
+};
+
+/* Description of one hypercall for guest_hcall() to issue. */
+struct hcall_data {
+ /* Hypercall control word; a value of 0 ends the guest_hcall() loop. */
+ uint64_t control;
+ /* Status the hypercall is expected to return. */
+ uint64_t expect;
+};
+
+/*
+ * Guest side of the MSR access test. While msr->idx is non-zero: perform
+ * the described read or write, assert that the number of faults counted in
+ * nr_gp matches msr->available, and sync with the host, passing the
+ * iteration counter. Signals GUEST_DONE() once msr->idx reads as 0.
+ */
+static void guest_msr(struct msr_data *msr)
+{
+	int i = 0;
+
+	while (msr->idx) {
+		/* Start every access with a clean fault counter. */
+		WRITE_ONCE(nr_gp, 0);
+
+		if (msr->write)
+			do_wrmsr(msr->idx, msr->write_val);
+		else
+			do_rdmsr(msr->idx);
+
+		if (!msr->available)
+			GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
+		else
+			GUEST_ASSERT(READ_ONCE(nr_gp) == 0);
+
+		GUEST_SYNC(i++);
+	}
+
+	GUEST_DONE();
+}
+
+/*
+ * Guest side of the hypercall test. Writes LINUX_OS_ID to
+ * HV_X64_MSR_GUEST_OS_ID and @pgs_gpa to HV_X64_MSR_HYPERCALL, then, while
+ * hcall->control is non-zero, issues the hypercall with the input at
+ * @pgs_gpa and the output at @pgs_gpa + 4096, asserts that the returned
+ * status equals hcall->expect and syncs with the host. Signals GUEST_DONE()
+ * once hcall->control reads as 0.
+ */
+static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+{
+ int i = 0;
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+
+ while (hcall->control) {
+ GUEST_ASSERT(hypercall(hcall->control, pgs_gpa,
+ pgs_gpa + 4096) == hcall->expect);
+ GUEST_SYNC(i++);
+ }
+
+ GUEST_DONE();
+}
+
+/*
+ * Store the three CPUID entries @feat, @recomm and @dbg into @cpuid with
+ * set_cpuid(), failing the test if any of them cannot be set, and then
+ * apply @cpuid to VCPU_ID.
+ */
+static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 *feat,
+ struct kvm_cpuid_entry2 *recomm,
+ struct kvm_cpuid_entry2 *dbg)
+{
+ TEST_ASSERT(set_cpuid(cpuid, feat),
+ "failed to set KVM_CPUID_FEATURES leaf");
+ TEST_ASSERT(set_cpuid(cpuid, recomm),
+ "failed to set HYPERV_CPUID_ENLIGHTMENT_INFO leaf");
+ TEST_ASSERT(set_cpuid(cpuid, dbg),
+ "failed to set HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES leaf");
+ vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+}
+
+/*
+ * Host-side driver for the MSR access test.
+ *
+ * Each stage fills in *msr (MSR index, read or write, value to write and the
+ * 'available' flag) and may add bits to the Hyper-V CPUID entries or enable a
+ * capability. CPUID is then reloaded through hv_set_cpuid() and the vCPU is
+ * run once. Stages that leave msr->idx, msr->write or msr->available
+ * untouched reuse whatever the previous stage stored there.
+ *
+ * After every run the exit must be KVM_EXIT_IO. A UCALL_SYNC has to carry the
+ * current stage number, UCALL_ABORT fails the test and UCALL_DONE ends it.
+ * The last stage stores 0 in msr->idx (marked END below).
+ */
+static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
+ struct kvm_cpuid2 *best)
+{
+ struct kvm_run *run;
+ struct ucall uc;
+ int stage = 0, r;
+ struct kvm_cpuid_entry2 feat = {
+ .function = HYPERV_CPUID_FEATURES
+ };
+ struct kvm_cpuid_entry2 recomm = {
+ .function = HYPERV_CPUID_ENLIGHTMENT_INFO
+ };
+ struct kvm_cpuid_entry2 dbg = {
+ .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
+ };
+ struct kvm_enable_cap cap = {0};
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ while (true) {
+ switch (stage) {
+ case 0:
+ /*
+ * Only available when Hyper-V identification is set
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 1:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 2:
+ feat.eax |= HV_MSR_HYPERCALL_AVAILABLE;
+ /*
+ * HV_X64_MSR_GUEST_OS_ID has to be written first to make
+ * HV_X64_MSR_HYPERCALL available.
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = 1;
+ msr->write_val = LINUX_OS_ID;
+ msr->available = 1;
+ break;
+ case 3:
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 4:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+
+ case 5:
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 6:
+ feat.eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 7:
+ /* Read only */
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 8:
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 9:
+ feat.eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 10:
+ /* Read only */
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 11:
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 12:
+ feat.eax |= HV_MSR_VP_INDEX_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 13:
+ /* Read only */
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 14:
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 15:
+ feat.eax |= HV_MSR_RESET_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 16:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+
+ case 17:
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 18:
+ feat.eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 19:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+
+ case 20:
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 21:
+ /*
+ * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
+ * capability enabled and guest visible CPUID bit unset.
+ */
+ cap.cap = KVM_CAP_HYPERV_SYNIC2;
+ vcpu_enable_cap(vm, VCPU_ID, &cap);
+ break;
+ case 22:
+ feat.eax |= HV_MSR_SYNIC_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 23:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+
+ case 24:
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 25:
+ feat.eax |= HV_MSR_SYNTIMER_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 26:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+ case 27:
+ /* Direct mode test */
+ msr->write = 1;
+ msr->write_val = 1 << 12;
+ msr->available = 0;
+ break;
+ case 28:
+ feat.edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
+ msr->available = 1;
+ break;
+
+ case 29:
+ msr->idx = HV_X64_MSR_EOI;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 30:
+ feat.eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 1;
+ break;
+
+ case 31:
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 32:
+ feat.eax |= HV_ACCESS_FREQUENCY_MSRS;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 33:
+ /* Read only */
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 34:
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 35:
+ feat.eax |= HV_ACCESS_REENLIGHTENMENT;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 36:
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 1;
+ break;
+ case 37:
+ /* Can only write '0' */
+ msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 38:
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 39:
+ feat.edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 40:
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 1;
+ break;
+
+ case 41:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 42:
+ feat.edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
+ dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 43:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+
+ case 44:
+ /* END */
+ msr->idx = 0;
+ break;
+ }
+
+ /* CPUID is reloaded on every stage, whether or not it changed. */
+ hv_set_cpuid(vm, best, &feat, &recomm, &dbg);
+
+ if (msr->idx)
+ pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+ msr->idx, msr->write ? "write" : "read");
+ else
+ pr_debug("Stage %d: finish\n", stage);
+
+ r = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)\n",
+ uc.args[1], stage);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ return;
+ case UCALL_DONE:
+ return;
+ }
+
+ stage++;
+ }
+}
+
+/*
+ * Host-side driver for the hypercall access test.
+ *
+ * Each stage stores a hypercall code and/or the status the guest must observe
+ * in *hcall and may add bits to the Hyper-V CPUID entries. CPUID is then
+ * reloaded through hv_set_cpuid() and the vCPU is run once. Stages that only
+ * set hcall->expect re-issue the hypercall code left by the previous stage.
+ *
+ * After every run the exit must be KVM_EXIT_IO. A UCALL_SYNC has to carry the
+ * current stage number, UCALL_ABORT fails the test and UCALL_DONE ends it.
+ * The last stage stores 0 in hcall->control (marked END below).
+ *
+ * Note: the input and output parameters are not referenced in this function.
+ */
+static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall,
+ void *input, void *output, struct kvm_cpuid2 *best)
+{
+ struct kvm_run *run;
+ struct ucall uc;
+ int stage = 0, r;
+ struct kvm_cpuid_entry2 feat = {
+ .function = HYPERV_CPUID_FEATURES,
+ .eax = HV_MSR_HYPERCALL_AVAILABLE
+ };
+ struct kvm_cpuid_entry2 recomm = {
+ .function = HYPERV_CPUID_ENLIGHTMENT_INFO
+ };
+ struct kvm_cpuid_entry2 dbg = {
+ .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
+ };
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ while (true) {
+ switch (stage) {
+ case 0:
+ hcall->control = 0xdeadbeef;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+
+ case 1:
+ hcall->control = HVCALL_POST_MESSAGE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 2:
+ feat.ebx |= HV_POST_MESSAGES;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 3:
+ hcall->control = HVCALL_SIGNAL_EVENT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 4:
+ feat.ebx |= HV_SIGNAL_EVENTS;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 5:
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+ case 6:
+ dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 7:
+ feat.ebx |= HV_DEBUGGING;
+ hcall->expect = HV_STATUS_OPERATION_DENIED;
+ break;
+
+ case 8:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 9:
+ recomm.eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 10:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 11:
+ recomm.eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 12:
+ hcall->control = HVCALL_SEND_IPI;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 13:
+ recomm.eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ case 14:
+ /* Nothing in 'sparse banks' -> success */
+ hcall->control = HVCALL_SEND_IPI_EX;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 15:
+ hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 16:
+ recomm.ebx = 0xfff;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 17:
+ /* END */
+ hcall->control = 0;
+ break;
+ }
+
+ /* CPUID is reloaded on every stage, whether or not it changed. */
+ hv_set_cpuid(vm, best, &feat, &recomm, &dbg);
+
+ if (hcall->control)
+ pr_debug("Stage %d: testing hcall: 0x%lx\n", stage,
+ hcall->control);
+ else
+ pr_debug("Stage %d: finish\n", stage);
+
+ r = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)\n",
+ uc.args[1], stage);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ return;
+ case UCALL_DONE:
+ return;
+ }
+
+ stage++;
+ }
+}
+
+/*
+ * Run both sub-tests, each in a freshly created VM whose vCPU has
+ * KVM_CAP_HYPERV_ENFORCE_CPUID enabled: first the MSR access test (with a
+ * #GP handler installed in the guest), then the hypercall access test.
+ */
+int main(void)
+{
+ struct kvm_cpuid2 *best;
+ struct kvm_vm *vm;
+ vm_vaddr_t msr_gva, hcall_page, hcall_params;
+ struct kvm_enable_cap cap = {
+ .cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
+ .args = {1}
+ };
+
+ /* Test MSRs */
+ vm = vm_create_default(VCPU_ID, 0, guest_msr);
+
+ /* One zeroed page shared with the guest; its GVA is the guest's argument. */
+ msr_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+ vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
+ vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+ best = kvm_get_supported_hv_cpuid();
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ /* Same helper name as the other x86 selftests touched by this change. */
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+ pr_info("Testing access to Hyper-V specific MSRs\n");
+ guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva),
+ best);
+ kvm_vm_free(vm);
+
+ /* Test hypercalls */
+ vm = vm_create_default(VCPU_ID, 0, guest_hcall);
+
+ /* Hypercall input/output */
+ hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+ hcall_params = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+
+ /* The guest receives the GPA of the two pages and the GVA of the params. */
+ vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+ vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+ best = kvm_get_supported_hv_cpuid();
+
+ pr_info("Testing access to Hyper-V hypercalls\n");
+ guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params),
+ addr_gva2hva(vm, hcall_page),
+ addr_gva2hva(vm, hcall_page) + getpagesize(),
+ best);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index 732b244d6956..04ed975662c9 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -227,7 +227,7 @@ int main(void)
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
enter_guest(vm);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
new file mode 100644
index 000000000000..523371cf8e8f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 1
+
+#define MMIO_GPA 0x100000000ull
+
+/*
+ * Guest: perform two 8-byte reads of MMIO_GPA. The host side of this test
+ * expects an MMIO exit first and, after it has changed CPUID, a #PF that is
+ * consumed by guest_pf_handler(); the trailing GUEST_ASSERT(0) therefore
+ * only fires if the guest gets past both reads.
+ */
+static void guest_code(void)
+{
+ (void)READ_ONCE(*((uint64_t *)MMIO_GPA));
+ (void)READ_ONCE(*((uint64_t *)MMIO_GPA));
+
+ GUEST_ASSERT(0);
+}
+
+/*
+ * Guest #PF handler: verify the page-fault error code and report completion
+ * to the host.
+ */
+static void guest_pf_handler(struct ex_regs *regs)
+{
+ /* PFEC == RSVD | PRESENT (read, kernel). */
+ GUEST_ASSERT(regs->error_code == 0x9);
+ GUEST_DONE();
+}
+
+/*
+ * Run one "change CPUID after KVM_RUN" scenario in a fresh VM.
+ *
+ * @cpuid_reg:      register to tamper with; per the note further down it
+ *                  lives inside the struct returned by
+ *                  kvm_get_supported_cpuid()
+ * @evil_cpuid_val: value stored in *cpuid_reg before re-entering the guest
+ *
+ * The guest first touches a 1gb mapping that has no memslot, which must exit
+ * as an 8-byte MMIO access at MMIO_GPA. CPUID is then changed, a dummy
+ * memslot is added, a #PF handler is installed and the guest is re-entered;
+ * it must finish with UCALL_DONE. *cpuid_reg is restored before returning.
+ */
+static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val)
+{
+ u32 good_cpuid_val = *cpuid_reg;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ uint64_t cmd;
+ int r;
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ run = vcpu_state(vm, VCPU_ID);
+
+ /* Map 1gb page without a backing memslot. */
+ __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G);
+
+ r = _vcpu_run(vm, VCPU_ID);
+
+ /* Guest access to the 1gb page should trigger MMIO. */
+ TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_MMIO,
+ "Unexpected exit reason: %u (%s), expected MMIO exit (1gb page w/o memslot)\n",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ TEST_ASSERT(run->mmio.len == 8, "Unexpected exit mmio size = %u", run->mmio.len);
+
+ TEST_ASSERT(run->mmio.phys_addr == MMIO_GPA,
+ "Unexpected exit mmio address = 0x%llx", run->mmio.phys_addr);
+
+ /*
+ * Effect the CPUID change for the guest and re-enter the guest. Its
+ * access should now #PF due to the PAGE_SIZE bit being reserved or
+ * the resulting GPA being invalid. Note, kvm_get_supported_cpuid()
+ * returns the struct that contains the entry being modified. Eww.
+ */
+ *cpuid_reg = evil_cpuid_val;
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ /*
+ * Add a dummy memslot to coerce KVM into bumping the MMIO generation.
+ * KVM does not "officially" support mucking with CPUID after KVM_RUN,
+ * and will incorrectly reuse MMIO SPTEs. Don't delete the memslot!
+ * KVM x86 zaps all shadow pages on memslot deletion.
+ */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ MMIO_GPA << 1, 10, 1, 0);
+
+ /* Set up a #PF handler to eat the RSVD #PF and signal all done! */
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_install_exception_handler(vm, PF_VECTOR, guest_pf_handler);
+
+ r = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r);
+
+ cmd = get_ucall(vm, VCPU_ID, NULL);
+ TEST_ASSERT(cmd == UCALL_DONE,
+ "Unexpected guest exit, exit_reason=%s, ucall.cmd = %lu\n",
+ exit_reason_str(run->exit_reason), cmd);
+
+ /*
+ * Restore the happy CPUID value for the next test. Yes, changes are
+ * indeed persistent across VM destruction.
+ */
+ *cpuid_reg = good_cpuid_val;
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Parse the opt-in switches and run the selected sub-tests.
+ *
+ *   -g  run the CPUID.GBPAGES sub-test
+ *   -m  run the CPUID.MAXPHYADDR sub-test
+ *   -h  print usage and exit
+ *
+ * Returns 0 when the selected sub-tests ran, when the test was skipped or
+ * when usage was requested, and 1 on an unknown option.
+ */
+int main(int argc, char *argv[])
+{
+ struct kvm_cpuid_entry2 *entry;
+ int opt;
+
+ /*
+ * All tests are opt-in because TDP doesn't play nice with reserved #PF
+ * in the GVA->GPA translation. The hardware page walker doesn't let
+ * software change GBPAGES or MAXPHYADDR, and KVM doesn't manually walk
+ * the GVA on fault for performance reasons.
+ */
+ bool do_gbpages = false;
+ bool do_maxphyaddr = false;
+
+ setbuf(stdout, NULL);
+
+ /* 'h' is listed so that -h is not reported as an invalid option. */
+ while ((opt = getopt(argc, argv, "gmh")) != -1) {
+ switch (opt) {
+ case 'g':
+ do_gbpages = true;
+ break;
+ case 'm':
+ do_maxphyaddr = true;
+ break;
+ case 'h':
+ default:
+ printf("usage: %s [-g (GBPAGES)] [-m (MAXPHYADDR)]\n", argv[0]);
+ /* Stop here: no sub-test runs after usage or a bad option. */
+ return opt == 'h' ? 0 : 1;
+ }
+ }
+
+ if (!do_gbpages && !do_maxphyaddr) {
+ print_skip("No sub-tests selected");
+ return 0;
+ }
+
+ /* Both sub-tests map a 1gb page, so GBPAGES support is always needed. */
+ entry = kvm_get_supported_cpuid_entry(0x80000001);
+ if (!(entry->edx & CPUID_GBPAGES)) {
+ print_skip("1gb hugepages not supported");
+ return 0;
+ }
+
+ if (do_gbpages) {
+ pr_info("Test MMIO after toggling CPUID.GBPAGES\n\n");
+ mmu_role_test(&entry->edx, entry->edx & ~CPUID_GBPAGES);
+ }
+
+ if (do_maxphyaddr) {
+ pr_info("Test MMIO after changing CPUID.MAXPHYADDR\n\n");
+ entry = kvm_get_supported_cpuid_entry(0x80000008);
+ mmu_role_test(&entry->eax, (entry->eax & ~0xff) | 0x20);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
index 12c558fc8074..ae76436af0cc 100644
--- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -14,16 +14,12 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "apic.h"
#define N_VCPU 2
#define VCPU_ID0 0
#define VCPU_ID1 1
-static uint32_t get_bsp_flag(void)
-{
- return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
-}
-
static void guest_bsp_vcpu(void *arg)
{
GUEST_SYNC(1);
@@ -94,7 +90,7 @@ static struct kvm_vm *create_vm(void)
pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages);
vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ kvm_vm_elf_load(vm, program_invocation_name);
vm_create_irqchip(vm);
return vm;
@@ -106,8 +102,6 @@ static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code)
vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu);
else
vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu);
-
- vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
}
static void run_vm_bsp(uint32_t bsp_vcpu)
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 613c42c5a9b8..c1f831803ad2 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -55,8 +55,8 @@ static inline void sync_with_host(uint64_t phase)
void self_smi(void)
{
- wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
- APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
}
void guest_code(void *arg)
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index d672f0a473f8..fc03a150278d 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -24,6 +24,10 @@
#define UCALL_PIO_PORT ((uint16_t)0x1000)
+struct ucall uc_none = {
+ .cmd = UCALL_NONE,
+};
+
/*
* ucall is embedded here to protect against compiler reshuffling registers
* before calling a function. In this test we only need to get KVM_EXIT_IO
@@ -34,7 +38,8 @@ void guest_code(void)
asm volatile("1: in %[port], %%al\n"
"add $0x1, %%rbx\n"
"jmp 1b"
- : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
+ : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
+ : "rax", "rbx");
}
static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
index e357d8e222d4..5a6a662f2e59 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -18,15 +18,6 @@
#define rounded_rdmsr(x) ROUND(rdmsr(x))
#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vm, 0, x))
-#define GUEST_ASSERT_EQ(a, b) do { \
- __typeof(a) _a = (a); \
- __typeof(b) _b = (b); \
- if (_a != _b) \
- ucall(UCALL_ABORT, 4, \
- "Failed guest assert: " \
- #a " == " #b, __LINE__, _a, _b); \
- } while(0)
-
static void guest_code(void)
{
u64 val = 0;
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
index 72c0d0797522..e3e20e8848d0 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -574,7 +574,7 @@ static void test_msr_filter_allow(void) {
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
/* Process guest code userspace exits. */
run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
@@ -588,12 +588,12 @@ static void test_msr_filter_allow(void) {
run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
- vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
run_guest(vm);
- vm_handle_exception(vm, UD_VECTOR, NULL);
+ vm_install_exception_handler(vm, UD_VECTOR, NULL);
if (process_ucall(vm) != UCALL_DONE) {
- vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
/* Process emulated rdmsr and wrmsr instructions. */
run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
index d14888b34adb..d438c4d3228a 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -96,7 +96,7 @@ int main(int argc, char *argv[])
}
vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- prepare_virtualize_apic_accesses(vmx, vm, 0);
+ prepare_virtualize_apic_accesses(vmx, vm);
vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa);
while (!done) {
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index 537de1068554..06a64980a5d2 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -97,7 +97,7 @@ int main(int argc, char *argv[])
* Add an identity map for GVA range [0xc0000000, 0xc0002000). This
* affects both L1 and L2. However...
*/
- virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES, 0);
+ virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
/*
* ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
@@ -107,9 +107,9 @@ int main(int argc, char *argv[])
* meaning after the last call to virt_map.
*/
prepare_eptp(vmx, vm, 0);
- nested_map_memslot(vmx, vm, 0, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
+ nested_map_memslot(vmx, vm, 0);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
bmap = bitmap_alloc(TEST_MEM_PAGES);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
new file mode 100644
index 000000000000..280c01fd2412
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_nested_tsc_scaling_test
+ *
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This test case verifies that nested TSC scaling behaves as expected when
+ * both L1 and L2 are scaled using different ratios. For this test we scale
+ * L1 down and scale L2 up.
+ */
+
+#include <time.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+#include "kselftest.h"
+
+
+#define VCPU_ID 0
+
+/* L2 is scaled up (from L1's perspective) by this factor */
+#define L2_SCALE_FACTOR 4ULL
+
+#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
+#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
+
+#define L2_GUEST_STACK_SIZE 64
+
+enum { USLEEP, UCHECK_L1, UCHECK_L2 };
+#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec)
+#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
+
+
+/*
+ * Assert that a measured guest TSC frequency lies strictly inside a window
+ * of +/- 1% around the expected frequency. The slack absorbs the delays
+ * incurred while the TSC samples are being taken.
+ */
+static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+{
+ uint64_t tolerance = expected / 100;
+ uint64_t thresh_low = expected - tolerance;
+ uint64_t thresh_high = expected + tolerance;
+
+ /* Lower bound first, then upper bound; both report the full window. */
+ TEST_ASSERT(thresh_low < actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+ TEST_ASSERT(thresh_high > actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+}
+
+/*
+ * Guest helper: sample MSR_IA32_TSC, ask the host to sleep for one second,
+ * sample it again and hand the difference to the host, tagged with the
+ * given level (UCHECK_L1 or UCHECK_L2).
+ */
+static void check_tsc_freq(int level)
+{
+ uint64_t before, after;
+
+ /*
+ * Two TSC reads roughly one second apart approximate the TSC frequency
+ * as seen by this guest. The figure is not exact, but it is good enough
+ * for what this test checks.
+ */
+ before = rdmsr(MSR_IA32_TSC);
+ GUEST_SLEEP(1);
+ after = rdmsr(MSR_IA32_TSC);
+
+ GUEST_CHECK(level, after - before);
+}
+
+/*
+ * L2 guest: report the TSC frequency observed at the L2 level, then execute
+ * VMCALL to get back to L1.
+ */
+static void l2_guest_code(void)
+{
+ check_tsc_freq(UCHECK_L2);
+
+ /* exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+/*
+ * L1 guest: check L1's TSC frequency, enter VMX operation, prepare a VMCS
+ * that runs l2_guest_code() with MSR bitmaps, TSC offsetting and TSC scaling
+ * switched on (TSC_OFFSET_L2 / TSC_MULTIPLIER_L2), launch L2 and expect it
+ * to come back with a VMCALL exit, then check L1's TSC frequency once more.
+ */
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* prepare the VMCS for L2 execution */
+ prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC offsetting and TSC scaling for L2 */
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+ control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+ control |= SECONDARY_EXEC_TSC_SCALING;
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+
+ /* the multiplier is written to both TSC_MULTIPLIER and its _HIGH field */
+ vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
+ vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
+ vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
+
+ /* launch L2 */
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+/* Exit with KSFT_SKIP unless kvm_check_cap() reports KVM_CAP_TSC_CONTROL. */
+static void tsc_scaling_check_supported(void)
+{
+ if (kvm_check_cap(KVM_CAP_TSC_CONTROL))
+ return;
+
+ print_skip("TSC scaling not supported by the HW");
+ exit(KSFT_SKIP);
+}
+
+/*
+ * Exit with KSFT_SKIP unless the first three characters of the host's
+ * current_clocksource sysfs file are "tsc". A file that cannot be opened or
+ * read is treated like a non-TSC clocksource.
+ */
+static void stable_tsc_check_supported(void)
+{
+ FILE *fp;
+ char buf[4];
+ int is_tsc = 0;
+
+ fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
+ if (fp != NULL) {
+ /* buf holds at most three characters plus the terminating NUL. */
+ if (fgets(buf, sizeof(buf), fp) != NULL &&
+ strncmp(buf, "tsc", sizeof(buf)) == 0)
+ is_tsc = 1;
+
+ /* Release the stream on every path once the name has been read. */
+ fclose(fp);
+ }
+
+ if (is_tsc)
+ return;
+
+ print_skip("Kernel does not use TSC clocksource - assuming that host TSC is not stable");
+ exit(KSFT_SKIP);
+}
+
+/*
+ * Host side of the nested TSC scaling test.
+ *
+ * After the three support checks, measure the host TSC over a one second
+ * sleep, create a VM running l1_guest_code(), divide the vCPU's TSC
+ * frequency by a random factor between 2 and 10, and then serve the guest's
+ * ucalls: USLEEP sleeps for the requested number of seconds, UCHECK_L1 and
+ * UCHECK_L2 compare the reported frequency with the expected one, and
+ * UCALL_DONE ends the test.
+ */
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ vm_vaddr_t vmx_pages_gva;
+
+ uint64_t tsc_start, tsc_end;
+ uint64_t tsc_khz;
+ uint64_t l1_scale_factor;
+ uint64_t l0_tsc_freq = 0;
+ uint64_t l1_tsc_freq = 0;
+ uint64_t l2_tsc_freq = 0;
+
+ nested_vmx_check_supported();
+ tsc_scaling_check_supported();
+ stable_tsc_check_supported();
+
+ /*
+ * We set L1's scale factor to be a random number from 2 to 10.
+ * Ideally we would do the same for L2's factor but that one is
+ * referenced by both main() and l1_guest_code() and using a global
+ * variable does not work.
+ */
+ srand(time(NULL));
+ l1_scale_factor = (rand() % 9) + 2;
+ printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
+ printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
+
+ /* host (L0) reference: TSC ticks elapsed across a one second sleep */
+ tsc_start = rdtsc();
+ sleep(1);
+ tsc_end = rdtsc();
+
+ l0_tsc_freq = tsc_end - tsc_start;
+ printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+ tsc_khz = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_TSC_KHZ, NULL);
+ TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
+
+ /* scale down L1's TSC frequency */
+ vcpu_ioctl(vm, VCPU_ID, KVM_SET_TSC_KHZ,
+ (void *) (tsc_khz / l1_scale_factor));
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ /* NOTE(review): no break here; presumably TEST_FAIL() does not return */
+ TEST_FAIL("%s", (const char *) uc.args[0]);
+ case UCALL_SYNC:
+ /* args[0] is the request type, args[1] its payload */
+ switch (uc.args[0]) {
+ case USLEEP:
+ sleep(uc.args[1]);
+ break;
+ case UCHECK_L1:
+ l1_tsc_freq = uc.args[1];
+ printf("L1's TSC frequency is around: %"PRIu64
+ "\n", l1_tsc_freq);
+
+ compare_tsc_freq(l1_tsc_freq,
+ l0_tsc_freq / l1_scale_factor);
+ break;
+ case UCHECK_L2:
+ l2_tsc_freq = uc.args[1];
+ printf("L2's TSC frequency is around: %"PRIu64
+ "\n", l2_tsc_freq);
+
+ /* L2 is compared against the last frequency reported by L1 */
+ compare_tsc_freq(l2_tsc_freq,
+ l1_tsc_freq * L2_SCALE_FACTOR);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index 2f964cdc273c..afbbc40df884 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -42,8 +42,6 @@
#define HALTER_VCPU_ID 0
#define SENDER_VCPU_ID 1
-volatile uint32_t *apic_base = (volatile uint32_t *)APIC_DEFAULT_GPA;
-
/*
* Vector for IPI from sender vCPU to halting vCPU.
* Value is arbitrary and was chosen for the alternating bit pattern. Any
@@ -86,45 +84,6 @@ struct thread_params {
uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};
-uint32_t read_apic_reg(uint reg)
-{
- return apic_base[reg >> 2];
-}
-
-void write_apic_reg(uint reg, uint32_t val)
-{
- apic_base[reg >> 2] = val;
-}
-
-void disable_apic(void)
-{
- wrmsr(MSR_IA32_APICBASE,
- rdmsr(MSR_IA32_APICBASE) &
- ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
-}
-
-void enable_xapic(void)
-{
- uint64_t val = rdmsr(MSR_IA32_APICBASE);
-
- /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
- if (val & MSR_IA32_APICBASE_EXTD) {
- disable_apic();
- wrmsr(MSR_IA32_APICBASE,
- rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
- } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
- wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
- }
-
- /*
- * Per SDM: reset value of spurious interrupt vector register has the
- * APIC software enabled bit=0. It must be enabled in addition to the
- * enable bit in the MSR.
- */
- val = read_apic_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
- write_apic_reg(APIC_SPIV, val);
-}
-
void verify_apic_base_addr(void)
{
uint64_t msr = rdmsr(MSR_IA32_APICBASE);
@@ -136,10 +95,10 @@ void verify_apic_base_addr(void)
static void halter_guest_code(struct test_data_page *data)
{
verify_apic_base_addr();
- enable_xapic();
+ xapic_enable();
- data->halter_apic_id = GET_APIC_ID_FIELD(read_apic_reg(APIC_ID));
- data->halter_lvr = read_apic_reg(APIC_LVR);
+ data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+ data->halter_lvr = xapic_read_reg(APIC_LVR);
/*
* Loop forever HLTing and recording halts & wakes. Disable interrupts
@@ -150,8 +109,8 @@ static void halter_guest_code(struct test_data_page *data)
* TPR and PPR for diagnostic purposes in case the test fails.
*/
for (;;) {
- data->halter_tpr = read_apic_reg(APIC_TASKPRI);
- data->halter_ppr = read_apic_reg(APIC_PROCPRI);
+ data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
+ data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
data->hlt_count++;
asm volatile("sti; hlt; cli");
data->wake_count++;
@@ -166,7 +125,7 @@ static void halter_guest_code(struct test_data_page *data)
static void guest_ipi_handler(struct ex_regs *regs)
{
ipis_rcvd++;
- write_apic_reg(APIC_EOI, 77);
+ xapic_write_reg(APIC_EOI, 77);
}
static void sender_guest_code(struct test_data_page *data)
@@ -179,7 +138,7 @@ static void sender_guest_code(struct test_data_page *data)
uint64_t tsc_start;
verify_apic_base_addr();
- enable_xapic();
+ xapic_enable();
/*
* Init interrupt command register for sending IPIs
@@ -206,8 +165,8 @@ static void sender_guest_code(struct test_data_page *data)
* First IPI can be sent unconditionally because halter vCPU
* starts earlier.
*/
- write_apic_reg(APIC_ICR2, icr2_val);
- write_apic_reg(APIC_ICR, icr_val);
+ xapic_write_reg(APIC_ICR2, icr2_val);
+ xapic_write_reg(APIC_ICR, icr_val);
data->ipis_sent++;
/*
@@ -462,13 +421,13 @@ int main(int argc, char *argv[])
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
- vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler);
+ vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
- virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0);
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);
- test_data_page_vaddr = vm_vaddr_alloc(vm, 0x1000, 0x1000, 0, 0);
+ test_data_page_vaddr = vm_vaddr_alloc_page(vm);
data =
(struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr);
memset(data, 0, sizeof(*data));
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 1f4a0599683c..117bf49a3d79 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -146,7 +146,7 @@ int main(int argc, char *argv[])
/* Map a region for the shared_info page */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
- virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0);
+ virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
struct kvm_xen_hvm_config hvmc = {
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
index 8389e0bfd711..adc94452b57c 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -103,7 +103,7 @@ int main(int argc, char *argv[])
/* Map a region for the hypercall pages */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
- virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2, 0);
+ virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
for (;;) {
volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index a105f094676e..ee71fc99d5b5 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,6 +4,6 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh strscpy.sh
include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index b80ee3f6e265..645839b50b0a 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,4 +1,5 @@
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_STRSCPY=m
diff --git a/tools/testing/selftests/lib/scanf.sh b/tools/testing/selftests/lib/scanf.sh
new file mode 100755
index 000000000000..b59b8ba561c3
--- /dev/null
+++ b/tools/testing/selftests/lib/scanf.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Tests the scanf infrastructure using test_scanf kernel module.
+$(dirname $0)/../kselftest/module.sh "scanf" test_scanf
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 4e94e566e040..f31205f04ee0 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -136,6 +136,10 @@ struct mount_attr {
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
+#ifndef MOUNT_ATTR_NOSYMFOLLOW
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
+#endif
+
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
struct mount_attr *attr, size_t size)
{
@@ -235,6 +239,10 @@ static int prepare_unpriv_mountns(void)
return 0;
}
+#ifndef ST_NOSYMFOLLOW
+#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
+#endif
+
static int read_mnt_flags(const char *path)
{
int ret;
@@ -245,9 +253,9 @@ static int read_mnt_flags(const char *path)
if (ret != 0)
return -EINVAL;
- if (stat.f_flag &
- ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | ST_NOATIME |
- ST_NODIRATIME | ST_RELATIME | ST_SYNCHRONOUS | ST_MANDLOCK))
+ if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC |
+ ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
+ ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW))
return -EINVAL;
mnt_flags = 0;
@@ -269,6 +277,8 @@ static int read_mnt_flags(const char *path)
mnt_flags |= MS_SYNCHRONOUS;
if (stat.f_flag & ST_MANDLOCK)
mnt_flags |= ST_MANDLOCK;
+ if (stat.f_flag & ST_NOSYMFOLLOW)
+ mnt_flags |= ST_NOSYMFOLLOW;
return mnt_flags;
}
@@ -368,8 +378,13 @@ static bool mount_setattr_supported(void)
FIXTURE(mount_setattr) {
};
+#define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
+#define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
+
FIXTURE_SETUP(mount_setattr)
{
+ int fd = -EBADF;
+
if (!mount_setattr_supported())
SKIP(return, "mount_setattr syscall not supported");
@@ -412,6 +427,11 @@ FIXTURE_SETUP(mount_setattr)
ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+
+ fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
+ ASSERT_EQ(close(fd), 0);
}
FIXTURE_TEARDOWN(mount_setattr)
@@ -1421,4 +1441,66 @@ TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
}
+TEST_F(mount_setattr, mount_attr_nosymfollow)
+{
+ int fd;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_NOSYMFOLLOW,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(close(fd), 0);
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= ST_NOSYMFOLLOW;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, ELOOP);
+
+ attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
+ attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~ST_NOSYMFOLLOW;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(close(fd), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/nci/.gitignore b/tools/testing/selftests/nci/.gitignore
new file mode 100644
index 000000000000..448eeb4590fc
--- /dev/null
+++ b/tools/testing/selftests/nci/.gitignore
@@ -0,0 +1 @@
+/nci_dev
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 61ae899cfc17..19deb9cdf72f 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -30,3 +30,4 @@ hwtstamp_config
rxtimestamp
timestamping
txtimestamp
+so_netns_cookie
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3915bb7bfc39..79c9eb0034d5 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -30,7 +30,7 @@ TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
-TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
+TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie
TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_FILES += fin_ack_lat
TEST_GEN_FILES += reuseaddr_ports_exhausted
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 614d5477365a..6f905b53904f 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,4 +1,5 @@
CONFIG_USER_NS=y
+CONFIG_NET_NS=y
CONFIG_BPF_SYSCALL=y
CONFIG_TEST_BPF=m
CONFIG_NUMA=y
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
index 834066d465fc..2b5d6ff87373 100755
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -18,6 +18,8 @@ import sys
#
+# Kselftest framework requirement - SKIP code is 4
+KSFT_SKIP=4
Port = collections.namedtuple('Port', 'bus_info name')
@@ -239,7 +241,11 @@ def main(cmdline=None):
assert stderr == ""
devs = json.loads(stdout)['dev']
- dev = list(devs.keys())[0]
+ if devs:
+ dev = list(devs.keys())[0]
+ else:
+ print("no devlink device was found, test skipped")
+ sys.exit(KSFT_SKIP)
cmd = "devlink dev show %s" % dev
stdout, stderr = run_command(cmd)
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 49774a8a7736..0d293391e9a4 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -925,6 +925,14 @@ ipv6_fcnal_runtime()
run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1"
run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+ # route can not use source routing ("from") with nexthops
+ run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 from 2001:db8:91::1"
+ log_test $? 2 "IPv6 route can not use src routing with external nexthop"
+
+ # check cleanup path on invalid metric
+ run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 congctl lock foo"
+ log_test $? 2 "IPv6 route with invalid metric"
+
# rpfilter and default route
$IP nexthop flush >/dev/null 2>&1
run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP"
@@ -1366,6 +1374,10 @@ ipv4_fcnal_runtime()
run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
log_test $? 2 "Nexthop replace with invalid scope for existing route"
+ # check cleanup path on invalid metric
+ run_cmd "$IP ro add 172.16.101.2/32 nhid 22 congctl lock foo"
+ log_test $? 2 "IPv4 route with invalid metric"
+
#
# add route with nexthop and check traffic
#
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 76d9487fb03c..5abe92d55b69 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1384,12 +1384,37 @@ ipv4_rt_replace()
ipv4_rt_replace_mpath
}
+# checks that cached input route on VRF port is deleted
+# when VRF is deleted
+ipv4_local_rt_cache()
+{
+ run_cmd "ip addr add 10.0.0.1/32 dev lo"
+ run_cmd "ip netns add test-ns"
+ run_cmd "ip link add veth-outside type veth peer name veth-inside"
+ run_cmd "ip link add vrf-100 type vrf table 1100"
+ run_cmd "ip link set veth-outside master vrf-100"
+ run_cmd "ip link set veth-inside netns test-ns"
+ run_cmd "ip link set veth-outside up"
+ run_cmd "ip link set vrf-100 up"
+ run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100"
+ run_cmd "ip netns exec test-ns ip link set veth-inside up"
+ run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside"
+ run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside"
+ run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1"
+ run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1"
+ run_cmd "ip link delete vrf-100"
+
+ # If we do not hang, the test is a success
+ log_test $? 0 "Cached route removed from VRF port device"
+}
+
ipv4_route_test()
{
route_setup
ipv4_rt_add
ipv4_rt_replace
+ ipv4_local_rt_cache
route_cleanup
}
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
new file mode 100755
index 000000000000..a15d21dc035a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -0,0 +1,364 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution between two paths when using custom hash policy.
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.{2-253}/24 | |
+# | 2001:db8:1::{2-fd}/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|-------------------------+
+# | SW1 | |
+# | $rp1 + |
+# | 198.51.100.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | |
+# | $rp11 + + $rp12 |
+# | 192.0.2.1/28 | | 192.0.2.17/28 |
+# | 2001:db8:2::1/64 | | 2001:db8:3::1/64 |
+# +------------------|-------------|------------------+
+# | |
+# +------------------|-------------|------------------+
+# | SW2 | | |
+# | | | |
+# | $rp21 + + $rp22 |
+# | 192.0.2.2/28 192.0.2.18/28 |
+# | 2001:db8:2::2/64 2001:db8:3::2/64 |
+# | |
+# | |
+# | $rp2 + |
+# | 203.0.113.1/24 | |
+# | 2001:db8:4::1/64 | |
+# +-------------------------|-------------------------+
+# |
+# +-------------------------|------+
+# | H2 | |
+# | $h2 + |
+# | 203.0.113.{2-253}/24 |
+# | 2001:db8:4::{2-fd}/64 |
+# +--------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ custom_hash
+"
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+ ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+ ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 default
+ ip route del vrf v$h1 default
+ simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+ simple_if_init $rp1 198.51.100.1/24 2001:db8:1::1/64
+ __simple_if_init $rp11 v$rp1 192.0.2.1/28 2001:db8:2::1/64
+ __simple_if_init $rp12 v$rp1 192.0.2.17/28 2001:db8:3::1/64
+
+ ip route add vrf v$rp1 203.0.113.0/24 \
+ nexthop via 192.0.2.2 dev $rp11 \
+ nexthop via 192.0.2.18 dev $rp12
+
+ ip -6 route add vrf v$rp1 2001:db8:4::/64 \
+ nexthop via 2001:db8:2::2 dev $rp11 \
+ nexthop via 2001:db8:3::2 dev $rp12
+}
+
+sw1_destroy()
+{
+ ip -6 route del vrf v$rp1 2001:db8:4::/64
+
+ ip route del vrf v$rp1 203.0.113.0/24
+
+ __simple_if_fini $rp12 192.0.2.17/28 2001:db8:3::1/64
+ __simple_if_fini $rp11 192.0.2.1/28 2001:db8:2::1/64
+ simple_if_fini $rp1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+ simple_if_init $rp2 203.0.113.1/24 2001:db8:4::1/64
+ __simple_if_init $rp21 v$rp2 192.0.2.2/28 2001:db8:2::2/64
+ __simple_if_init $rp22 v$rp2 192.0.2.18/28 2001:db8:3::2/64
+
+ ip route add vrf v$rp2 198.51.100.0/24 \
+ nexthop via 192.0.2.1 dev $rp21 \
+ nexthop via 192.0.2.17 dev $rp22
+
+ ip -6 route add vrf v$rp2 2001:db8:1::/64 \
+ nexthop via 2001:db8:2::1 dev $rp21 \
+ nexthop via 2001:db8:3::1 dev $rp22
+}
+
+sw2_destroy()
+{
+ ip -6 route del vrf v$rp2 2001:db8:1::/64
+
+ ip route del vrf v$rp2 198.51.100.0/24
+
+ __simple_if_fini $rp22 192.0.2.18/28 2001:db8:3::2/64
+ __simple_if_fini $rp21 192.0.2.2/28 2001:db8:2::2/64
+ simple_if_fini $rp2 203.0.113.1/24 2001:db8:4::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.2/24 2001:db8:4::2/64
+ ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+ ip -6 route add vrf v$h2 default via 2001:db8:4::1 dev $h2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 default
+ ip route del vrf v$h2 default
+ simple_if_fini $h2 203.0.113.2/24 2001:db8:4::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+
+ rp1=${NETIFS[p2]}
+
+ rp11=${NETIFS[p3]}
+ rp21=${NETIFS[p4]}
+
+ rp12=${NETIFS[p5]}
+ rp22=${NETIFS[p6]}
+
+ rp2=${NETIFS[p7]}
+
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:4::2
+}
+
+send_src_ipv4()
+{
+ $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec v$h1 \
+ $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+}
+
+send_src_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+ local field="$1"; shift
+ local balanced="$1"; shift
+ local send_flows="$@"
+
+ RET=0
+
+ local t0_rp11=$(link_stats_tx_packets_get $rp11)
+ local t0_rp12=$(link_stats_tx_packets_get $rp12)
+
+ $send_flows
+
+ local t1_rp11=$(link_stats_tx_packets_get $rp11)
+ local t1_rp12=$(link_stats_tx_packets_get $rp12)
+
+ local d_rp11=$((t1_rp11 - t0_rp11))
+ local d_rp12=$((t1_rp12 - t0_rp12))
+
+ local diff=$((d_rp12 - d_rp11))
+ local sum=$((d_rp11 + d_rp12))
+
+ local pct=$(echo "$diff / $sum * 100" | bc -l)
+ local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+ [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+ ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+ check_err $? "Expected traffic to be $balanced, but it is not"
+
+ log_test "Multipath hash field: $field ($balanced)"
+ log_info "Packets sent on path1 / path2: $d_rp11 / $d_rp12"
+}
+
+custom_hash_v4()
+{
+ log_info "Running IPv4 custom multipath hash tests"
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 3
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $rp2 when using different destination IPs.
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0001
+ custom_hash_test "Source IP" "balanced" send_src_ipv4
+ custom_hash_test "Source IP" "unbalanced" send_dst_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0002
+ custom_hash_test "Destination IP" "balanced" send_dst_ipv4
+ custom_hash_test "Destination IP" "unbalanced" send_src_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0010
+ custom_hash_test "Source port" "balanced" send_src_udp4
+ custom_hash_test "Source port" "unbalanced" send_dst_udp4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0020
+ custom_hash_test "Destination port" "balanced" send_dst_udp4
+ custom_hash_test "Destination port" "unbalanced" send_src_udp4
+
+ sysctl_restore net.ipv4.neigh.default.gc_thresh3
+ sysctl_restore net.ipv4.neigh.default.gc_thresh2
+ sysctl_restore net.ipv4.neigh.default.gc_thresh1
+
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+custom_hash_v6()
+{
+ log_info "Running IPv6 custom multipath hash tests"
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 3
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $rp2 when using different destination IPs.
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0001
+ custom_hash_test "Source IP" "balanced" send_src_ipv6
+ custom_hash_test "Source IP" "unbalanced" send_dst_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0002
+ custom_hash_test "Destination IP" "balanced" send_dst_ipv6
+ custom_hash_test "Destination IP" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0008
+ custom_hash_test "Flowlabel" "balanced" send_flowlabel
+ custom_hash_test "Flowlabel" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0010
+ custom_hash_test "Source port" "balanced" send_src_udp6
+ custom_hash_test "Source port" "unbalanced" send_dst_udp6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0020
+ custom_hash_test "Destination port" "balanced" send_dst_udp6
+ custom_hash_test "Destination port" "unbalanced" send_src_udp6
+
+ sysctl_restore net.ipv6.neigh.default.gc_thresh3
+ sysctl_restore net.ipv6.neigh.default.gc_thresh2
+ sysctl_restore net.ipv6.neigh.default.gc_thresh1
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+custom_hash()
+{
+ # Test that when the hash policy is set to custom, traffic is
+ # distributed only according to the fields set in the
+ # fib_multipath_hash_fields sysctl.
+ #
+ # Each time set a different field and make sure traffic is only
+ # distributed when the field is changed in the packet stream.
+ custom_hash_v4
+ custom_hash_v6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 9c12c4fd3afc..13d3d4428a32 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -18,6 +18,12 @@ if [[ ! -v DEVLINK_DEV ]]; then
DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
-n | cut -d" " -f3)
+elif [[ ! -z "$DEVLINK_DEV" ]]; then
+ devlink dev show $DEVLINK_DEV &> /dev/null
+ if [ $? -ne 0 ]; then
+ echo "SKIP: devlink device \"$DEVLINK_DEV\" not found"
+ exit 1
+ fi
fi
##############################################################################
@@ -318,6 +324,14 @@ devlink_trap_rx_bytes_get()
| jq '.[][][]["stats"]["rx"]["bytes"]'
}
+devlink_trap_drop_packets_get()
+{
+ local trap_name=$1; shift
+
+ devlink -js trap show $DEVLINK_DEV trap $trap_name \
+ | jq '.[][][]["stats"]["rx"]["dropped"]'
+}
+
devlink_trap_stats_idle_test()
{
local trap_name=$1; shift
@@ -339,6 +353,24 @@ devlink_trap_stats_idle_test()
fi
}
+devlink_trap_drop_stats_idle_test()
+{
+	# Return 0 if the dropped-packet counter of trap $1 is unchanged
+	# across a one-second window, 1 otherwise.
+	local trap_name=$1; shift
+	local t0_packets t1_packets
+
+	t0_packets=$(devlink_trap_drop_packets_get $trap_name)
+	sleep 1
+	t1_packets=$(devlink_trap_drop_packets_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets ]]; then
+		return 0
+	else
+		return 1
+	fi
+}
+
devlink_traps_enable_all()
{
local trap_name
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
new file mode 100755
index 000000000000..a73f52efcb6c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -0,0 +1,456 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution when there are multiple paths between an IPv4 GRE
+# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.{2-253}/24 | |
+# | 2001:db8:1::{2-fd}/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 198.51.100.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | + g1 (gre) |
+# | loc=192.0.2.1 |
+# | rem=192.0.2.2 --. |
+# | tos=inherit | |
+# | v |
+# | + $ul1 |
+# | | 192.0.2.17/28 |
+# +---------------------|----------------------+
+# |
+# +---------------------|----------------------+
+# | SW2 | |
+# | $ul21 + |
+# | 192.0.2.18/28 | |
+# | | |
+# | __________________+___ |
+# | / \ |
+# | | | |
+# | + $ul22.111 (vlan) + $ul22.222 (vlan) |
+# | | 192.0.2.33/28 | 192.0.2.49/28 |
+# | | | |
+# +--|----------------------|------------------+
+# | |
+# +--|----------------------|------------------+
+# | | | |
+# | + $ul32.111 (vlan) + $ul32.222 (vlan) |
+# | | 192.0.2.34/28 | 192.0.2.50/28 |
+# | | | |
+# | \__________________+___/ |
+# | | |
+# | | |
+# | $ul31 + |
+# | 192.0.2.65/28 | SW3 |
+# +---------------------|----------------------+
+# |
+# +---------------------|----------------------+
+# | + $ul4 |
+# | ^ 192.0.2.66/28 |
+# | | |
+# | + g2 (gre) | |
+# | loc=192.0.2.2 | |
+# | rem=192.0.2.1 --' |
+# | tos=inherit |
+# | |
+# | $ol4 + |
+# | 203.0.113.1/24 | |
+# | 2001:db8:2::1/64 | SW4 |
+# +-------------------------|------------------+
+# |
+# +-------------------------|------+
+# | | |
+# | $h2 + |
+# | 203.0.113.{2-253}/24 |
+# | 2001:db8:2::{2-fd}/64 H2 |
+# +--------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ custom_hash
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+ ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+ ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 default
+ ip route del vrf v$h1 default
+ simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64
+ __simple_if_init $ul1 v$ol1 192.0.2.17/28
+
+ tunnel_create g1 gre 192.0.2.1 192.0.2.2 tos inherit dev v$ol1
+ __simple_if_init g1 v$ol1 192.0.2.1/32
+ ip route add vrf v$ol1 192.0.2.2/32 via 192.0.2.18
+
+ ip route add vrf v$ol1 203.0.113.0/24 dev g1
+ ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+ ip -6 route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 203.0.113.0/24
+
+ ip route del vrf v$ol1 192.0.2.2/32
+ __simple_if_fini g1 192.0.2.1/32
+ tunnel_destroy g1
+
+ __simple_if_fini $ul1 192.0.2.17/28
+ simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+ simple_if_init $ul21 192.0.2.18/28
+ __simple_if_init $ul22 v$ul21
+ vlan_create $ul22 111 v$ul21 192.0.2.33/28
+ vlan_create $ul22 222 v$ul21 192.0.2.49/28
+
+ ip route add vrf v$ul21 192.0.2.1/32 via 192.0.2.17
+ ip route add vrf v$ul21 192.0.2.2/32 \
+ nexthop via 192.0.2.34 \
+ nexthop via 192.0.2.50
+}
+
+sw2_destroy()
+{
+ ip route del vrf v$ul21 192.0.2.2/32
+ ip route del vrf v$ul21 192.0.2.1/32
+
+ vlan_destroy $ul22 222
+ vlan_destroy $ul22 111
+ __simple_if_fini $ul22
+ simple_if_fini $ul21 192.0.2.18/28
+}
+
+sw3_create()
+{
+ simple_if_init $ul31 192.0.2.65/28
+ __simple_if_init $ul32 v$ul31
+ vlan_create $ul32 111 v$ul31 192.0.2.34/28
+ vlan_create $ul32 222 v$ul31 192.0.2.50/28
+
+ ip route add vrf v$ul31 192.0.2.2/32 via 192.0.2.66
+ ip route add vrf v$ul31 192.0.2.1/32 \
+ nexthop via 192.0.2.33 \
+ nexthop via 192.0.2.49
+
+ tc qdisc add dev $ul32 clsact
+ tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+ tc qdisc del dev $ul32 clsact
+
+ ip route del vrf v$ul31 192.0.2.1/32
+ ip route del vrf v$ul31 192.0.2.2/32
+
+ vlan_destroy $ul32 222
+ vlan_destroy $ul32 111
+ __simple_if_fini $ul32
+ simple_if_fini $ul31 192.0.2.65/28
+}
+
+sw4_create()
+{
+ simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64
+ __simple_if_init $ul4 v$ol4 192.0.2.66/28
+
+ tunnel_create g2 gre 192.0.2.2 192.0.2.1 tos inherit dev v$ol4
+ __simple_if_init g2 v$ol4 192.0.2.2/32
+ ip route add vrf v$ol4 192.0.2.1/32 via 192.0.2.65
+
+ ip route add vrf v$ol4 198.51.100.0/24 dev g2
+ ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+ ip -6 route del vrf v$ol4 2001:db8:1::/64
+ ip route del vrf v$ol4 198.51.100.0/24
+
+ ip route del vrf v$ol4 192.0.2.1/32
+ __simple_if_fini g2 192.0.2.2/32
+ tunnel_destroy g2
+
+ __simple_if_fini $ul4 192.0.2.66/28
+ simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64
+ ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+ ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 default
+ ip route del vrf v$h2 default
+ simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+
+ ol1=${NETIFS[p2]}
+ ul1=${NETIFS[p3]}
+
+ ul21=${NETIFS[p4]}
+ ul22=${NETIFS[p5]}
+
+ ul32=${NETIFS[p6]}
+ ul31=${NETIFS[p7]}
+
+ ul4=${NETIFS[p8]}
+ ol4=${NETIFS[p9]}
+
+ h2=${NETIFS[p10]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ sw3_create
+ sw4_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw4_destroy
+ sw3_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+send_src_ipv4()
+{
+ $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec v$h1 \
+ $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+}
+
+send_src_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+ local field="$1"; shift
+ local balanced="$1"; shift
+ local send_flows="$@"
+
+ RET=0
+
+ local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ $send_flows
+
+ local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+
+ local diff=$((d222 - d111))
+ local sum=$((d111 + d222))
+
+ local pct=$(echo "$diff / $sum * 100" | bc -l)
+ local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+ [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+ ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+ check_err $? "Expected traffic to be $balanced, but it is not"
+
+ log_test "Multipath hash field: $field ($balanced)"
+ log_info "Packets sent on path1 / path2: $d111 / $d222"
+}
+
+custom_hash_v4()
+{
+ log_info "Running IPv4 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv4
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp4
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp4
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp4
+
+ sysctl_restore net.ipv4.neigh.default.gc_thresh3
+ sysctl_restore net.ipv4.neigh.default.gc_thresh2
+ sysctl_restore net.ipv4.neigh.default.gc_thresh1
+}
+
+custom_hash_v6()
+{
+ log_info "Running IPv6 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv6
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0200
+ custom_hash_test "Inner flowlabel" "balanced" send_flowlabel
+ custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp6
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp6
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp6
+
+ sysctl_restore net.ipv6.neigh.default.gc_thresh3
+ sysctl_restore net.ipv6.neigh.default.gc_thresh2
+ sysctl_restore net.ipv6.neigh.default.gc_thresh1
+}
+
+custom_hash()
+{
+ # Test that when the hash policy is set to custom, traffic is
+ # distributed only according to the fields set in the
+ # fib_multipath_hash_fields sysctl.
+ #
+ # Each time set a different field and make sure traffic is only
+ # distributed when the field is changed in the packet stream.
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 3
+
+ custom_hash_v4
+ custom_hash_v6
+
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
new file mode 100755
index 000000000000..8fea2c2e0b25
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -0,0 +1,458 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution when there are multiple paths between the two
+# endpoints of an IPv6 GRE tunnel. The tunnel carries IPv4 and IPv6 traffic
+# between multiple hosts, and the underlay network has multiple routes. With
+# the default multipath policy, SW2 only looks at the outer IP addresses,
+# hence only a single route would be used.
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.{2-253}/24 | |
+# | 2001:db8:1::{2-fd}/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|-------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 198.51.100.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# |+ g1 (ip6gre) |
+# | loc=2001:db8:3::1 |
+# | rem=2001:db8:3::2 -. |
+# | tos=inherit | |
+# | v |
+# | + $ul1 |
+# | | 2001:db8:10::1/64 |
+# +---------------------|-----------------------+
+# |
+# +---------------------|-----------------------+
+# | SW2 | |
+# | $ul21 + |
+# | 2001:db8:10::2/64 | |
+# | | |
+# | __________________+___ |
+# | / \ |
+# | | | |
+# | + $ul22.111 (vlan) + $ul22.222 (vlan) |
+# | | 2001:db8:11::1/64 | 2001:db8:12::1/64 |
+# | | | |
+# +--|----------------------|-------------------+
+# | |
+# +--|----------------------|-------------------+
+# | | | |
+# | + $ul32.111 (vlan) + $ul32.222 (vlan) |
+# | | 2001:db8:11::2/64 | 2001:db8:12::2/64 |
+# | | | |
+# | \__________________+___/ |
+# | | |
+# | | |
+# | $ul31 + |
+# | 2001:db8:13::1/64 | SW3 |
+# +---------------------|-----------------------+
+# |
+# +---------------------|-----------------------+
+# | + $ul4 |
+# | ^ 2001:db8:13::2/64 |
+# | | |
+# |+ g2 (ip6gre) | |
+# | loc=2001:db8:3::2 | |
+# | rem=2001:db8:3::1 -' |
+# | tos=inherit |
+# | |
+# | $ol4 + |
+# | 203.0.113.1/24 | |
+# | 2001:db8:2::1/64 | SW4 |
+# +-------------------------|-------------------+
+# |
+# +-------------------------|------+
+# | | |
+# | $h2 + |
+# | 203.0.113.{2-253}/24 |
+# | 2001:db8:2::{2-fd}/64 H2 |
+# +--------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ custom_hash
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+ ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+ ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 default
+ ip route del vrf v$h1 default
+ simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64
+ __simple_if_init $ul1 v$ol1 2001:db8:10::1/64
+
+ tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+ dev v$ol1
+ __simple_if_init g1 v$ol1 2001:db8:3::1/128
+ ip route add vrf v$ol1 2001:db8:3::2/128 via 2001:db8:10::2
+
+ ip route add vrf v$ol1 203.0.113.0/24 dev g1
+ ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+ ip -6 route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 203.0.113.0/24
+
+ ip route del vrf v$ol1 2001:db8:3::2/128
+ __simple_if_fini g1 2001:db8:3::1/128
+ tunnel_destroy g1
+
+ __simple_if_fini $ul1 2001:db8:10::1/64
+ simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+ simple_if_init $ul21 2001:db8:10::2/64
+ __simple_if_init $ul22 v$ul21
+ vlan_create $ul22 111 v$ul21 2001:db8:11::1/64
+ vlan_create $ul22 222 v$ul21 2001:db8:12::1/64
+
+ ip -6 route add vrf v$ul21 2001:db8:3::1/128 via 2001:db8:10::1
+ ip -6 route add vrf v$ul21 2001:db8:3::2/128 \
+ nexthop via 2001:db8:11::2 \
+ nexthop via 2001:db8:12::2
+}
+
+sw2_destroy()
+{
+ ip -6 route del vrf v$ul21 2001:db8:3::2/128
+ ip -6 route del vrf v$ul21 2001:db8:3::1/128
+
+ vlan_destroy $ul22 222
+ vlan_destroy $ul22 111
+ __simple_if_fini $ul22
+ simple_if_fini $ul21 2001:db8:10::2/64
+}
+
+sw3_create()
+{
+ simple_if_init $ul31 2001:db8:13::1/64
+ __simple_if_init $ul32 v$ul31
+ vlan_create $ul32 111 v$ul31 2001:db8:11::2/64
+ vlan_create $ul32 222 v$ul31 2001:db8:12::2/64
+
+ ip -6 route add vrf v$ul31 2001:db8:3::2/128 via 2001:db8:13::2
+ ip -6 route add vrf v$ul31 2001:db8:3::1/128 \
+ nexthop via 2001:db8:11::1 \
+ nexthop via 2001:db8:12::1
+
+ tc qdisc add dev $ul32 clsact
+ tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+ tc qdisc del dev $ul32 clsact
+
+ ip -6 route del vrf v$ul31 2001:db8:3::1/128
+ ip -6 route del vrf v$ul31 2001:db8:3::2/128
+
+ vlan_destroy $ul32 222
+ vlan_destroy $ul32 111
+ __simple_if_fini $ul32
+ simple_if_fini $ul31 2001:db8:13::1/64
+}
+
+sw4_create()
+{
+ simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64
+ __simple_if_init $ul4 v$ol4 2001:db8:13::2/64
+
+ tunnel_create g2 ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \
+ dev v$ol4
+ __simple_if_init g2 v$ol4 2001:db8:3::2/128
+ ip -6 route add vrf v$ol4 2001:db8:3::1/128 via 2001:db8:13::1
+
+ ip route add vrf v$ol4 198.51.100.0/24 dev g2
+ ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+ ip -6 route del vrf v$ol4 2001:db8:1::/64
+ ip route del vrf v$ol4 198.51.100.0/24
+
+ ip -6 route del vrf v$ol4 2001:db8:3::1/128
+ __simple_if_fini g2 2001:db8:3::2/128
+ tunnel_destroy g2
+
+ __simple_if_fini $ul4 2001:db8:13::2/64
+ simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64
+ ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+ ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 default
+ ip route del vrf v$h2 default
+ simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+
+ ol1=${NETIFS[p2]}
+ ul1=${NETIFS[p3]}
+
+ ul21=${NETIFS[p4]}
+ ul22=${NETIFS[p5]}
+
+ ul32=${NETIFS[p6]}
+ ul31=${NETIFS[p7]}
+
+ ul4=${NETIFS[p8]}
+ ol4=${NETIFS[p9]}
+
+ h2=${NETIFS[p10]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ sw3_create
+ sw4_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw4_destroy
+ sw3_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+send_src_ipv4()
+{
+ $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec v$h1 \
+ $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+}
+
+send_src_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+ local field="$1"; shift
+ local balanced="$1"; shift
+ local send_flows="$@"
+
+ RET=0
+
+ local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ $send_flows
+
+ local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+
+ local diff=$((d222 - d111))
+ local sum=$((d111 + d222))
+
+ local pct=$(echo "$diff / $sum * 100" | bc -l)
+ local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+ [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+ ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+ check_err $? "Expected traffic to be $balanced, but it is not"
+
+ log_test "Multipath hash field: $field ($balanced)"
+ log_info "Packets sent on path1 / path2: $d111 / $d222"
+}
+
+custom_hash_v4()
+{
+ log_info "Running IPv4 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv4
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp4
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp4
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp4
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp4
+
+ sysctl_restore net.ipv4.neigh.default.gc_thresh3
+ sysctl_restore net.ipv4.neigh.default.gc_thresh2
+ sysctl_restore net.ipv4.neigh.default.gc_thresh1
+}
+
+custom_hash_v6()
+{
+ log_info "Running IPv6 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv6
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0200
+ custom_hash_test "Inner flowlabel" "balanced" send_flowlabel
+ custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp6
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp6
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp6
+
+ sysctl_restore net.ipv6.neigh.default.gc_thresh3
+ sysctl_restore net.ipv6.neigh.default.gc_thresh2
+ sysctl_restore net.ipv6.neigh.default.gc_thresh1
+}
+
+custom_hash()
+{
+ # Test that when the hash policy is set to custom, traffic is
+ # distributed only according to the fields set in the
+ # fib_multipath_hash_fields sysctl.
+ #
+ # Each time set a different field and make sure traffic is only
+ # distributed when the field is changed in the packet stream.
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 3
+
+ custom_hash_v4
+ custom_hash_v6
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index 55eeacf59241..64fbd211d907 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -75,7 +75,9 @@ switch_destroy()
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
index 5f20d289ee43..10e594c55117 100755
--- a/tools/testing/selftests/net/forwarding/pedit_l4port.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
@@ -71,7 +71,9 @@ switch_destroy()
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
index e3bd8a6bb8b4..bde11dc27873 100755
--- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh
+++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
@@ -72,7 +72,9 @@ switch_destroy()
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh
new file mode 100755
index 000000000000..e4b04cd1644a
--- /dev/null
+++ b/tools/testing/selftests/net/icmp.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for checking ICMP response with dummy address instead of 0.0.0.0.
+# Sets up two namespaces like:
+# +----------------------+ +--------------------+
+# | ns1 | v4-via-v6 routes: | ns2 |
+# | | ' | |
+# | +--------+ -> 172.16.1.0/24 -> +--------+ |
+# | | veth0 +--------------------------+ veth0 | |
+# | +--------+ <- 172.16.0.0/24 <- +--------+ |
+# | 172.16.0.1 | | 2001:db8:1::2/64 |
+# | 2001:db8:1::1/64 | | |
+# +----------------------+ +--------------------+
+#
+# And then tries to ping 172.16.1.1 from ns1. This results in a "net
+# unreachable" message being sent from ns2, but there is no IPv4 address set in
+# that namespace, so the kernel should substitute the dummy address
+# 192.0.0.8 defined in RFC 7600.
+
+NS1=ns1
+NS2=ns2
+H1_IP=172.16.0.1/32
+H1_IP6=2001:db8:1::1
+RT1=172.16.1.0/24
+PINGADDR=172.16.1.1
+RT2=172.16.0.0/24
+H2_IP6=2001:db8:1::2
+
+TMPFILE=$(mktemp)
+
+cleanup()
+{
+ rm -f "$TMPFILE"
+ ip netns del $NS1
+ ip netns del $NS2
+}
+
+trap cleanup EXIT
+
+# Namespaces
+ip netns add $NS1
+ip netns add $NS2
+
+# Connectivity
+ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2
+ip -netns $NS1 link set dev veth0 up
+ip -netns $NS2 link set dev veth0 up
+ip -netns $NS1 addr add $H1_IP dev veth0
+ip -netns $NS1 addr add $H1_IP6/64 dev veth0 nodad
+ip -netns $NS2 addr add $H2_IP6/64 dev veth0 nodad
+ip -netns $NS1 route add $RT1 via inet6 $H2_IP6
+ip -netns $NS2 route add $RT2 via inet6 $H1_IP6
+
+# Make sure ns2 will respond with ICMP unreachable
+ip netns exec $NS2 sysctl -qw net.ipv4.icmp_ratelimit=0 net.ipv4.ip_forward=1
+
+# Run the test - a ping runs in the background, and we capture ICMP responses
+# with tcpdump; -c 1 means it should exit after the first captured response,
+# but add a timeout in case something goes wrong
+ip netns exec $NS1 ping -w 3 -i 0.5 $PINGADDR >/dev/null &
+ip netns exec $NS1 timeout 10 tcpdump -tpni veth0 -c 1 'icmp and icmp[icmptype] != icmp-echo' > $TMPFILE 2>/dev/null
+
+# Parse response and check for dummy address
+# tcpdump output looks like:
+# IP 192.0.0.8 > 172.16.0.1: ICMP net 172.16.1.1 unreachable, length 92
+RESP_IP=$(awk '{print $2}' < $TMPFILE)
+if [[ "$RESP_IP" != "192.0.0.8" ]]; then
+ echo "FAIL - got ICMP response from $RESP_IP, should be 192.0.0.8"
+ exit 1
+else
+ echo "OK"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index bf361f30d6ef..c19ecc6a8614 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -63,10 +63,14 @@ log_test()
local rc=$1
local expected=$2
local msg="$3"
+ local xfail=$4
if [ ${rc} -eq ${expected} ]; then
printf "TEST: %-60s [ OK ]\n" "${msg}"
nsuccess=$((nsuccess+1))
+ elif [ -n "${xfail}" ] && [ ${rc} -eq ${xfail} ]; then # xfail ($4) is optional
+ printf "TEST: %-60s [XFAIL]\n" "${msg}"
+ nxfail=$((nxfail+1))
else
ret=1
nfail=$((nfail+1))
@@ -322,7 +326,7 @@ check_exception()
ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
grep -v "mtu" | grep -q "${R1_LLADDR}"
fi
- log_test $? 0 "IPv6: ${desc}"
+ log_test $? 0 "IPv6: ${desc}" 1
}
run_ping()
@@ -488,6 +492,7 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
ret=0
nsuccess=0
nfail=0
+nxfail=0
while getopts :pv o
do
@@ -532,5 +537,6 @@ fi
printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}
+printf "Tests xfailed: %3d\n" ${nxfail}
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index d88e1fdfb147..89c4753c2760 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -6,6 +6,7 @@
#include <limits.h>
#include <fcntl.h>
#include <string.h>
+#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -25,6 +26,7 @@
#include <netinet/in.h>
#include <linux/tcp.h>
+#include <linux/time_types.h>
extern int optind;
@@ -66,6 +68,13 @@ static unsigned int cfg_do_w;
static int cfg_wait;
static uint32_t cfg_mark;
+struct cfg_cmsg_types {
+ unsigned int cmsg_enabled:1;
+ unsigned int timestampns:1;
+};
+
+static struct cfg_cmsg_types cfg_cmsg_types;
+
static void die_usage(void)
{
fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
@@ -80,11 +89,22 @@ static void die_usage(void)
fprintf(stderr, "\t-M mark -- set socket packet mark\n");
fprintf(stderr, "\t-u -- check mptcp ulp\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
+ fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
fprintf(stderr,
"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
exit(1);
}
+static void xerror(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ exit(1);
+}
+
static void handle_signal(int nr)
{
quit = true;
@@ -338,6 +358,58 @@ static size_t do_write(const int fd, char *buf, const size_t len)
return offset;
}
+static void process_cmsg(struct msghdr *msgh)
+{
+ struct __kernel_timespec ts;
+ bool ts_found = false;
+ struct cmsghdr *cmsg;
+
+ for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) {
+ memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts));
+ ts_found = true;
+ continue;
+ }
+ }
+
+ if (cfg_cmsg_types.timestampns) {
+ if (!ts_found)
+ xerror("TIMESTAMPNS not present\n");
+ }
+}
+
+/* recvmsg() wrapper that checks ancillary data against the -c configuration. */
+static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len)
+{
+ char msg_buf[8192];
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = len,
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = msg_buf,
+ .msg_controllen = sizeof(msg_buf),
+ };
+ ssize_t ret = recvmsg(fd, &msg, 0);
+
+ if (ret <= 0)
+ return ret;
+
+ if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled)
+ xerror("got %lu bytes of cmsg data, expected 0\n",
+ (unsigned long)msg.msg_controllen);
+
+ if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled)
+ xerror("%s\n", "got no cmsg data");
+
+ if (msg.msg_controllen)
+ process_cmsg(&msg);
+
+ return ret;
+}
+
static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
{
int ret = 0;
@@ -357,6 +429,8 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
} else if (cfg_peek == CFG_AFTER_PEEK) {
ret = recv(fd, buf, cap, MSG_PEEK);
ret = (ret < 0) ? ret : read(fd, buf, cap);
+ } else if (cfg_cmsg_types.cmsg_enabled) {
+ ret = do_recvmsg_cmsg(fd, buf, cap);
} else {
ret = read(fd, buf, cap);
}
@@ -786,6 +860,48 @@ static void init_rng(void)
srand(foo);
}
+static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen)
+{
+ int err;
+
+ err = setsockopt(fd, level, optname, optval, optlen);
+ if (err) {
+ perror("setsockopt");
+ exit(1);
+ }
+}
+
+static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg)
+{
+ static const unsigned int on = 1;
+
+ if (cmsg->timestampns)
+ xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on));
+}
+
+/* Parse a comma-separated list of cmsg names; exit(1) on an unknown name. */
+static void parse_cmsg_types(const char *type)
+{
+ char *next = strchr(type, ',');
+ unsigned int len = 0;
+
+ cfg_cmsg_types.cmsg_enabled = 1;
+
+ if (next) {
+ parse_cmsg_types(next + 1);
+ len = next - type;
+ } else {
+ len = strlen(type);
+ }
+ /* strncmp() alone would also accept "" and any prefix of the name */
+ if (len == strlen("TIMESTAMPNS") && strncmp(type, "TIMESTAMPNS", len) == 0) {
+ cfg_cmsg_types.timestampns = 1;
+ return;
+ }
+ fprintf(stderr, "Unrecognized cmsg option %.*s\n", (int)len, type);
+ exit(1);
+}
+
int main_loop(void)
{
int fd;
@@ -801,6 +917,8 @@ int main_loop(void)
set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf);
+ if (cfg_cmsg_types.cmsg_enabled)
+ apply_cmsg_types(fd, &cfg_cmsg_types);
return copyfd_io(0, fd, 1);
}
@@ -887,7 +1005,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:")) != -1) {
+ while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:c:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
@@ -943,6 +1061,9 @@ static void parse_opts(int argc, char **argv)
case 'P':
cfg_peek = parse_peek(optarg);
break;
+ case 'c':
+ parse_cmsg_types(optarg);
+ break;
}
}
@@ -976,6 +1097,8 @@ int main(int argc, char *argv[])
set_sndbuf(fd, cfg_sndbuf);
if (cfg_mark)
set_mark(fd, cfg_mark);
+ if (cfg_cmsg_types.cmsg_enabled)
+ apply_cmsg_types(fd, &cfg_cmsg_types);
return main_loop_s(fd);
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 3c4cb72ed8a4..559173a8e387 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
time_start=$(date +%s)
-optstring="S:R:d:e:l:r:h4cm:f:t"
+optstring="S:R:d:e:l:r:h4cm:f:tC"
ret=0
sin=""
sout=""
@@ -22,6 +22,7 @@ sndbuf=0
rcvbuf=0
options_log=true
do_tcp=0
+checksum=false
filesize=0
if [ $tc_loss -eq 100 ];then
@@ -47,6 +48,7 @@ usage() {
echo -e "\t-R: set rcvbuf value (default: use kernel default)"
echo -e "\t-m: test mode (poll, sendfile; default: poll)"
echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
+ echo -e "\t-C: enable the MPTCP data checksum"
}
while getopts "$optstring" option;do
@@ -104,6 +106,9 @@ while getopts "$optstring" option;do
"t")
do_tcp=$((do_tcp+1))
;;
+ "C")
+ checksum=true
+ ;;
"?")
usage $0
exit 1
@@ -197,8 +202,11 @@ ip -net "$ns4" link set ns4eth3 up
ip -net "$ns4" route add default via 10.0.3.2
ip -net "$ns4" route add default via dead:beef:3::2
-# use TCP syn cookies, even if no flooding was detected.
-ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+if $checksum; then
+ for i in "$ns1" "$ns2" "$ns3" "$ns4";do
+ ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1
+ done
+fi
set_ethtool_flags() {
local ns="$1"
@@ -501,6 +509,7 @@ do_transfer()
local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue")
expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l))
@@ -518,10 +527,14 @@ do_transfer()
"${stat_synrx_now_l}" "${expect_synrx}" 1>&2
retc=1
fi
- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then
- printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
- "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
- rets=1
+ if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then
+ if [ ${stat_ooo_now} -eq 0 ]; then
+ printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
+ "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
+ rets=1
+ else # OoO counter is non-zero: report it, do not fail
+ printf "[ Note ] fallback due to TCP OoO"
+ fi
fi
if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
@@ -667,6 +680,25 @@ run_tests_peekmode()
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
}
+display_time()
+{
+ time_end=$(date +%s)
+ time_run=$((time_end-time_start))
+
+ echo "Time: ${time_run} seconds"
+}
+
+stop_if_error()
+{
+ local msg="$1"
+
+ if [ ${ret} -ne 0 ]; then
+ echo "FAIL: ${msg}" 1>&2
+ display_time
+ exit ${ret}
+ fi
+}
+
make_file "$cin" "client"
make_file "$sin" "server"
@@ -674,6 +706,8 @@ check_mptcp_disabled
check_mptcp_ulp_setsockopt
+stop_if_error "The kernel configuration is not valid for MPTCP"
+
echo "INFO: validating network environment with pings"
for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns1" $sender 10.0.1.1
@@ -693,6 +727,8 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns4" $sender dead:beef:3::1
done
+stop_if_error "Could not even run ping tests"
+
[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
echo -n "INFO: Using loss of $tc_loss "
test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
@@ -720,18 +756,24 @@ echo "on ns3eth4"
tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
+run_tests_lo "$ns1" "$ns1" 10.0.1.1 1
+stop_if_error "Could not even run loopback test"
+
+run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1
+stop_if_error "Could not even run loopback v6 test"
+
for sender in $ns1 $ns2 $ns3 $ns4;do
- run_tests_lo "$ns1" "$sender" 10.0.1.1 1
- if [ $ret -ne 0 ] ;then
- echo "FAIL: Could not even run loopback test" 1>&2
- exit $ret
- fi
- run_tests_lo "$ns1" $sender dead:beef:1::1 1
- if [ $ret -ne 0 ] ;then
- echo "FAIL: Could not even run loopback v6 test" 2>&1
- exit $ret
+ # ns1<->ns2 is not subject to reordering/tc delays. Use it to test
+ # mptcp syncookie support.
+ if [ $sender = $ns1 ]; then
+ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+ else
+ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
fi
+ run_tests "$ns1" $sender 10.0.1.1
+ run_tests "$ns1" $sender dead:beef:1::1
+
run_tests "$ns2" $sender 10.0.1.2
run_tests "$ns2" $sender dead:beef:1::2
run_tests "$ns2" $sender 10.0.2.1
@@ -744,14 +786,13 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
run_tests "$ns4" $sender 10.0.3.1
run_tests "$ns4" $sender dead:beef:3::1
+
+ stop_if_error "Tests with $sender as a sender have failed"
done
run_tests_peekmode "saveWithPeek"
run_tests_peekmode "saveAfterPeek"
+stop_if_error "Tests with peek mode have failed"
-time_end=$(date +%s)
-time_run=$((time_end-time_start))
-
-echo "Time: ${time_run} seconds"
-
+display_time
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index fd99485cf2a4..9a191c1a5de8 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -12,6 +12,7 @@ timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
mptcp_connect=""
capture=0
+checksum=0
do_all_tests=1
TEST_COUNT=0
@@ -49,6 +50,9 @@ init()
ip netns exec $netns sysctl -q net.mptcp.enabled=1
ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
+ if [ $checksum -eq 1 ]; then
+ ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1
+ fi
done
# ns1 ns2
@@ -124,6 +128,28 @@ reset_with_add_addr_timeout()
-j DROP
}
+reset_with_checksum()
+{
+ local ns1_enable=$1
+ local ns2_enable=$2
+
+ reset
+
+ ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
+ ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
+}
+
+reset_with_allow_join_id0()
+{
+ local ns1_enable=$1
+ local ns2_enable=$2
+
+ reset
+
+ ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable
+ ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
+}
+
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
@@ -476,6 +502,45 @@ run_tests()
fi
}
+chk_csum_nr()
+{
+ local msg=${1:-""}
+ local count
+ local dump_stats
+
+ if [ ! -z "$msg" ]; then
+ printf "%02u" "$TEST_COUNT"
+ else
+ echo -n " "
+ fi
+ printf " %-36s %s" "$msg" "sum"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != 0 ]; then
+ echo "[fail] got $count data checksum error[s] expected 0"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+ echo -n " - csum "
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != 0 ]; then
+ echo "[fail] got $count data checksum error[s] expected 0"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
chk_join_nr()
{
local msg="$1"
@@ -523,6 +588,9 @@ chk_join_nr()
echo Client ns stats
ip netns exec $ns2 nstat -as | grep MPTcp
fi
+ if [ $checksum -eq 1 ]; then
+ chk_csum_nr
+ fi
}
chk_add_nr()
@@ -1374,6 +1442,94 @@ syncookies_tests()
chk_add_nr 1 1
}
+checksum_tests()
+{
+ # checksum test 0 0
+ reset_with_checksum 0 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 0 0"
+
+ # checksum test 1 1
+ reset_with_checksum 1 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 1 1"
+
+ # checksum test 0 1
+ reset_with_checksum 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 0 1"
+
+ # checksum test 1 0
+ reset_with_checksum 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 1 0"
+}
+
+deny_join_id0_tests()
+{
+ # subflow allow join id0 ns1
+ reset_with_allow_join_id0 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "single subflow allow join id0 ns1" 1 1 1
+
+ # subflow allow join id0 ns2
+ reset_with_allow_join_id0 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "single subflow allow join id0 ns2" 0 0 0
+
+ # signal address allow join id0 ns1
+ # ADD_ADDRs are not affected by allow_join_id0 value.
+ reset_with_allow_join_id0 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "signal address allow join id0 ns1" 1 1 1
+ chk_add_nr 1 1
+
+ # signal address allow join id0 ns2
+ # ADD_ADDRs are not affected by allow_join_id0 value.
+ reset_with_allow_join_id0 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "signal address allow join id0 ns2" 1 1 1
+ chk_add_nr 1 1
+
+ # subflow and address allow join id0 ns1
+ reset_with_allow_join_id0 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "subflow and address allow join id0 1" 2 2 2
+
+ # subflow and address allow join id0 ns2
+ reset_with_allow_join_id0 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "subflow and address allow join id0 2" 1 1 1
+}
+
all_tests()
{
subflows_tests
@@ -1387,6 +1543,8 @@ all_tests()
backup_tests
add_addr_ports_tests
syncookies_tests
+ checksum_tests
+ deny_join_id0_tests
}
usage()
@@ -1403,7 +1561,10 @@ usage()
echo " -b backup_tests"
echo " -p add_addr_ports_tests"
echo " -k syncookies_tests"
+ echo " -S checksum_tests"
+ echo " -d deny_join_id0_tests"
echo " -c capture pcap files"
+ echo " -C enable data checksum"
echo " -h help"
}
@@ -1418,13 +1579,16 @@ make_file "$sin" "server" 1
trap cleanup EXIT
for arg in "$@"; do
- # check for "capture" arg before launching tests
+ # check for "capture/checksum" args before launching tests
if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then
capture=1
fi
+ if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then
+ checksum=1
+ fi
- # exception for the capture option, the rest means: a part of the tests
- if [ "${arg}" != "-c" ]; then
+ # exception for the capture/checksum options, the rest means: a part of the tests
+ if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then
do_all_tests=0
fi
done
@@ -1434,7 +1598,7 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
-while getopts 'fsltra64bpkch' opt; do
+while getopts 'fsltra64bpkdchCS' opt; do
case $opt in
f)
subflows_tests
@@ -1469,8 +1633,16 @@ while getopts 'fsltra64bpkch' opt; do
k)
syncookies_tests
;;
+ S)
+ checksum_tests
+ ;;
+ d)
+ deny_join_id0_tests
+ ;;
c)
;;
+ C)
+ ;;
h | *)
usage
;;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 2fa13946ac04..1579e471a5e7 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -178,7 +178,7 @@ do_transfer()
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
- $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} \
+ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS \
${local_addr} < "$sin" > "$sout" &
spid=$!
@@ -186,7 +186,7 @@ do_transfer()
timeout ${timeout_test} \
ip netns exec ${connector_ns} \
- $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} \
+ $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS \
$connect_addr < "$cin" > "$cout" &
cpid=$!
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 3aeef3bcb101..fd63ebfe9a2b 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -60,6 +60,8 @@ setup()
for i in "$ns1" "$ns2" "$ns3";do
ip netns add $i || exit $ksft_skip
ip -net $i link set lo up
+ ip netns exec $i sysctl -q net.ipv4.conf.all.rp_filter=0
+ ip netns exec $i sysctl -q net.ipv4.conf.default.rp_filter=0
done
ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
@@ -80,7 +82,6 @@ setup()
ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
- ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0
ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
diff --git a/tools/testing/selftests/net/so_netns_cookie.c b/tools/testing/selftests/net/so_netns_cookie.c
new file mode 100644
index 000000000000..b39e87e967cd
--- /dev/null
+++ b/tools/testing/selftests/net/so_netns_cookie.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+#define pr_err(fmt, ...) \
+ ({ \
+ fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+ __func__, __LINE__, ##__VA_ARGS__); \
+ 1; \
+ })
+
+int main(int argc, char *argvp[])
+{
+ uint64_t cookie1, cookie2;
+ socklen_t vallen;
+ int sock1, sock2;
+
+ sock1 = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock1 < 0)
+ return pr_err("Unable to create TCP socket");
+
+ vallen = sizeof(cookie1);
+ if (getsockopt(sock1, SOL_SOCKET, SO_NETNS_COOKIE, &cookie1, &vallen) != 0)
+ return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)");
+
+ if (!cookie1)
+ return pr_err("SO_NETNS_COOKIE returned zero cookie");
+
+ if (unshare(CLONE_NEWNET))
+ return pr_err("unshare");
+
+ sock2 = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock2 < 0)
+ return pr_err("Unable to create TCP socket");
+
+ vallen = sizeof(cookie2);
+ if (getsockopt(sock2, SOL_SOCKET, SO_NETNS_COOKIE, &cookie2, &vallen) != 0)
+ return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)");
+
+ if (!cookie2)
+ return pr_err("SO_NETNS_COOKIE returned zero cookie");
+
+ if (cookie1 == cookie2)
+ return pr_err("SO_NETNS_COOKIE returned identical cookies for distinct ns");
+
+ close(sock1);
+ close(sock2);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
new file mode 100755
index 000000000000..75ada17ac061
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -0,0 +1,573 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
+
+# This test is designed for evaluating the new SRv6 End.DT46 Behavior used for
+# implementing IPv4/IPv6 L3 VPN use cases.
+#
+# The current SRv6 code in the Linux kernel only implements SRv6 End.DT4 and
+# End.DT6 Behaviors which can be used respectively to support IPv4-in-IPv6 and
+# IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a
+# single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic.
+# The SRv6 End.DT46 Behavior implementation is meant to support the
+# decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel.
+# Therefore, the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies
+# the setup and operations of SRv6 VPNs.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4/IPv6 L3 VPN services allowing hosts to communicate with
+# each other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the
+# IPv4/IPv6 L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected
+# using the IPv4/IPv6 L3 VPN of tenant 200. Cross connection between tenant 100
+# and tenant 200 is forbidden and thus, for example, hs-t100-1 cannot reach
+# hs-t200-3 and vice versa.
+#
+# Routers rt-1 and rt-2 implement IPv4/IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT46 Behavior and c) VRF.
+#
+# To explain how an IPv4/IPv6 L3 VPN based on SRv6 works, let us briefly
+# consider an example where, within the same domain of tenant 100, the host
+# hs-t100-1 pings the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as an arp/ndp proxy.
+#
+# When the host hs-t100-1 sends an IPv6 or IPv4 packet destined to hs-t100-2,
+# the router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv6 or IPv4 packet in an IPv6 plus the
+# Segment Routing Header (SRH) packet. This packet is sent through the (IPv6)
+# core network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT46 Behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv6 or IPv4 packet. Afterwards, the packet is sent to the
+# host hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the roles of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv4/IPv6 L3 VPN for tenant 200 works exactly as the IPv4/IPv6
+# L3 VPN for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are
+# able to connect with each other.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-t100-1 netns | | hs-t100-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::1/64 | | | | cafe::2/64 | |
+# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------+ |
+# | | veth-t100 | | | | veth-t100 | |
+# | | cafe::254/64 | | | | cafe::254/64 | |
+# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | |
+# | +-------+-------+ | localsid | | | | localsid | +-------+-------+ |
+# | | | table | | | | table | | |
+# | +----+----+ +----------+ | | +----------+ +----+----+ |
+# | | vrf-100 | | | | vrf-100 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | veth0 | | | | veth0 | |
+# | | fd00::1/64 |.|...|.| fd00::2/64 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | vrf-200 | | | | vrf-200 | |
+# | +----+----+ | | +----+----+ |
+# | | | | | |
+# | +-------+-------+ | | +-------+-------+ |
+# | | veth-t200 | | | | veth-t200 | |
+# | | cafe::254/64 | | | | cafe::254/64 | |
+# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | |
+# | +---------------+ rt-1 netns | | rt-2 netns +---------------+ |
+# | . | | . |
+# +-----------------------------------+ +-----------------------------------+
+# . .
+# . .
+# . .
+# . .
+# +-------------------+ +-------------------+
+# | . | | . |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::3/64 | | | | cafe::4/64 | |
+# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | |
+# | +-------------+ | | +-------------+ |
+# | | | |
+# | hs-t200-3 netns | | hs-t200-4 netns |
+# | | | |
+# +-------------------+ +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID |Action |
+# +--------------------------------------------------+
+# |fc00:21:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:21:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::2 |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t100 |
+# +---------------------------------------------------+
+# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100 |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::4 |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t200 |
+# +---------------------------------------------------+
+# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID |Action |
+# +--------------------------------------------------+
+# |fc00:12:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:12:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::1 |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t100 |
+# +---------------------------------------------------+
+# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100 |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::3 |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t200 |
+# +---------------------------------------------------+
+# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200 |
+# +---------------------------------------------------+
+#
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-t${tid}-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ # configure the VRF for the tenant X on the router which is directly
+ # connected to the source host.
+ ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+ ip -netns ${rtname} link set vrf-${tid} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ # enslave the veth-tX interface to the vrf-X in the access router
+ ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+ ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad
+ ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+ # disable the rp_filter, otherwise the kernel gets confused about how
+ # to route decapsulated IPv4 packets.
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0
+
+ ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local rtveth=veth-t${tid}
+ local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046
+
+ ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and are destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+ via fd00::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and are destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+ encap seg6local action End.DT46 vrftable ${tid} dev vrf-${tid}
+
+ # all sids for VPNs start with a common locator which is fc00::/16.
+ # Routes for handling the SRv6 End.DT46 behavior instances are grouped
+ # together in the 'localsid' table.
+ #
+ # NOTE: added only once
+ if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+ grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+ ip -netns ${rtdst_name} -6 rule add \
+ to ${VPN_LOCATOR_SERVICE}::/16 \
+ lookup ${LOCALSID_TABLE_ID} prio 999
+ fi
+
+ # set default routes to unreachable for both ipv4 and ipv6
+ ip -netns ${rtsrc_name} -6 route add unreachable default metric 4278198272 \
+ vrf vrf-${tid}
+
+ ip -netns ${rtsrc_name} -4 route add unreachable default metric 4278198272 \
+ vrf vrf-${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100 #args: host router tenant
+ setup_hs 2 2 100
+
+ # setup two hosts for the tenant 200
+ # - host hs-3 is directly connected to the router rt-1;
+ # - host hs-4 is directly connected to the router rt-2.
+ setup_hs 3 1 200
+ setup_hs 4 2 200
+
+ # setup the IPv4/IPv6 L3 VPN which connects the host hs-t100-1 and host
+ # hs-t100-2 within the same tenant 100.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+
+ # setup the IPv4/IPv6 L3 VPN which connects the host hs-t200-3 and host
+ # hs-t200-4 within the same tenant 200.
+ setup_vpn_config 3 1 4 2 200
+ setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ check_rt_connectivity ${rtsrc} ${rtdst}
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+ check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+}
+
+check_and_log_hs_isolation()
+{
+ local hssrc=$1
+ local tidsrc=$2
+ local hsdst=$3
+ local tiddst=$4
+
+ check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "IPv6 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+ check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "IPv4 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc=$1
+ local tid=$2
+
+ check_hs_ipv6_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+ check_hs_ipv4_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+}
+
+router_tests()
+{
+ log_section "IPv6 routers connectivity test"
+
+ check_and_log_rt_connectivity 1 2
+ check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+ log_section "IPv4/IPv6 connectivity test among hosts and gateway"
+
+ check_and_log_hs2gw_connectivity 1 100
+ check_and_log_hs2gw_connectivity 2 100
+
+ check_and_log_hs2gw_connectivity 3 200
+ check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+
+ check_and_log_hs_connectivity 3 4 200
+ check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+ local i
+ local j
+ local k
+ local tmp
+ local l1="1 2"
+ local l2="3 4"
+ local t1=100
+ local t2=200
+
+ log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+ for k in 0 1; do
+ for i in ${l1}; do
+ for j in ${l2}; do
+ check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+ done
+ done
+
+ # let us test the reverse path
+ tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+ tmp=${t1}; t1=${t2}; t2=${tmp}
+ done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit 0
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit 0
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 426d07875a48..112d41d01b12 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -25,6 +25,47 @@
#define TLS_PAYLOAD_MAX_LEN 16384
#define SOL_TLS 282
+struct tls_crypto_info_keys {
+ union {
+ struct tls12_crypto_info_aes_gcm_128 aes128;
+ struct tls12_crypto_info_chacha20_poly1305 chacha20;
+ };
+ size_t len;
+};
+
+static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
+ struct tls_crypto_info_keys *tls12)
+{
+ memset(tls12, 0, sizeof(*tls12));
+
+ switch (cipher_type) {
+ case TLS_CIPHER_CHACHA20_POLY1305:
+ tls12->len = sizeof(struct tls12_crypto_info_chacha20_poly1305);
+ tls12->chacha20.info.version = tls_version;
+ tls12->chacha20.info.cipher_type = cipher_type;
+ break;
+ case TLS_CIPHER_AES_GCM_128:
+ tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_128);
+ tls12->aes128.info.version = tls_version;
+ tls12->aes128.info.cipher_type = cipher_type;
+ break;
+ default:
+ break;
+ }
+}
+
+static void memrnd(void *s, size_t n)
+{
+ int *dword = s;
+ char *byte;
+
+ for (; n >= 4; n -= 4)
+ *dword++ = rand();
+ byte = (void *)dword;
+ while (n--)
+ *byte++ = rand();
+}
+
FIXTURE(tls_basic)
{
int fd, cfd;
@@ -133,33 +174,16 @@ FIXTURE_VARIANT_ADD(tls, 13_chacha)
FIXTURE_SETUP(tls)
{
- union {
- struct tls12_crypto_info_aes_gcm_128 aes128;
- struct tls12_crypto_info_chacha20_poly1305 chacha20;
- } tls12;
+ struct tls_crypto_info_keys tls12;
struct sockaddr_in addr;
socklen_t len;
int sfd, ret;
- size_t tls12_sz;
self->notls = false;
len = sizeof(addr);
- memset(&tls12, 0, sizeof(tls12));
- switch (variant->cipher_type) {
- case TLS_CIPHER_CHACHA20_POLY1305:
- tls12_sz = sizeof(struct tls12_crypto_info_chacha20_poly1305);
- tls12.chacha20.info.version = variant->tls_version;
- tls12.chacha20.info.cipher_type = variant->cipher_type;
- break;
- case TLS_CIPHER_AES_GCM_128:
- tls12_sz = sizeof(struct tls12_crypto_info_aes_gcm_128);
- tls12.aes128.info.version = variant->tls_version;
- tls12.aes128.info.cipher_type = variant->cipher_type;
- break;
- default:
- tls12_sz = 0;
- }
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type,
+ &tls12);
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
@@ -187,7 +211,7 @@ FIXTURE_SETUP(tls)
if (!self->notls) {
ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
- tls12_sz);
+ tls12.len);
ASSERT_EQ(ret, 0);
}
@@ -200,7 +224,7 @@ FIXTURE_SETUP(tls)
ASSERT_EQ(ret, 0);
ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
- tls12_sz);
+ tls12.len);
ASSERT_EQ(ret, 0);
}
@@ -308,6 +332,8 @@ TEST_F(tls, recv_max)
char recv_mem[TLS_PAYLOAD_MAX_LEN];
char buf[TLS_PAYLOAD_MAX_LEN];
+ memrnd(buf, sizeof(buf));
+
EXPECT_GE(send(self->fd, buf, send_len, 0), 0);
EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0);
@@ -588,6 +614,8 @@ TEST_F(tls, recvmsg_single_max)
struct iovec vec;
struct msghdr hdr;
+ memrnd(send_mem, sizeof(send_mem));
+
EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len);
vec.iov_base = (char *)recv_mem;
vec.iov_len = TLS_PAYLOAD_MAX_LEN;
@@ -610,6 +638,8 @@ TEST_F(tls, recvmsg_multiple)
struct msghdr hdr;
int i;
+ memrnd(buf, sizeof(buf));
+
EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len);
for (i = 0; i < msg_iovlen; i++) {
iov_base[i] = (char *)malloc(iov_len);
@@ -634,6 +664,8 @@ TEST_F(tls, single_send_multiple_recv)
char send_mem[TLS_PAYLOAD_MAX_LEN * 2];
char recv_mem[TLS_PAYLOAD_MAX_LEN * 2];
+ memrnd(send_mem, sizeof(send_mem));
+
EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
memset(recv_mem, 0, total_len);
@@ -834,18 +866,17 @@ TEST_F(tls, bidir)
int ret;
if (!self->notls) {
- struct tls12_crypto_info_aes_gcm_128 tls12;
+ struct tls_crypto_info_keys tls12;
- memset(&tls12, 0, sizeof(tls12));
- tls12.info.version = variant->tls_version;
- tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type,
+ &tls12);
ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
- sizeof(tls12));
+ tls12.len);
ASSERT_EQ(ret, 0);
ret = setsockopt(self->cfd, SOL_TLS, TLS_TX, &tls12,
- sizeof(tls12));
+ tls12.len);
ASSERT_EQ(ret, 0);
}
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index a8fa64136282..7f26591f236b 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
readonly BASE="ns-$(mktemp -u XXXXXX)"
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index dbf0421986df..66354cdd5ce4 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -189,6 +189,15 @@ segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255
route_test 240.5.6.7 240.5.6.1 255.1.2.1 255.1.2.3 24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)"
route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)"
#
+# Test support for lowest address ending in .0
+segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (/24)"
+#
+# Test support for lowest address not ending in .0
+segmenttest 192.168.101.192 192.168.101.193 26 "assign and ping lowest address (/26)"
+#
+# Routing using lowest address as a gateway/endpoint
+route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address"
+#
# ==============================================
# ==== TESTS THAT CURRENTLY EXPECT FAILURE =====
# ==============================================
@@ -202,14 +211,6 @@ segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forb
# Currently Linux does not allow this, so this should fail too
segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)"
#
-# Test support for lowest address
-# Currently Linux does not allow this, so this should fail too
-segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (is forbidden)"
-#
-# Routing using lowest address as a gateway/endpoint
-# Currently Linux does not allow this, so this should fail too
-route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address (is forbidden)"
-#
# Test support for unicast use of class D
# Currently Linux does not allow this, so this should fail too
segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)"
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 2fedc0781ce8..11d7cdb898c0 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -18,7 +18,8 @@ ret=0
cleanup() {
local ns
- local -r jobs="$(jobs -p)"
+ local jobs
+ readonly jobs="$(jobs -p)"
[ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
rm -f $STATS
@@ -108,7 +109,7 @@ chk_gro() {
if [ ! -f ../bpf/xdp_dummy.o ]; then
echo "Missing xdp_dummy helper. Build bpf selftest first"
- exit -1
+ exit 1
fi
create_ns
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 3171069a6b46..cd6430b39982 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for netfilter selftests
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
+TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
new file mode 100755
index 000000000000..6caf6ac8c285
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -0,0 +1,221 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsrouter="nsrouter-$sfx"
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+# Exit-trap handler: delete the three test namespaces, then write the saved
+# nf_log_all_netns value back, but only when that saved value was 0 (the
+# script itself sets the sysctl to 1 while it runs).
+cleanup()
+{
+ ip netns del ${ns1}
+ ip netns del ${ns2}
+ ip netns del ${nsrouter}
+
+ [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add ${nsrouter}
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+dmesg | grep -q ' nft_rpfilter: '
+if [ $? -eq 0 ]; then
+ dmesg -c | grep ' nft_rpfilter: '
+ echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+ip netns add ${ns1}
+ip netns add ${ns2}
+
+load_ruleset() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
+load_ruleset_count() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+ ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+ }
+}
+EOF
+}
+
+check_drops() {
+ dmesg | grep -q ' nft_rpfilter: '
+ if [ $? -eq 0 ]; then
+ dmesg | grep ' nft_rpfilter: '
+ echo "FAIL: rpfilter did drop packets"
+ return 1
+ fi
+
+ return 0
+}
+
+# check_fib_counter WANT NS ADDRESS
+#
+# Check that the fib rule mentioning ADDRESS in table "inet filter" of netns
+# NS has counted exactly WANT packets.  Returns 0 on a match; otherwise prints
+# a diagnostic and the ruleset, and returns 1.
+check_fib_counter() {
+	local want=$1
+	local ns=$2
+	local address=$3
+
+	# -F: ADDRESS is a literal string, so '.' must not act as a regex wildcard.
+	# -w: the count must match as a whole word, so "packets 1" is not
+	#     satisfied by "packets 10".
+	ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | \
+		grep -F -- "$address" | grep -q -w "packets $want"
+	ret=$?
+
+	if [ $ret -ne 0 ];then
+		echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+		ip netns exec ${ns} nft list table inet filter
+		return 1
+	fi
+
+	if [ $want -gt 0 ]; then
+		echo "PASS: fib expression did drop packets for $address"
+	fi
+
+	return 0
+}
+
+load_ruleset ${nsrouter}
+load_ruleset ${ns1}
+load_ruleset ${ns2}
+
+ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
+
+ip -net ${nsrouter} link set lo up
+ip -net ${nsrouter} link set veth0 up
+ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
+ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
+
+ip -net ${nsrouter} link set veth1 up
+ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
+
+ip -net ${ns1} link set lo up
+ip -net ${ns1} link set eth0 up
+
+ip -net ${ns2} link set lo up
+ip -net ${ns2} link set eth0 up
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+ip -net ${ns1} route add default via 10.0.1.1
+ip -net ${ns1} route add default via dead:1::1
+
+ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns2} addr add dead:2::99/64 dev eth0
+ip -net ${ns2} route add default via 10.0.2.1
+ip -net ${ns2} route add default via dead:2::1
+
+test_ping() {
+ local daddr4=$1
+ local daddr6=$2
+
+ ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null
+ ret=$?
+ if [ $ret -ne 0 ];then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+ return 1
+ fi
+
+ ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null
+ ret=$?
+ if [ $ret -ne 0 ];then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+sleep 3
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec ${nsrouter} nft flush table inet filter
+
+ip -net ${ns1} route del default
+ip -net ${ns1} -6 route del default
+
+ip -net ${ns1} addr del 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr del dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr add dead:2::99/64 dev eth0
+
+ip -net ${ns1} route add default via 10.0.2.1
+ip -net ${ns1} -6 route add default via dead:2::1
+
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth0
+
+# switch to a ruleset that doesn't log; this time
+# it's expected that this does drop the packets.
+load_ruleset_count ${nsrouter}
+
+# ns1 has a default route, but nsrouter does not.
+# must not check return value, ping to 1.1.1.1 will
+# fail.
+check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1
+check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1
+
+ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1
+
+sleep 2
+ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
+
+exit 0
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index 381d874cce99..d7ec1e7da0d0 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -155,7 +155,7 @@ struct flag_test {
int err;
};
-#define NUM_OPENAT2_FLAG_TESTS 24
+#define NUM_OPENAT2_FLAG_TESTS 25
void test_openat2_flags(void)
{
@@ -229,6 +229,11 @@ void test_openat2_flags(void)
{ .name = "invalid how.resolve and O_PATH",
.how.flags = O_PATH,
.how.resolve = 0x1337, .err = -EINVAL },
+
+ /* Currently unknown upper 32 bits are rejected. */
+ { .name = "currently unknown bit (1 << 63)",
+ .how.flags = O_RDONLY | (1ULL << 63),
+ .how.resolve = 0, .err = -EINVAL },
};
BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_FLAG_TESTS);
diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c
index 78ddf5e11625..8e83cf91513a 100644
--- a/tools/testing/selftests/perf_events/sigtrap_threads.c
+++ b/tools/testing/selftests/perf_events/sigtrap_threads.c
@@ -43,7 +43,7 @@ static struct {
siginfo_t first_siginfo; /* First observed siginfo_t. */
} ctx;
-/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */
+/* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */
#define TEST_SIG_DATA(addr) (~(unsigned long)(addr))
static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr)
@@ -164,8 +164,8 @@ TEST_F(sigtrap_threads, enable_event)
EXPECT_EQ(ctx.signal_count, NUM_THREADS);
EXPECT_EQ(ctx.tids_want_signal, 0);
EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
- EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
- EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+ EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
/* Check enabled for parent. */
ctx.iterate_on = 0;
@@ -183,8 +183,8 @@ TEST_F(sigtrap_threads, modify_and_enable_event)
EXPECT_EQ(ctx.signal_count, NUM_THREADS);
EXPECT_EQ(ctx.tids_want_signal, 0);
EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
- EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
- EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+ EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
/* Check enabled for parent. */
ctx.iterate_on = 0;
@@ -203,8 +203,8 @@ TEST_F(sigtrap_threads, signal_stress)
EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on);
EXPECT_EQ(ctx.tids_want_signal, 0);
EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
- EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
- EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+ EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
index 579f0215c6e7..9836838a529f 100644
--- a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
+++ b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
@@ -14,6 +14,7 @@
#include <time.h>
#include <sys/types.h>
#include <sys/time.h>
+#include <sys/syscall.h>
#include <signal.h>
static volatile int soak_done;
@@ -121,7 +122,7 @@ static void do_null_syscall(unsigned long nr)
unsigned long i;
for (i = 0; i < nr; i++)
- getppid();
+ syscall(__NR_gettid);
}
#define TIME(A, STR) \
diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
index 640fad6cc2c7..0785c2e99d40 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/Makefile
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -1,8 +1,8 @@
-CFLAGS = -O3 -m64 -I./include
+CFLAGS = -O3 -m64 -I./include -I../include
TEST_GEN_FILES := gzfht_test gunz_test
TEST_PROGS := nx-gzip-test.sh
include ../../lib.mk
-$(TEST_GEN_FILES): gzip_vas.c
+$(TEST_GEN_FILES): gzip_vas.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
index b099753b50e4..095195a25687 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
@@ -60,6 +60,7 @@
#include <assert.h>
#include <errno.h>
#include <signal.h>
+#include "utils.h"
#include "nxu.h"
#include "nx.h"
@@ -70,6 +71,8 @@ FILE *nx_gzip_log;
#define FNAME_MAX 1024
#define FEXT ".nx.gz"
+#define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len"
+
/*
* LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
*/
@@ -244,6 +247,7 @@ int compress_file(int argc, char **argv, void *handle)
struct nx_gzip_crb_cpb_t *cmdp;
uint32_t pagelen = 65536;
int fault_tries = NX_MAX_FAULTS;
+ char buf[32];
cmdp = (void *)(uintptr_t)
aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
@@ -263,8 +267,17 @@ int compress_file(int argc, char **argv, void *handle)
assert(NULL != (outbuf = (char *)malloc(outlen)));
nxu_touch_pages(outbuf, outlen, pagelen, 1);
- /* Compress piecemeal in smallish chunks */
- chunk = 1<<22;
+ /*
+ * On PowerVM, the hypervisor defines the maximum request buffer
+ * size, and this value is available via sysfs.
+ */
+ if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) {
+ chunk = atoi(buf);
+ } else {
+ /* sysfs entry is not available on PowerNV */
+ /* Compress piecemeal in smallish chunks */
+ chunk = 1<<22;
+ }
/* Write the gzip header to the stream */
num_hdr_bytes = gzip_header_blank(outbuf);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index c5ecb4634094..010160690227 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -24,7 +24,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \
fork_cleanup_test ebb_on_child_test \
ebb_on_willing_child_test back_to_back_ebbs_test \
lost_exception_test no_handler_test \
- cycles_with_mmcr2_test
+ cycles_with_mmcr2_test regs_access_pmccext_test
top_srcdir = ../../../../../..
include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
index b5bc2b616075..2c803b5b48d6 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
@@ -55,8 +55,6 @@ void ebb_global_disable(void);
bool ebb_is_supported(void);
void ebb_freeze_pmcs(void);
void ebb_unfreeze_pmcs(void);
-void event_ebb_init(struct event *e);
-void event_leader_ebb_init(struct event *e);
int count_pmc(int pmc, uint32_t sample_period);
void dump_ebb_state(void);
void dump_summary_ebb_state(void);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
index fc5bf4870d8e..01e827c31169 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
@@ -50,8 +50,6 @@ static int no_handler_test(void)
event_close(&event);
- dump_ebb_state();
-
/* The real test is that we never took an EBB at 0x0 */
return 0;
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
new file mode 100644
index 000000000000..1eda8e9932e8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC and
+ * sets MMCR0_PMCCEXT preventing further read access to the
+ * group B PMU registers.
+ */
+
+static int regs_access_pmccext(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 1)
+ FAIL_IF(core_busy_loop());
+
+ ebb_global_disable();
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ /*
+ * For ISA v3.1, verify the test takes a SIGILL when reading
+ * PMU regs after the event is closed. With the control bit
+ * in MMCR0 (PMCCEXT) restricting access to group B PMU regs,
+ * sigill is expected.
+ */
+ if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
+ FAIL_IF(catch_sigill(dump_ebb_state));
+ else
+ dump_ebb_state();
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(regs_access_pmccext, "regs_access_pmccext");
+}
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index 844d18cd5f93..7488315fd847 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0+
TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2
+TEST_PROGS := mitigation-patching.sh
+
top_srcdir = ../../../../..
CFLAGS += -I../../../../../usr/include
diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
new file mode 100755
index 000000000000..00197acb7ff1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+TIMEOUT=10
+
+function do_one
+{
+ local mitigation="$1"
+ local orig
+ local start
+ local now
+
+ orig=$(cat "$mitigation")
+
+ start=$EPOCHSECONDS
+ now=$start
+
+ while [[ $((now-start)) -lt "$TIMEOUT" ]]
+ do
+ echo 0 > "$mitigation"
+ echo 1 > "$mitigation"
+
+ now=$EPOCHSECONDS
+ done
+
+ echo "$orig" > "$mitigation"
+}
+
+rc=0
+cd /sys/kernel/debug/powerpc || rc=1
+if [[ "$rc" -ne 0 ]]; then
+ echo "Error: couldn't cd to /sys/kernel/debug/powerpc" >&2
+ exit 1
+fi
+
+tainted=$(cat /proc/sys/kernel/tainted)
+if [[ "$tainted" -ne 0 ]]; then
+ echo "Error: kernel already tainted!" >&2
+ exit 1
+fi
+
+mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush uaccess_flush"
+
+for m in $mitigations
+do
+ do_one "$m" &
+done
+
+echo "Spawned threads enabling/disabling mitigations ..."
+
+if stress-ng > /dev/null 2>&1; then
+ stress="stress-ng"
+elif stress > /dev/null 2>&1; then
+ stress="stress"
+else
+ stress=""
+fi
+
+if [[ -n "$stress" ]]; then
+ "$stress" -m "$(nproc)" -t "$TIMEOUT" &
+ echo "Spawned VM stressors ..."
+fi
+
+echo "Waiting for timeout ..."
+wait
+
+tainted=$(cat /proc/sys/kernel/tainted)
+if [[ "$tainted" -ne 0 ]]; then
+ echo "Error: kernel became tainted!" >&2
+ exit 1
+fi
+
+echo "OK"
+exit 0
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
index e2a0c07e8362..9ef37a9836ac 100644
--- a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
+++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
@@ -17,7 +17,6 @@
#include <pthread.h>
#include <sys/mman.h>
#include <unistd.h>
-#include <pthread.h>
#include "tm.h"
#include "utils.h"
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index bed4b5318a86..8f3e72e626fa 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -10,6 +10,7 @@
/proc-self-map-files-002
/proc-self-syscall
/proc-self-wchan
+/proc-subset-pid
/proc-uptime-001
/proc-uptime-002
/read
diff --git a/tools/testing/selftests/rlimits/.gitignore b/tools/testing/selftests/rlimits/.gitignore
new file mode 100644
index 000000000000..091021f255b3
--- /dev/null
+++ b/tools/testing/selftests/rlimits/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+rlimits-per-userns
diff --git a/tools/testing/selftests/rlimits/Makefile b/tools/testing/selftests/rlimits/Makefile
new file mode 100644
index 000000000000..03aadb406212
--- /dev/null
+++ b/tools/testing/selftests/rlimits/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g
+TEST_GEN_PROGS := rlimits-per-userns
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rlimits/config b/tools/testing/selftests/rlimits/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/rlimits/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/rlimits/rlimits-per-userns.c b/tools/testing/selftests/rlimits/rlimits-per-userns.c
new file mode 100644
index 000000000000..26dc949e93ea
--- /dev/null
+++ b/tools/testing/selftests/rlimits/rlimits-per-userns.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Alexey Gladkov <gladkov.alexey@gmail.com>
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <err.h>
+
+#define NR_CHILDS 2
+
+static char *service_prog;
+static uid_t user = 60000;
+static uid_t group = 60000;
+
+/*
+ * Set both the soft and the hard RLIMIT_NPROC of the calling process to @n.
+ * Terminates the program with EXIT_FAILURE if setrlimit() fails.
+ */
+static void setrlimit_nproc(rlim_t n)
+{
+	pid_t pid = getpid();
+	struct rlimit limit = {
+		.rlim_cur = n,
+		.rlim_max = n
+	};
+
+	/* rlim_t is unsigned and its width depends on the ABI: print it portably. */
+	warnx("(pid=%d): Setting RLIMIT_NPROC=%llu", pid, (unsigned long long)n);
+
+	if (setrlimit(RLIMIT_NPROC, &limit) < 0)
+		err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC)", pid);
+}
+
+static pid_t fork_child(void)
+{
+ pid_t pid = fork();
+
+ if (pid < 0)
+ err(EXIT_FAILURE, "fork");
+
+ if (pid > 0)
+ return pid;
+
+ pid = getpid();
+
+ warnx("(pid=%d): New process starting ...", pid);
+
+ if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
+ err(EXIT_FAILURE, "(pid=%d): prctl(PR_SET_PDEATHSIG)", pid);
+
+ signal(SIGUSR1, SIG_DFL);
+
+ warnx("(pid=%d): Changing to uid=%d, gid=%d", pid, user, group);
+
+ if (setgid(group) < 0)
+ err(EXIT_FAILURE, "(pid=%d): setgid(%d)", pid, group);
+ if (setuid(user) < 0)
+ err(EXIT_FAILURE, "(pid=%d): setuid(%d)", pid, user);
+
+ warnx("(pid=%d): Service running ...", pid);
+
+ warnx("(pid=%d): Unshare user namespace", pid);
+ if (unshare(CLONE_NEWUSER) < 0)
+ err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)");
+
+ char *const argv[] = { "service", NULL };
+ char *const envp[] = { "I_AM_SERVICE=1", NULL };
+
+ warnx("(pid=%d): Executing real service ...", pid);
+
+ execve(service_prog, argv, envp);
+ err(EXIT_FAILURE, "(pid=%d): execve", pid);
+}
+
+int main(int argc, char **argv)
+{
+ size_t i;
+ pid_t child[NR_CHILDS];
+ int wstatus[NR_CHILDS];
+ int childs = NR_CHILDS;
+ pid_t pid;
+
+ if (getenv("I_AM_SERVICE")) {
+ pause();
+ exit(EXIT_SUCCESS);
+ }
+
+ service_prog = argv[0];
+ pid = getpid();
+
+ warnx("(pid=%d) Starting testcase", pid);
+
+ /*
+ * This rlimit is not a problem for root because it can be exceeded.
+ */
+ setrlimit_nproc(1);
+
+ for (i = 0; i < NR_CHILDS; i++) {
+ child[i] = fork_child();
+ wstatus[i] = 0;
+ usleep(250000);
+ }
+
+ while (1) {
+ for (i = 0; i < NR_CHILDS; i++) {
+ if (child[i] <= 0)
+ continue;
+
+ errno = 0;
+ pid_t ret = waitpid(child[i], &wstatus[i], WNOHANG);
+
+ if (!ret || (!WIFEXITED(wstatus[i]) && !WIFSIGNALED(wstatus[i])))
+ continue;
+
+ if (ret < 0 && errno != ECHILD)
+ warn("(pid=%d): waitpid(%d)", pid, child[i]);
+
+ child[i] *= -1;
+ childs -= 1;
+ }
+
+ if (!childs)
+ break;
+
+ usleep(250000);
+
+ for (i = 0; i < NR_CHILDS; i++) {
+ if (child[i] <= 0)
+ continue;
+ kill(child[i], SIGUSR1);
+ }
+ }
+
+ for (i = 0; i < NR_CHILDS; i++) {
+ if (WIFEXITED(wstatus[i]))
+ warnx("(pid=%d): pid %d exited, status=%d",
+ pid, -child[i], WEXITSTATUS(wstatus[i]));
+ else if (WIFSIGNALED(wstatus[i]))
+ warnx("(pid=%d): pid %d killed by signal %d",
+ pid, -child[i], WTERMSIG(wstatus[i]));
+
+ if (WIFSIGNALED(wstatus[i]) && WTERMSIG(wstatus[i]) == SIGUSR1)
+ continue;
+
+ warnx("(pid=%d): Test failed", pid);
+ exit(EXIT_FAILURE);
+ }
+
+ warnx("(pid=%d): Test passed", pid);
+ exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/sched/.gitignore b/tools/testing/selftests/sched/.gitignore
new file mode 100644
index 000000000000..6996d4654d92
--- /dev/null
+++ b/tools/testing/selftests/sched/.gitignore
@@ -0,0 +1 @@
+cs_prctl_test
diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile
new file mode 100644
index 000000000000..10c72f14fea9
--- /dev/null
+++ b/tools/testing/selftests/sched/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
+CLANG_FLAGS += -no-integrated-as
+endif
+
+CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -Wl,-rpath=./ \
+ $(CLANG_FLAGS)
+LDLIBS += -lpthread
+
+TEST_GEN_FILES := cs_prctl_test
+TEST_PROGS := cs_prctl_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/sched/config b/tools/testing/selftests/sched/config
new file mode 100644
index 000000000000..e8b09aa7c0c4
--- /dev/null
+++ b/tools/testing/selftests/sched/config
@@ -0,0 +1 @@
+CONFIG_SCHED_DEBUG=y
diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c
new file mode 100644
index 000000000000..63fe6521c56d
--- /dev/null
+++ b/tools/testing/selftests/sched/cs_prctl_test.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Use the core scheduling prctl() to test core scheduling cookies control.
+ *
+ * Copyright (c) 2021 Oracle and/or its affiliates.
+ * Author: Chris Hyser <chris.hyser@oracle.com>
+ *
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses>.
+ */
+
+#define _GNU_SOURCE
+#include <sys/eventfd.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sched.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if __GLIBC_PREREQ(2, 30) == 0
+#include <sys/syscall.h>
+static pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+#endif
+
+#ifndef PR_SCHED_CORE
+#define PR_SCHED_CORE 62
+# define PR_SCHED_CORE_GET 0
+# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
+# define PR_SCHED_CORE_MAX 4
+#endif
+
+#define MAX_PROCESSES 128
+#define MAX_THREADS 128
+
+static const char USAGE[] = "cs_prctl_test [options]\n"
+" options:\n"
+" -P : number of processes to create.\n"
+" -T : number of threads per process to create.\n"
+" -d : delay time to keep tasks alive.\n"
+" -k : keep tasks alive until keypress.\n";
+
+enum pid_type {PIDTYPE_PID = 0, PIDTYPE_TGID, PIDTYPE_PGID};
+
+const int THREAD_CLONE_FLAGS = CLONE_THREAD | CLONE_SIGHAND | CLONE_FS | CLONE_VM | CLONE_FILES;
+
+static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4,
+ unsigned long arg5)
+{
+ int res;
+
+ res = prctl(option, arg2, arg3, arg4, arg5);
+ printf("%d = prctl(%d, %ld, %ld, %ld, %lx)\n", res, option, (long)arg2, (long)arg3,
+ (long)arg4, arg5);
+ return res;
+}
+
+#define STACK_SIZE (1024 * 1024)
+
+#define handle_error(msg) __handle_error(__FILE__, __LINE__, msg)
+static void __handle_error(char *fn, int ln, char *msg)
+{
+ printf("(%s:%d) - ", fn, ln);
+ perror(msg);
+ exit(EXIT_FAILURE);
+}
+
+static void handle_usage(int rc, char *msg)
+{
+ puts(USAGE);
+ puts(msg);
+ putchar('\n');
+ exit(rc);
+}
+
+/*
+ * Query the core scheduling cookie of task @pid using
+ * prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, ..., PIDTYPE_PID, &cookie).
+ *
+ * Returns the cookie value, or -1UL after printing a message when prctl()
+ * reports failure.
+ */
+static unsigned long get_cs_cookie(int pid)
+{
+ unsigned long long cookie;
+ int ret;
+
+ ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid, PIDTYPE_PID,
+ (unsigned long)&cookie);
+ if (ret) {
+ printf("Not a core sched system\n");
+ return -1UL;
+ }
+
+ return cookie;
+}
+
+struct child_args {
+ int num_threads;
+ int pfd[2];
+ int cpid;
+ int thr_tids[MAX_THREADS];
+};
+
+static int child_func_thread(void __attribute__((unused))*arg)
+{
+ while (1)
+ usleep(20000);
+ return 0;
+}
+
+static void create_threads(int num_threads, int thr_tids[])
+{
+ void *child_stack;
+ pid_t tid;
+ int i;
+
+ for (i = 0; i < num_threads; ++i) {
+ child_stack = malloc(STACK_SIZE);
+ if (!child_stack)
+ handle_error("child stack allocate");
+
+ tid = clone(child_func_thread, child_stack + STACK_SIZE, THREAD_CLONE_FLAGS, NULL);
+ if (tid == -1)
+ handle_error("clone thread");
+ thr_tids[i] = tid;
+ }
+}
+
+/*
+ * Entry point of each cloned child process: create the requested number of
+ * threads, send their TIDs to the parent through the pipe in @arg (a
+ * struct child_args), then loop forever.
+ */
+static int child_func_process(void *arg)
+{
+	struct child_args *ca = (struct child_args *)arg;
+
+	close(ca->pfd[0]);
+
+	create_threads(ca->num_threads, ca->thr_tids);
+
+	/* Report a failed write instead of silently leaving the parent without TIDs. */
+	if (write(ca->pfd[1], &ca->thr_tids, sizeof(int) * ca->num_threads) == -1)
+		handle_error("write() failed");
+	close(ca->pfd[1]);
+
+	while (1)
+		usleep(20000);
+	return 0;
+}
+
+static unsigned char child_func_process_stack[STACK_SIZE];
+
+/*
+ * Spawn @num_processes child processes, each creating @num_threads threads,
+ * and record every child's PID and its thread TIDs in @proc.
+ *
+ * Exits through handle_error() if pipe(), clone() or read() fails.
+ */
+void create_processes(int num_processes, int num_threads, struct child_args proc[])
+{
+	pid_t cpid;
+	int i;
+
+	for (i = 0; i < num_processes; ++i) {
+		proc[i].num_threads = num_threads;
+
+		if (pipe(proc[i].pfd) == -1)
+			handle_error("pipe() failed");
+
+		cpid = clone(child_func_process, child_func_process_stack + STACK_SIZE,
+			     SIGCHLD, &proc[i]);
+		/*
+		 * Never store -1 as a child PID: main() later passes cpid to
+		 * kill(), and kill(-1, sig) signals every process the caller
+		 * is allowed to signal.
+		 */
+		if (cpid == -1)
+			handle_error("clone process");
+		proc[i].cpid = cpid;
+		close(proc[i].pfd[1]);
+	}
+
+	/* Collect the thread TIDs each child wrote to its pipe. */
+	for (i = 0; i < num_processes; ++i) {
+		if (read(proc[i].pfd[0], &proc[i].thr_tids,
+			 sizeof(int) * proc[i].num_threads) == -1)
+			handle_error("read() failed");
+		close(proc[i].pfd[0]);
+	}
+}
+
+void disp_processes(int num_processes, struct child_args proc[])
+{
+ int i, j;
+
+ printf("tid=%d, / tgid=%d / pgid=%d: %lx\n", gettid(), getpid(), getpgid(0),
+ get_cs_cookie(getpid()));
+
+ for (i = 0; i < num_processes; ++i) {
+ printf(" tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].cpid, proc[i].cpid,
+ getpgid(proc[i].cpid), get_cs_cookie(proc[i].cpid));
+ for (j = 0; j < proc[i].num_threads; ++j) {
+ printf(" tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].thr_tids[j],
+ proc[i].cpid, getpgid(0), get_cs_cookie(proc[i].thr_tids[j]));
+ }
+ }
+ puts("\n");
+}
+
+static int errors;
+
+#define validate(v) _validate(__LINE__, v, #v)
+void _validate(int line, int val, char *msg)
+{
+ if (!val) {
+ ++errors;
+ printf("(%d) FAILED: %s\n", line, msg);
+ } else {
+ printf("(%d) PASSED: %s\n", line, msg);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct child_args procs[MAX_PROCESSES];
+
+ int keypress = 0;
+ int num_processes = 2;
+ int num_threads = 3;
+ int delay = 0;
+ int res = 0;
+ int pidx;
+ int pid;
+ int opt;
+
+ while ((opt = getopt(argc, argv, ":hkT:P:d:")) != -1) {
+ switch (opt) {
+ case 'P':
+ num_processes = (int)strtol(optarg, NULL, 10);
+ break;
+ case 'T':
+ num_threads = (int)strtoul(optarg, NULL, 10);
+ break;
+ case 'd':
+ delay = (int)strtol(optarg, NULL, 10);
+ break;
+ case 'k':
+ keypress = 1;
+ break;
+ case 'h':
+ printf(USAGE);
+ exit(EXIT_SUCCESS);
+ default:
+ handle_usage(20, "unknown option");
+ }
+ }
+
+ if (num_processes < 1 || num_processes > MAX_PROCESSES)
+ handle_usage(1, "Bad processes value");
+
+ if (num_threads < 1 || num_threads > MAX_THREADS)
+ handle_usage(2, "Bad thread value");
+
+ if (keypress)
+ delay = -1;
+
+ srand(time(NULL));
+
+ /* put into separate process group */
+ if (setpgid(0, 0) != 0)
+ handle_error("process group");
+
+ printf("\n## Create a thread/process/process group hiearchy\n");
+ create_processes(num_processes, num_threads, procs);
+ disp_processes(num_processes, procs);
+ validate(get_cs_cookie(0) == 0);
+
+ printf("\n## Set a cookie on entire process group\n");
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0, PIDTYPE_PGID, 0) < 0)
+ handle_error("core_sched create failed -- PGID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) != 0);
+
+ /* get a random process pid */
+ pidx = rand() % num_processes;
+ pid = procs[pidx].cpid;
+
+ validate(get_cs_cookie(0) == get_cs_cookie(pid));
+ validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Set a new cookie on entire process/TGID [%d]\n", pid);
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid, PIDTYPE_TGID, 0) < 0)
+ handle_error("core_sched create failed -- TGID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) != get_cs_cookie(pid));
+ validate(get_cs_cookie(pid) != 0);
+ validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Copy the cookie of current/PGID[%d], to pid [%d] as PIDTYPE_PID\n",
+ getpid(), pid);
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, pid, PIDTYPE_PID, 0) < 0)
+ handle_error("core_sched share to itself failed -- PID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) == get_cs_cookie(pid));
+ validate(get_cs_cookie(pid) != 0);
+ validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Copy cookie from a thread [%d] to current/PGID [%d] as PIDTYPE_PID\n",
+ procs[pidx].thr_tids[0], getpid());
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, procs[pidx].thr_tids[0],
+ PIDTYPE_PID, 0) < 0)
+ handle_error("core_sched share from thread failed -- PID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0]));
+ validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Copy cookie from current [%d] to current as pidtype PGID\n", getpid());
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, 0, PIDTYPE_PGID, 0) < 0)
+ handle_error("core_sched share to self failed -- PGID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) == get_cs_cookie(pid));
+ validate(get_cs_cookie(pid) != 0);
+ validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ if (errors) {
+ printf("TESTS FAILED. errors: %d\n", errors);
+ res = 10;
+ } else {
+ printf("SUCCESS !!!\n");
+ }
+
+ if (keypress)
+ getchar();
+ else
+ sleep(delay);
+
+ for (pidx = 0; pidx < num_processes; ++pidx)
+ kill(procs[pidx].cpid, 15);
+
+ return res;
+}
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index fcc806585266..6e5102a7d7c9 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -143,9 +143,15 @@ int main(int argc, char *argv[])
unsigned long long native, filter1, filter2, bitmap1, bitmap2;
unsigned long long entry, per_filter1, per_filter2;
+ setbuf(stdout, NULL);
+
+ printf("Running on:\n");
+ system("uname -a");
+
printf("Current BPF sysctl settings:\n");
- system("sysctl net.core.bpf_jit_enable");
- system("sysctl net.core.bpf_jit_harden");
+ /* Avoid using "sysctl" which may not be installed. */
+ system("grep -H . /proc/sys/net/core/bpf_jit_enable");
+ system("grep -H . /proc/sys/net/core/bpf_jit_harden");
if (argc > 1)
samples = strtoull(argv[1], NULL, 0);
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 98c3b647f54d..1d64891e6492 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -235,6 +235,10 @@ struct seccomp_notif_addfd {
};
#endif
+#ifndef SECCOMP_ADDFD_FLAG_SEND
+#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
+#endif
+
struct seccomp_notif_addfd_small {
__u64 id;
char weird[4];
@@ -1753,16 +1757,25 @@ TEST_F(TRACE_poke, getpid_runs_normally)
# define SYSCALL_RET_SET(_regs, _val) \
do { \
typeof(_val) _result = (_val); \
- /* \
- * A syscall error is signaled by CR0 SO bit \
- * and the code is stored as a positive value. \
- */ \
- if (_result < 0) { \
- SYSCALL_RET(_regs) = -_result; \
- (_regs).ccr |= 0x10000000; \
- } else { \
+ if ((_regs.trap & 0xfff0) == 0x3000) { \
+ /* \
+ * scv 0 system call uses -ve result \
+ * for error, so no need to adjust. \
+ */ \
SYSCALL_RET(_regs) = _result; \
- (_regs).ccr &= ~0x10000000; \
+ } else { \
+ /* \
+ * A syscall error is signaled by the \
+ * CR0 SO bit and the code is stored as \
+ * a positive value. \
+ */ \
+ if (_result < 0) { \
+ SYSCALL_RET(_regs) = -_result; \
+ (_regs).ccr |= 0x10000000; \
+ } else { \
+ SYSCALL_RET(_regs) = _result; \
+ (_regs).ccr &= ~0x10000000; \
+ } \
} \
} while (0)
# define SYSCALL_RET_SET_ON_PTRACE_EXIT
@@ -3950,7 +3963,7 @@ TEST(user_notification_addfd)
{
pid_t pid;
long ret;
- int status, listener, memfd, fd;
+ int status, listener, memfd, fd, nextfd;
struct seccomp_notif_addfd addfd = {};
struct seccomp_notif_addfd_small small = {};
struct seccomp_notif_addfd_big big = {};
@@ -3959,25 +3972,34 @@ TEST(user_notification_addfd)
/* 100 ms */
struct timespec delay = { .tv_nsec = 100000000 };
+ /* There may be arbitrary already-open fds at test start. */
memfd = memfd_create("test", 0);
ASSERT_GE(memfd, 0);
+ nextfd = memfd + 1;
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
+ /* fd: 4 */
/* Check that the basic notification machinery works */
listener = user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER);
- ASSERT_GE(listener, 0);
+ ASSERT_EQ(listener, nextfd++);
pid = fork();
ASSERT_GE(pid, 0);
if (pid == 0) {
+ /* fds will be added and this value is expected */
if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
exit(1);
+
+ /* Atomic addfd+send is received here. Check it is a valid fd */
+ if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
+ exit(1);
+
exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
}
@@ -4019,14 +4041,14 @@ TEST(user_notification_addfd)
/* Verify we can set an arbitrary remote fd */
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
- EXPECT_GE(fd, 0);
+ EXPECT_EQ(fd, nextfd++);
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/* Verify we can set an arbitrary remote fd with large size */
memset(&big, 0x0, sizeof(big));
big.addfd = addfd;
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
- EXPECT_GE(fd, 0);
+ EXPECT_EQ(fd, nextfd++);
/* Verify we can set a specific remote fd */
addfd.newfd = 42;
@@ -4056,6 +4078,32 @@ TEST(user_notification_addfd)
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
ASSERT_EQ(addfd.id, req.id);
+ /* Verify we can do an atomic addfd and send */
+ addfd.newfd = 0;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ /*
+ * Child has earlier "low" fds and now 42, so we expect the next
+ * lowest available fd to be assigned here.
+ */
+ EXPECT_EQ(fd, nextfd++);
+ EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
+
+ /*
+ * This sets the ID of the ADD FD to the last request plus 1. The
+ * notification ID increments 1 per notification.
+ */
+ addfd.id = req.id + 1;
+
+ /* This spins until the underlying notification is generated */
+ while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
+ errno != -EINPROGRESS)
+ nanosleep(&delay, NULL);
+
+ memset(&req, 0, sizeof(req));
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ ASSERT_EQ(addfd.id, req.id);
+
resp.id = req.id;
resp.error = 0;
resp.val = USER_NOTIF_MAGIC;
@@ -4116,6 +4164,10 @@ TEST(user_notification_addfd_rlimit)
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
EXPECT_EQ(errno, EMFILE);
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EMFILE);
+
addfd.newfd = 100;
addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
index 229ee185b27e..254136e3da5a 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
@@ -29,22 +29,26 @@ class SubPlugin(TdcPlugin):
return
# Check for required fields
- scapyinfo = self.args.caseinfo['scapy']
- scapy_keys = ['iface', 'count', 'packet']
- missing_keys = []
- keyfail = False
- for k in scapy_keys:
- if k not in scapyinfo:
- keyfail = True
- missing_keys.add(k)
- if keyfail:
- print('{}: Scapy block present in the test, but is missing info:'
- .format(self.sub_class))
- print('{}'.format(missing_keys))
-
- pkt = eval(scapyinfo['packet'])
- if '$' in scapyinfo['iface']:
- tpl = Template(scapyinfo['iface'])
- scapyinfo['iface'] = tpl.safe_substitute(NAMES)
- for count in range(scapyinfo['count']):
- sendp(pkt, iface=scapyinfo['iface'])
+ lscapyinfo = self.args.caseinfo['scapy']
+ if type(lscapyinfo) != list:
+ lscapyinfo = [ lscapyinfo, ]
+
+ for scapyinfo in lscapyinfo:
+ scapy_keys = ['iface', 'count', 'packet']
+ missing_keys = []
+ keyfail = False
+ for k in scapy_keys:
+ if k not in scapyinfo:
+ keyfail = True
+ missing_keys.append(k)
+ if keyfail:
+ print('{}: Scapy block present in the test, but is missing info:'
+ .format(self.sub_class))
+ print('{}'.format(missing_keys))
+
+ pkt = eval(scapyinfo['packet'])
+ if '$' in scapyinfo['iface']:
+ tpl = Template(scapyinfo['iface'])
+ scapyinfo['iface'] = tpl.safe_substitute(NAMES)
+ for count in range(scapyinfo['count']):
+ sendp(pkt, iface=scapyinfo['iface'])
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
index 4202e95e27b9..bd843ab00a58 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
@@ -406,5 +406,50 @@
"teardown": [
"$TC actions flush action ct"
]
+ },
+ {
+ "id": "3992",
+ "name": "Add ct action triggering DNAT tuple conflict",
+ "category": [
+ "actions",
+ "ct",
+ "scapy"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ [
+ "$TC qdisc del dev $DEV1 ingress",
+ 0,
+ 1,
+ 2,
+ 255
+ ],
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 1 flower ct_state -trk action ct commit nat dst addr 20.0.0.1 port 10 pipe action drop",
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 1,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.10')/TCP(sport=5000,dport=10)"
+ },
+ {
+ "iface": "$DEV0",
+ "count": 1,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "expExitCode": "0",
+ "verifyCmd": "cat /proc/net/nf_conntrack",
+ "matchPattern": "dst=10.0.0.20",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
index 41d783254b08..2aad4caa8581 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -446,6 +446,30 @@
"teardown": []
},
{
+ "id": "ba5b",
+ "name": "Add vlan modify action for protocol 802.1Q setting priority 0",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 priority 0 index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 100",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 100 priority 0 protocol 802.1Q pipe.*index 100 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
"id": "6812",
"name": "Add vlan modify action for protocol 802.1Q",
"category": [
@@ -463,7 +487,7 @@
"cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100",
"expExitCode": "0",
"verifyCmd": "$TC actions get action vlan index 100",
- "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q pipe.*index 100 ref",
"matchCount": "0",
"teardown": [
"$TC actions flush action vlan"
@@ -487,7 +511,7 @@
"cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12",
"expExitCode": "0",
"verifyCmd": "$TC actions get action vlan index 12",
- "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad reclassify.*index 12 ref",
"matchCount": "1",
"teardown": [
"$TC actions flush action vlan"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
index 1cda2e11b3ad..773c5027553d 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
@@ -9,11 +9,11 @@
"setup": [
"$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DUMMY root fq_pie flows 65536",
- "expExitCode": "2",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_pie flows 65536",
+ "expExitCode": "0",
"verifyCmd": "$TC qdisc show dev $DUMMY",
- "matchPattern": "qdisc",
- "matchCount": "0",
+ "matchPattern": "qdisc fq_pie 1: root refcnt 2 limit 10240p flows 65536",
+ "matchCount": "1",
"teardown": [
"$IP link del dev $DUMMY"
]
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 1f651e85ed60..d683a49d07d5 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -12,6 +12,9 @@ mremap_test
on-fault-limit
transhuge-stress
protection_keys
+protection_keys_32
+protection_keys_64
+madv_populate
userfaultfd
mlock-intersect-test
mlock-random-test
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 73e1cc96d7c2..812bc03e3142 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -31,6 +31,7 @@ TEST_GEN_FILES += hmm-tests
TEST_GEN_FILES += hugepage-mmap
TEST_GEN_FILES += hugepage-shm
TEST_GEN_FILES += khugepaged
+TEST_GEN_FILES += madv_populate
TEST_GEN_FILES += map_fixed_noreplace
TEST_GEN_FILES += map_hugetlb
TEST_GEN_FILES += map_populate
@@ -100,7 +101,7 @@ $(1) $(1)_64: $(OUTPUT)/$(1)_64
endef
ifeq ($(CAN_BUILD_I386),1)
-$(BINARIES_32): CFLAGS += -m32
+$(BINARIES_32): CFLAGS += -m32 -mxsave
$(BINARIES_32): LDLIBS += -lrt -ldl -lm
$(BINARIES_32): $(OUTPUT)/%_32: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
@@ -108,7 +109,7 @@ $(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
endif
ifeq ($(CAN_BUILD_X86_64),1)
-$(BINARIES_64): CFLAGS += -m64
+$(BINARIES_64): CFLAGS += -m64 -mxsave
$(BINARIES_64): LDLIBS += -lrt -ldl
$(BINARIES_64): $(OUTPUT)/%_64: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 1e662d59c502..fe043f67798b 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -6,6 +6,8 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <pthread.h>
+#include <assert.h>
#include "../../../../mm/gup_test.h"
#define MB (1UL << 20)
@@ -15,6 +17,12 @@
#define FOLL_WRITE 0x01 /* check pte is writable */
#define FOLL_TOUCH 0x02 /* mark page accessed */
+static unsigned long cmd = GUP_FAST_BENCHMARK;
+static int gup_fd, repeats = 1;
+static unsigned long size = 128 * MB;
+/* Serialize prints */
+static pthread_mutex_t print_mutex = PTHREAD_MUTEX_INITIALIZER;
+
static char *cmd_to_str(unsigned long cmd)
{
switch (cmd) {
@@ -34,17 +42,55 @@ static char *cmd_to_str(unsigned long cmd)
return "Unknown command";
}
+void *gup_thread(void *data)
+{
+ struct gup_test gup = *(struct gup_test *)data;
+ int i;
+
+ /* Only report timing information on the *_BENCHMARK commands: */
+ if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
+ (cmd == PIN_LONGTERM_BENCHMARK)) {
+ for (i = 0; i < repeats; i++) {
+ gup.size = size;
+ if (ioctl(gup_fd, cmd, &gup))
+ perror("ioctl"), exit(1);
+
+ pthread_mutex_lock(&print_mutex);
+ printf("%s: Time: get:%lld put:%lld us",
+ cmd_to_str(cmd), gup.get_delta_usec,
+ gup.put_delta_usec);
+ if (gup.size != size)
+ printf(", truncated (size: %lld)", gup.size);
+ printf("\n");
+ pthread_mutex_unlock(&print_mutex);
+ }
+ } else {
+ gup.size = size;
+ if (ioctl(gup_fd, cmd, &gup)) {
+ perror("ioctl");
+ exit(1);
+ }
+
+ pthread_mutex_lock(&print_mutex);
+ printf("%s: done\n", cmd_to_str(cmd));
+ if (gup.size != size)
+ printf("Truncated (size: %lld)\n", gup.size);
+ pthread_mutex_unlock(&print_mutex);
+ }
+
+ return NULL;
+}
+
int main(int argc, char **argv)
{
struct gup_test gup = { 0 };
- unsigned long size = 128 * MB;
- int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 1;
- unsigned long cmd = GUP_FAST_BENCHMARK;
+ int filed, i, opt, nr_pages = 1, thp = -1, write = 1, nthreads = 1, ret;
int flags = MAP_PRIVATE, touch = 0;
char *file = "/dev/zero";
+ pthread_t *tid;
char *p;
- while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwWSHpz")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:F:f:abcj:tTLUuwWSHpz")) != -1) {
switch (opt) {
case 'a':
cmd = PIN_FAST_BENCHMARK;
@@ -74,6 +120,9 @@ int main(int argc, char **argv)
/* strtol, so you can pass flags in hex form */
gup.gup_flags = strtol(optarg, 0, 0);
break;
+ case 'j':
+ nthreads = atoi(optarg);
+ break;
case 'm':
size = atoi(optarg) * MB;
break;
@@ -154,8 +203,8 @@ int main(int argc, char **argv)
if (write)
gup.gup_flags |= FOLL_WRITE;
- fd = open("/sys/kernel/debug/gup_test", O_RDWR);
- if (fd == -1) {
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ if (gup_fd == -1) {
perror("open");
exit(1);
}
@@ -185,32 +234,17 @@ int main(int argc, char **argv)
p[0] = 0;
}
- /* Only report timing information on the *_BENCHMARK commands: */
- if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
- (cmd == PIN_LONGTERM_BENCHMARK)) {
- for (i = 0; i < repeats; i++) {
- gup.size = size;
- if (ioctl(fd, cmd, &gup))
- perror("ioctl"), exit(1);
-
- printf("%s: Time: get:%lld put:%lld us",
- cmd_to_str(cmd), gup.get_delta_usec,
- gup.put_delta_usec);
- if (gup.size != size)
- printf(", truncated (size: %lld)", gup.size);
- printf("\n");
- }
- } else {
- gup.size = size;
- if (ioctl(fd, cmd, &gup)) {
- perror("ioctl");
- exit(1);
- }
-
- printf("%s: done\n", cmd_to_str(cmd));
- if (gup.size != size)
- printf("Truncated (size: %lld)\n", gup.size);
+ tid = malloc(sizeof(pthread_t) * nthreads);
+ assert(tid);
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_create(&tid[i], NULL, gup_thread, &gup);
+ assert(ret == 0);
+ }
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(tid[i], NULL);
+ assert(ret == 0);
}
+ free(tid);
return 0;
}
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 5d1ac691b9f4..864f126ffd78 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1485,4 +1485,162 @@ TEST_F(hmm2, double_map)
hmm_buffer_free(buffer);
}
+/*
+ * Basic check of exclusive faulting.
+ */
+TEST_F(hmm, exclusive)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Map memory exclusively for device access. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i]++, i);
+
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i+1);
+
+ /* Check atomic access revoked */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_CHECK_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+
+ hmm_buffer_free(buffer);
+}
+
+TEST_F(hmm, exclusive_mprotect)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Map memory exclusively for device access. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, -EPERM);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Check copy-on-write works.
+ */
+TEST_F(hmm, exclusive_cow)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Map memory exclusively for device access. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ fork();
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i]++, i);
+
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i+1);
+
+ hmm_buffer_free(buffer);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c
index 8b75821302a7..155120b67a16 100644
--- a/tools/testing/selftests/vm/khugepaged.c
+++ b/tools/testing/selftests/vm/khugepaged.c
@@ -86,7 +86,6 @@ struct settings {
enum thp_enabled thp_enabled;
enum thp_defrag thp_defrag;
enum shmem_enabled shmem_enabled;
- bool debug_cow;
bool use_zero_page;
struct khugepaged_settings khugepaged;
};
@@ -95,7 +94,6 @@ static struct settings default_settings = {
.thp_enabled = THP_MADVISE,
.thp_defrag = THP_DEFRAG_ALWAYS,
.shmem_enabled = SHMEM_NEVER,
- .debug_cow = 0,
.use_zero_page = 0,
.khugepaged = {
.defrag = 1,
@@ -268,7 +266,6 @@ static void write_settings(struct settings *settings)
write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
write_string("shmem_enabled",
shmem_enabled_strings[settings->shmem_enabled]);
- write_num("debug_cow", settings->debug_cow);
write_num("use_zero_page", settings->use_zero_page);
write_num("khugepaged/defrag", khugepaged->defrag);
@@ -304,7 +301,6 @@ static void save_settings(void)
.thp_defrag = read_string("defrag", thp_defrag_strings),
.shmem_enabled =
read_string("shmem_enabled", shmem_enabled_strings),
- .debug_cow = read_num("debug_cow"),
.use_zero_page = read_num("use_zero_page"),
};
saved_settings.khugepaged = (struct khugepaged_settings) {
diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c
new file mode 100644
index 000000000000..b959e4ebdad4
--- /dev/null
+++ b/tools/testing/selftests/vm/madv_populate.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
+ *
+ * Copyright 2021, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include "../kselftest.h"
+
+#if defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE)
+
+/*
+ * For now, we're using 2 MiB of private anonymous memory for all tests.
+ */
+#define SIZE (2 * 1024 * 1024)
+
+static size_t pagesize;
+
+static uint64_t pagemap_get_entry(int fd, char *start)
+{
+ const unsigned long pfn = (unsigned long)start / pagesize;
+ uint64_t entry;
+ int ret;
+
+ ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry));
+ if (ret != sizeof(entry))
+ ksft_exit_fail_msg("reading pagemap failed\n");
+ return entry;
+}
+
+static bool pagemap_is_populated(int fd, char *start)
+{
+ uint64_t entry = pagemap_get_entry(fd, start);
+
+ /* Present or swapped. */
+ return entry & 0xc000000000000000ull;
+}
+
+static bool pagemap_is_softdirty(int fd, char *start)
+{
+ uint64_t entry = pagemap_get_entry(fd, start);
+
+ return entry & 0x0080000000000000ull;
+}
+
+/*
+ * Probe for MADV_POPULATE_READ and MADV_POPULATE_WRITE support by applying
+ * both to a freshly mapped, one-page private anonymous region.
+ *
+ * Calls ksft_exit_fail_msg() if the probe mapping cannot be created and
+ * ksft_exit_skip() if either madvise() call returns non-zero.
+ */
+static void sense_support(void)
+{
+ char *addr;
+ int ret;
+
+ addr = mmap(0, pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ /* mmap() reports failure with MAP_FAILED, never with NULL. */
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ ret = madvise(addr, pagesize, MADV_POPULATE_READ);
+ if (ret)
+ ksft_exit_skip("MADV_POPULATE_READ is not available\n");
+
+ ret = madvise(addr, pagesize, MADV_POPULATE_WRITE);
+ if (ret)
+ ksft_exit_skip("MADV_POPULATE_WRITE is not available\n");
+
+ munmap(addr, pagesize);
+}
+
+static void test_prot_read(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(!ret, "MADV_POPULATE_READ with PROT_READ\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == EINVAL,
+ "MADV_POPULATE_WRITE with PROT_READ\n");
+
+ munmap(addr, SIZE);
+}
+
+static void test_prot_write(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == EINVAL,
+ "MADV_POPULATE_READ with PROT_WRITE\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(!ret, "MADV_POPULATE_WRITE with PROT_WRITE\n");
+
+ munmap(addr, SIZE);
+}
+
+static void test_holes(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ ret = munmap(addr + pagesize, pagesize);
+ if (ret)
+ ksft_exit_fail_msg("munmap failed\n");
+
+ /* Hole in the middle */
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_READ with holes in the middle\n");
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_WRITE with holes in the middle\n");
+
+ /* Hole at end */
+ ret = madvise(addr, 2 * pagesize, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_READ with holes at the end\n");
+ ret = madvise(addr, 2 * pagesize, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_WRITE with holes at the end\n");
+
+ /* Hole at beginning */
+ ret = madvise(addr + pagesize, pagesize, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_READ with holes at the beginning\n");
+ ret = madvise(addr + pagesize, pagesize, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_WRITE with holes at the beginning\n");
+
+ munmap(addr, SIZE);
+}
+
+static bool range_is_populated(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (!pagemap_is_populated(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static bool range_is_not_populated(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (pagemap_is_populated(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static void test_populate_read(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ ksft_test_result(range_is_not_populated(addr, SIZE),
+ "range initially not populated\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(!ret, "MADV_POPULATE_READ\n");
+ ksft_test_result(range_is_populated(addr, SIZE),
+ "range is populated\n");
+
+ munmap(addr, SIZE);
+}
+
+static void test_populate_write(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ ksft_test_result(range_is_not_populated(addr, SIZE),
+ "range initially not populated\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
+ ksft_test_result(range_is_populated(addr, SIZE),
+ "range is populated\n");
+
+ munmap(addr, SIZE);
+}
+
+static bool range_is_softdirty(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (!pagemap_is_softdirty(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static bool range_is_not_softdirty(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (pagemap_is_softdirty(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static void clear_softdirty(void)
+{
+ int fd = open("/proc/self/clear_refs", O_WRONLY);
+ const char *ctrl = "4";
+ int ret;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening clear_refs failed\n");
+ ret = write(fd, ctrl, strlen(ctrl));
+ if (ret != strlen(ctrl))
+ ksft_exit_fail_msg("writing clear_refs failed\n");
+ close(fd);
+}
+
+static void test_softdirty(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /* Clear any softdirty bits. */
+ clear_softdirty();
+ ksft_test_result(range_is_not_softdirty(addr, SIZE),
+ "range is not softdirty\n");
+
+ /* Populating READ should not set softdirty. */
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(!ret, "MADV_POPULATE_READ\n");
+ ksft_test_result(range_is_not_softdirty(addr, SIZE),
+ "range is not softdirty\n");
+
+ /* Populating WRITE should set softdirty. */
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
+ ksft_test_result(range_is_softdirty(addr, SIZE),
+ "range is softdirty\n");
+
+ munmap(addr, SIZE);
+}
+
+int main(int argc, char **argv)
+{
+ int err;
+
+ pagesize = getpagesize();
+
+ ksft_print_header();
+ ksft_set_plan(21);
+
+ sense_support();
+ test_prot_read();
+ test_prot_write();
+ test_holes();
+ test_populate_read();
+ test_populate_write();
+ test_softdirty();
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ return ksft_exit_pass();
+}
+
+#else /* defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) */
+
+#warning "missing MADV_POPULATE_READ or MADV_POPULATE_WRITE definition"
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_exit_skip("MADV_POPULATE_READ or MADV_POPULATE_WRITE not defined\n");
+}
+
+#endif /* defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) */
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index 3be20f5d5275..e4a4ce2b826d 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -126,6 +126,7 @@ static inline u32 pkey_bit_position(int pkey)
#define XSTATE_PKEY_BIT (9)
#define XSTATE_PKEY 0x200
+#define XSTATE_BV_OFFSET 512
int pkey_reg_xstate_offset(void)
{
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index fdbb602ecf32..2d0ae88665db 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -510,7 +510,7 @@ int alloc_pkey(void)
" shadow: 0x%016llx\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
- if (ret) {
+ if (ret > 0) {
/* clear both the bits: */
shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
~PKEY_MASK);
@@ -561,7 +561,6 @@ int alloc_random_pkey(void)
int nr_alloced = 0;
int random_index;
memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
- srand((unsigned int)time(NULL));
/* allocate every possible key and make a note of which ones we got */
max_nr_pkey_allocs = NR_PKEYS;
@@ -1278,6 +1277,78 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
}
}
+void arch_force_pkey_reg_init(void)
+{
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ u64 *buf;
+
+ /*
+ * All keys should be allocated and set to allow reads and
+ * writes, so the register should be all 0. If not, just
+ * skip the test.
+ */
+ if (read_pkey_reg())
+ return;
+
+ /*
+ * Just allocate an absurd amount of memory rather than
+ * doing the XSAVE size enumeration dance.
+ */
+ buf = mmap(NULL, 1*MB, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+
+ /* These __builtins require compiling with -mxsave */
+
+ /* XSAVE to build a valid buffer: */
+ __builtin_ia32_xsave(buf, XSTATE_PKEY);
+ /* Clear XSTATE_BV[PKRU]: */
+ buf[XSTATE_BV_OFFSET/sizeof(u64)] &= ~XSTATE_PKEY;
+ /* XRSTOR will likely get PKRU back to the init state: */
+ __builtin_ia32_xrstor(buf, XSTATE_PKEY);
+
+ munmap(buf, 1*MB);
+#endif
+}
+
+
+/*
+ * This is mostly useless on ppc for now. But it will not
+ * hurt anything and should give some better coverage as
+ * a long-running test that continually checks the pkey
+ * register.
+ */
+void test_pkey_init_state(int *ptr, u16 pkey)
+{
+ int err;
+ int allocated_pkeys[NR_PKEYS] = {0};
+ int nr_allocated_pkeys = 0;
+ int i;
+
+ for (i = 0; i < NR_PKEYS; i++) {
+ int new_pkey = alloc_pkey();
+
+ if (new_pkey < 0)
+ continue;
+ allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
+ }
+
+ dprintf3("%s()::%d\n", __func__, __LINE__);
+
+ arch_force_pkey_reg_init();
+
+ /*
+ * Loop for a bit, hoping to exercise the kernel
+ * context switch code.
+ */
+ for (i = 0; i < 1000000; i++)
+ read_pkey_reg();
+
+ for (i = 0; i < nr_allocated_pkeys; i++) {
+ err = sys_pkey_free(allocated_pkeys[i]);
+ pkey_assert(!err);
+ read_pkey_reg(); /* for shadow checking */
+ }
+}
+
/*
* pkey 0 is special. It is allocated by default, so you do not
* have to call pkey_alloc() to use it first. Make sure that it
@@ -1449,6 +1520,13 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
pkey_assert(!ret);
+ /*
+ * Reset the shadow, assuming that the above mprotect()
+ * correctly changed PKRU, but to an unknown value since
+ * the actual allocated pkey is unknown.
+ */
+ shadow_pkey_reg = __read_pkey_reg();
+
dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */
@@ -1502,6 +1580,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_implicit_mprotect_exec_only_memory,
test_mprotect_with_pkey_0,
test_ptrace_of_child,
+ test_pkey_init_state,
test_pkey_syscalls_on_non_allocated_pkey,
test_pkey_syscalls_bad_args,
test_pkey_alloc_exhaust,
@@ -1552,6 +1631,8 @@ int main(void)
int nr_iterations = 22;
int pkeys_supported = is_pkeys_supported();
+ srand((unsigned int)time(NULL));
+
setup_handlers();
printf("has pkeys: %d\n", pkeys_supported);
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index e953f3cd9664..955782d138ab 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -346,4 +346,20 @@ else
exitcode=1
fi
+echo "--------------------------------------------------------"
+echo "running MADV_POPULATE_READ and MADV_POPULATE_WRITE tests"
+echo "--------------------------------------------------------"
+./madv_populate
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
exit $exitcode
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index f5ab5e0312e7..e363bdaff59d 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -85,10 +85,12 @@ static bool test_uffdio_wp = false;
static bool test_uffdio_minor = false;
static bool map_shared;
+static int shm_fd;
static int huge_fd;
static char *huge_fd_off0;
static unsigned long long *count_verify;
-static int uffd, uffd_flags, finished, *pipefd;
+static int uffd = -1;
+static int uffd_flags, finished, *pipefd;
static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
static char *zeropage;
pthread_attr_t attr;
@@ -140,11 +142,18 @@ static void usage(void)
exit(1);
}
-#define uffd_error(code, fmt, ...) \
- do { \
- fprintf(stderr, fmt, ##__VA_ARGS__); \
- fprintf(stderr, ": %" PRId64 "\n", (int64_t)(code)); \
- exit(1); \
+#define _err(fmt, ...) \
+ do { \
+ int ret = errno; \
+ fprintf(stderr, "ERROR: " fmt, ##__VA_ARGS__); \
+ fprintf(stderr, " (errno=%d, line=%d)\n", \
+ ret, __LINE__); \
+ } while (0)
+
+#define err(fmt, ...) \
+ do { \
+ _err(fmt, ##__VA_ARGS__); \
+ exit(1); \
} while (0)
static void uffd_stats_reset(struct uffd_stats *uffd_stats,
@@ -171,56 +180,50 @@ static void uffd_stats_report(struct uffd_stats *stats, int n_cpus)
minor_total += stats[i].minor_faults;
}
- printf("userfaults: %llu missing (", miss_total);
- for (i = 0; i < n_cpus; i++)
- printf("%lu+", stats[i].missing_faults);
- printf("\b), %llu wp (", wp_total);
- for (i = 0; i < n_cpus; i++)
- printf("%lu+", stats[i].wp_faults);
- printf("\b), %llu minor (", minor_total);
- for (i = 0; i < n_cpus; i++)
- printf("%lu+", stats[i].minor_faults);
- printf("\b)\n");
+ printf("userfaults: ");
+ if (miss_total) {
+ printf("%llu missing (", miss_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", stats[i].missing_faults);
+ printf("\b) ");
+ }
+ if (wp_total) {
+ printf("%llu wp (", wp_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", stats[i].wp_faults);
+ printf("\b) ");
+ }
+ if (minor_total) {
+ printf("%llu minor (", minor_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", stats[i].minor_faults);
+ printf("\b)");
+ }
+ printf("\n");
}
-static int anon_release_pages(char *rel_area)
+static void anon_release_pages(char *rel_area)
{
- int ret = 0;
-
- if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
- perror("madvise");
- ret = 1;
- }
-
- return ret;
+ if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
}
static void anon_allocate_area(void **alloc_area)
{
- if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) {
- fprintf(stderr, "out of memory\n");
- *alloc_area = NULL;
- }
+ if (posix_memalign(alloc_area, page_size, nr_pages * page_size))
+ err("posix_memalign() failed");
}
static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
{
}
-/* HugeTLB memory */
-static int hugetlb_release_pages(char *rel_area)
+static void hugetlb_release_pages(char *rel_area)
{
- int ret = 0;
-
if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- rel_area == huge_fd_off0 ? 0 :
- nr_pages * page_size,
- nr_pages * page_size)) {
- perror("fallocate");
- ret = 1;
- }
-
- return ret;
+ rel_area == huge_fd_off0 ? 0 : nr_pages * page_size,
+ nr_pages * page_size))
+ err("fallocate() failed");
}
static void hugetlb_allocate_area(void **alloc_area)
@@ -233,20 +236,16 @@ static void hugetlb_allocate_area(void **alloc_area)
MAP_HUGETLB,
huge_fd, *alloc_area == area_src ? 0 :
nr_pages * page_size);
- if (*alloc_area == MAP_FAILED) {
- perror("mmap of hugetlbfs file failed");
- goto fail;
- }
+ if (*alloc_area == MAP_FAILED)
+ err("mmap of hugetlbfs file failed");
if (map_shared) {
area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_HUGETLB,
huge_fd, *alloc_area == area_src ? 0 :
nr_pages * page_size);
- if (area_alias == MAP_FAILED) {
- perror("mmap of hugetlb file alias failed");
- goto fail_munmap;
- }
+ if (area_alias == MAP_FAILED)
+ err("mmap of hugetlb file alias failed");
}
if (*alloc_area == area_src) {
@@ -257,16 +256,6 @@ static void hugetlb_allocate_area(void **alloc_area)
}
if (area_alias)
*alloc_area_alias = area_alias;
-
- return;
-
-fail_munmap:
- if (munmap(*alloc_area, nr_pages * page_size) < 0) {
- perror("hugetlb munmap");
- exit(1);
- }
-fail:
- *alloc_area = NULL;
}
static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
@@ -282,33 +271,43 @@ static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset
*start = (unsigned long) area_dst_alias + offset;
}
-/* Shared memory */
-static int shmem_release_pages(char *rel_area)
+static void shmem_release_pages(char *rel_area)
{
- int ret = 0;
-
- if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
- perror("madvise");
- ret = 1;
- }
-
- return ret;
+ if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
+ err("madvise(MADV_REMOVE) failed");
}
static void shmem_allocate_area(void **alloc_area)
{
+ void *area_alias = NULL;
+ bool is_src = alloc_area == (void **)&area_src;
+ unsigned long offset = is_src ? 0 : nr_pages * page_size;
+
*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_SHARED, -1, 0);
- if (*alloc_area == MAP_FAILED) {
- fprintf(stderr, "shared memory mmap failed\n");
- *alloc_area = NULL;
- }
+ MAP_SHARED, shm_fd, offset);
+ if (*alloc_area == MAP_FAILED)
+ err("mmap of memfd failed");
+
+ area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, shm_fd, offset);
+ if (area_alias == MAP_FAILED)
+ err("mmap of memfd alias failed");
+
+ if (is_src)
+ area_src_alias = area_alias;
+ else
+ area_dst_alias = area_alias;
+}
+
+static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+ *start = (unsigned long)area_dst_alias + offset;
}
struct uffd_test_ops {
unsigned long expected_ioctls;
void (*allocate_area)(void **alloc_area);
- int (*release_pages)(char *rel_area);
+ void (*release_pages)(char *rel_area);
void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
};
@@ -332,7 +331,7 @@ static struct uffd_test_ops shmem_uffd_test_ops = {
.expected_ioctls = SHMEM_EXPECTED_IOCTLS,
.allocate_area = shmem_allocate_area,
.release_pages = shmem_release_pages,
- .alias_mapping = noop_alias_mapping,
+ .alias_mapping = shmem_alias_mapping,
};
static struct uffd_test_ops hugetlb_uffd_test_ops = {
@@ -344,6 +343,111 @@ static struct uffd_test_ops hugetlb_uffd_test_ops = {
static struct uffd_test_ops *uffd_test_ops;
+static void userfaultfd_open(uint64_t *features)
+{
+ struct uffdio_api uffdio_api;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
+ if (uffd < 0)
+ err("userfaultfd syscall not available in this kernel");
+ uffd_flags = fcntl(uffd, F_GETFD, NULL);
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = *features;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api))
+ err("UFFDIO_API failed.\nPlease make sure to "
+ "run with either root or ptrace capability.");
+ if (uffdio_api.api != UFFD_API)
+ err("UFFDIO_API error: %" PRIu64, (uint64_t)uffdio_api.api);
+
+ *features = uffdio_api.features;
+}
+
+static inline void munmap_area(void **area)
+{
+ if (*area)
+ if (munmap(*area, nr_pages * page_size))
+ err("munmap");
+
+ *area = NULL;
+}
+
+static void uffd_test_ctx_clear(void)
+{
+ size_t i;
+
+ if (pipefd) {
+ for (i = 0; i < nr_cpus * 2; ++i) {
+ if (close(pipefd[i]))
+ err("close pipefd");
+ }
+ free(pipefd);
+ pipefd = NULL;
+ }
+
+ if (count_verify) {
+ free(count_verify);
+ count_verify = NULL;
+ }
+
+ if (uffd != -1) {
+ if (close(uffd))
+ err("close uffd");
+ uffd = -1;
+ }
+
+ huge_fd_off0 = NULL;
+ munmap_area((void **)&area_src);
+ munmap_area((void **)&area_src_alias);
+ munmap_area((void **)&area_dst);
+ munmap_area((void **)&area_dst_alias);
+}
+
+static void uffd_test_ctx_init_ext(uint64_t *features)
+{
+ unsigned long nr, cpu;
+
+ uffd_test_ctx_clear();
+
+ uffd_test_ops->allocate_area((void **)&area_src);
+ uffd_test_ops->allocate_area((void **)&area_dst);
+
+ uffd_test_ops->release_pages(area_src);
+ uffd_test_ops->release_pages(area_dst);
+
+ userfaultfd_open(features);
+
+ count_verify = malloc(nr_pages * sizeof(unsigned long long));
+ if (!count_verify)
+ err("count_verify");
+
+ for (nr = 0; nr < nr_pages; nr++) {
+ *area_mutex(area_src, nr) =
+ (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
+ count_verify[nr] = *area_count(area_src, nr) = 1;
+ /*
+ * In the transition between 255 to 256, powerpc will
+ * read out of order in my_bcmp and see both bytes as
+ * zero, so leave a placeholder below always non-zero
+ * after the count, to avoid my_bcmp to trigger false
+ * positives.
+ */
+ *(area_count(area_src, nr) + 1) = 1;
+ }
+
+ pipefd = malloc(sizeof(int) * nr_cpus * 2);
+ if (!pipefd)
+ err("pipefd");
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK))
+ err("pipe");
+}
+
+static inline void uffd_test_ctx_init(uint64_t features)
+{
+ uffd_test_ctx_init_ext(&features);
+}
+
static int my_bcmp(char *str1, char *str2, size_t n)
{
unsigned long i;
@@ -363,27 +467,33 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
/* Undo write-protect, do wakeup after that */
prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
- if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) {
- fprintf(stderr, "clear WP failed for address 0x%" PRIx64 "\n",
- (uint64_t)start);
- exit(1);
- }
+ if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
+ err("clear WP failed: address=0x%"PRIx64, (uint64_t)start);
}
static void continue_range(int ufd, __u64 start, __u64 len)
{
struct uffdio_continue req;
+ int ret;
req.range.start = start;
req.range.len = len;
req.mode = 0;
- if (ioctl(ufd, UFFDIO_CONTINUE, &req)) {
- fprintf(stderr,
- "UFFDIO_CONTINUE failed for address 0x%" PRIx64 "\n",
- (uint64_t)start);
- exit(1);
- }
+ if (ioctl(ufd, UFFDIO_CONTINUE, &req))
+ err("UFFDIO_CONTINUE failed for address 0x%" PRIx64,
+ (uint64_t)start);
+
+ /*
+ * Error handling within the kernel for continue is subtly different
+ * from copy or zeropage, so it may be a source of bugs. Trigger an
+ * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG.
+ */
+ req.mapped = 0;
+ ret = ioctl(ufd, UFFDIO_CONTINUE, &req);
+ if (ret >= 0 || req.mapped != -EEXIST)
+ err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64,
+ ret, (int64_t) req.mapped);
}
static void *locking_thread(void *arg)
@@ -395,7 +505,6 @@ static void *locking_thread(void *arg)
unsigned long long count;
char randstate[64];
unsigned int seed;
- time_t start;
if (bounces & BOUNCE_RANDOM) {
seed = (unsigned int) time(NULL) - bounces;
@@ -403,10 +512,8 @@ static void *locking_thread(void *arg)
seed += cpu;
bzero(&rand, sizeof(rand));
bzero(&randstate, sizeof(randstate));
- if (initstate_r(seed, randstate, sizeof(randstate), &rand)) {
- fprintf(stderr, "srandom_r error\n");
- exit(1);
- }
+ if (initstate_r(seed, randstate, sizeof(randstate), &rand))
+ err("initstate_r failed");
} else {
page_nr = -bounces;
if (!(bounces & BOUNCE_RACINGFAULTS))
@@ -415,92 +522,26 @@ static void *locking_thread(void *arg)
while (!finished) {
if (bounces & BOUNCE_RANDOM) {
- if (random_r(&rand, &rand_nr)) {
- fprintf(stderr, "random_r 1 error\n");
- exit(1);
- }
+ if (random_r(&rand, &rand_nr))
+ err("random_r failed");
page_nr = rand_nr;
if (sizeof(page_nr) > sizeof(rand_nr)) {
- if (random_r(&rand, &rand_nr)) {
- fprintf(stderr, "random_r 2 error\n");
- exit(1);
- }
+ if (random_r(&rand, &rand_nr))
+ err("random_r failed");
page_nr |= (((unsigned long) rand_nr) << 16) <<
16;
}
} else
page_nr += 1;
page_nr %= nr_pages;
-
- start = time(NULL);
- if (bounces & BOUNCE_VERIFY) {
- count = *area_count(area_dst, page_nr);
- if (!count) {
- fprintf(stderr,
- "page_nr %lu wrong count %Lu %Lu\n",
- page_nr, count,
- count_verify[page_nr]);
- exit(1);
- }
-
-
- /*
- * We can't use bcmp (or memcmp) because that
- * returns 0 erroneously if the memory is
- * changing under it (even if the end of the
- * page is never changing and always
- * different).
- */
-#if 1
- if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
- page_size)) {
- fprintf(stderr,
- "my_bcmp page_nr %lu wrong count %Lu %Lu\n",
- page_nr, count, count_verify[page_nr]);
- exit(1);
- }
-#else
- unsigned long loops;
-
- loops = 0;
- /* uncomment the below line to test with mutex */
- /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
- while (!bcmp(area_dst + page_nr * page_size, zeropage,
- page_size)) {
- loops += 1;
- if (loops > 10)
- break;
- }
- /* uncomment below line to test with mutex */
- /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
- if (loops) {
- fprintf(stderr,
- "page_nr %lu all zero thread %lu %p %lu\n",
- page_nr, cpu, area_dst + page_nr * page_size,
- loops);
- if (loops > 10)
- exit(1);
- }
-#endif
- }
-
pthread_mutex_lock(area_mutex(area_dst, page_nr));
count = *area_count(area_dst, page_nr);
- if (count != count_verify[page_nr]) {
- fprintf(stderr,
- "page_nr %lu memory corruption %Lu %Lu\n",
- page_nr, count,
- count_verify[page_nr]); exit(1);
- }
+ if (count != count_verify[page_nr])
+ err("page_nr %lu memory corruption %llu %llu",
+ page_nr, count, count_verify[page_nr]);
count++;
*area_count(area_dst, page_nr) = count_verify[page_nr] = count;
pthread_mutex_unlock(area_mutex(area_dst, page_nr));
-
- if (time(NULL) - start > 1)
- fprintf(stderr,
- "userfault too slow %ld "
- "possible false positive with overcommit\n",
- time(NULL) - start);
}
return NULL;
@@ -514,22 +555,21 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
offset);
if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
/* real retval in ufdio_copy.copy */
- if (uffdio_copy->copy != -EEXIST) {
- uffd_error(uffdio_copy->copy,
- "UFFDIO_COPY retry error");
- }
- } else
- uffd_error(uffdio_copy->copy, "UFFDIO_COPY retry unexpected");
+ if (uffdio_copy->copy != -EEXIST)
+ err("UFFDIO_COPY retry error: %"PRId64,
+ (int64_t)uffdio_copy->copy);
+ } else {
+ err("UFFDIO_COPY retry unexpected: %"PRId64,
+ (int64_t)uffdio_copy->copy);
+ }
}
static int __copy_page(int ufd, unsigned long offset, bool retry)
{
struct uffdio_copy uffdio_copy;
- if (offset >= nr_pages * page_size) {
- fprintf(stderr, "unexpected offset %lu\n", offset);
- exit(1);
- }
+ if (offset >= nr_pages * page_size)
+ err("unexpected offset %lu\n", offset);
uffdio_copy.dst = (unsigned long) area_dst + offset;
uffdio_copy.src = (unsigned long) area_src + offset;
uffdio_copy.len = page_size;
@@ -541,9 +581,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
/* real retval in ufdio_copy.copy */
if (uffdio_copy.copy != -EEXIST)
- uffd_error(uffdio_copy.copy, "UFFDIO_COPY error");
+ err("UFFDIO_COPY error: %"PRId64,
+ (int64_t)uffdio_copy.copy);
} else if (uffdio_copy.copy != page_size) {
- uffd_error(uffdio_copy.copy, "UFFDIO_COPY unexpected copy");
+ err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy);
} else {
if (test_uffdio_copy_eexist && retry) {
test_uffdio_copy_eexist = false;
@@ -572,11 +613,10 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
if (ret < 0) {
if (errno == EAGAIN)
return 1;
- perror("blocking read error");
+ err("blocking read error");
} else {
- fprintf(stderr, "short read\n");
+ err("short read");
}
- exit(1);
}
return 0;
@@ -587,10 +627,8 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
{
unsigned long offset;
- if (msg->event != UFFD_EVENT_PAGEFAULT) {
- fprintf(stderr, "unexpected msg event %u\n", msg->event);
- exit(1);
- }
+ if (msg->event != UFFD_EVENT_PAGEFAULT)
+ err("unexpected msg event %u", msg->event);
if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
/* Write protect page faults */
@@ -621,11 +659,8 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
stats->minor_faults++;
} else {
/* Missing page faults */
- if (bounces & BOUNCE_VERIFY &&
- msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) {
- fprintf(stderr, "unexpected write fault\n");
- exit(1);
- }
+ if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ err("unexpected write fault");
offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
offset &= ~(page_size-1);
@@ -652,32 +687,20 @@ static void *uffd_poll_thread(void *arg)
for (;;) {
ret = poll(pollfd, 2, -1);
- if (!ret) {
- fprintf(stderr, "poll error %d\n", ret);
- exit(1);
- }
- if (ret < 0) {
- perror("poll");
- exit(1);
- }
+ if (ret <= 0)
+ err("poll error: %d", ret);
if (pollfd[1].revents & POLLIN) {
- if (read(pollfd[1].fd, &tmp_chr, 1) != 1) {
- fprintf(stderr, "read pipefd error\n");
- exit(1);
- }
+ if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
+ err("read pipefd error");
break;
}
- if (!(pollfd[0].revents & POLLIN)) {
- fprintf(stderr, "pollfd[0].revents %d\n",
- pollfd[0].revents);
- exit(1);
- }
+ if (!(pollfd[0].revents & POLLIN))
+ err("pollfd[0].revents %d", pollfd[0].revents);
if (uffd_read_msg(uffd, &msg))
continue;
switch (msg.event) {
default:
- fprintf(stderr, "unexpected msg event %u\n",
- msg.event); exit(1);
+ err("unexpected msg event %u\n", msg.event);
break;
case UFFD_EVENT_PAGEFAULT:
uffd_handle_page_fault(&msg, stats);
@@ -691,10 +714,8 @@ static void *uffd_poll_thread(void *arg)
uffd_reg.range.start = msg.arg.remove.start;
uffd_reg.range.len = msg.arg.remove.end -
msg.arg.remove.start;
- if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) {
- fprintf(stderr, "remove failure\n");
- exit(1);
- }
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
+ err("remove failure");
break;
case UFFD_EVENT_REMAP:
area_dst = (char *)(unsigned long)msg.arg.remap.to;
@@ -797,9 +818,7 @@ static int stress(struct uffd_stats *uffd_stats)
* UFFDIO_COPY without writing zero pages into area_dst
* because the background threads already completed).
*/
- if (uffd_test_ops->release_pages(area_src))
- return 1;
-
+ uffd_test_ops->release_pages(area_src);
finished = 1;
for (cpu = 0; cpu < nr_cpus; cpu++)
@@ -809,10 +828,8 @@ static int stress(struct uffd_stats *uffd_stats)
for (cpu = 0; cpu < nr_cpus; cpu++) {
char c;
if (bounces & BOUNCE_POLL) {
- if (write(pipefd[cpu*2+1], &c, 1) != 1) {
- fprintf(stderr, "pipefd write error\n");
- return 1;
- }
+ if (write(pipefd[cpu*2+1], &c, 1) != 1)
+ err("pipefd write error");
if (pthread_join(uffd_threads[cpu],
(void *)&uffd_stats[cpu]))
return 1;
@@ -827,40 +844,6 @@ static int stress(struct uffd_stats *uffd_stats)
return 0;
}
-static int userfaultfd_open_ext(uint64_t *features)
-{
- struct uffdio_api uffdio_api;
-
- uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
- if (uffd < 0) {
- fprintf(stderr,
- "userfaultfd syscall not available in this kernel\n");
- return 1;
- }
- uffd_flags = fcntl(uffd, F_GETFD, NULL);
-
- uffdio_api.api = UFFD_API;
- uffdio_api.features = *features;
- if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
- fprintf(stderr, "UFFDIO_API failed.\nPlease make sure to "
- "run with either root or ptrace capability.\n");
- return 1;
- }
- if (uffdio_api.api != UFFD_API) {
- fprintf(stderr, "UFFDIO_API error: %" PRIu64 "\n",
- (uint64_t)uffdio_api.api);
- return 1;
- }
-
- *features = uffdio_api.features;
- return 0;
-}
-
-static int userfaultfd_open(uint64_t features)
-{
- return userfaultfd_open_ext(&features);
-}
-
sigjmp_buf jbuf, *sigbuf;
static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
@@ -912,10 +895,8 @@ static int faulting_process(int signal_test)
memset(&act, 0, sizeof(act));
act.sa_sigaction = sighndl;
act.sa_flags = SA_SIGINFO;
- if (sigaction(SIGBUS, &act, 0)) {
- perror("sigaction");
- return 1;
- }
+ if (sigaction(SIGBUS, &act, 0))
+ err("sigaction");
lastnr = (unsigned long)-1;
}
@@ -925,10 +906,8 @@ static int faulting_process(int signal_test)
if (signal_test) {
if (sigsetjmp(*sigbuf, 1) != 0) {
- if (steps == 1 && nr == lastnr) {
- fprintf(stderr, "Signal repeated\n");
- return 1;
- }
+ if (steps == 1 && nr == lastnr)
+ err("Signal repeated");
lastnr = nr;
if (signal_test == 1) {
@@ -953,12 +932,9 @@ static int faulting_process(int signal_test)
}
count = *area_count(area_dst, nr);
- if (count != count_verify[nr]) {
- fprintf(stderr,
- "nr %lu memory corruption %Lu %Lu\n",
- nr, count,
- count_verify[nr]);
- }
+ if (count != count_verify[nr])
+ err("nr %lu memory corruption %llu %llu\n",
+ nr, count, count_verify[nr]);
/*
* Trigger write protection if there is by writing
* the same value back.
@@ -974,18 +950,16 @@ static int faulting_process(int signal_test)
area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
- if (area_dst == MAP_FAILED) {
- perror("mremap");
- exit(1);
- }
+ if (area_dst == MAP_FAILED)
+ err("mremap");
+ /* Reset area_src since we just clobbered it */
+ area_src = NULL;
for (; nr < nr_pages; nr++) {
count = *area_count(area_dst, nr);
if (count != count_verify[nr]) {
- fprintf(stderr,
- "nr %lu memory corruption %Lu %Lu\n",
- nr, count,
- count_verify[nr]); exit(1);
+ err("nr %lu memory corruption %llu %llu\n",
+ nr, count, count_verify[nr]);
}
/*
* Trigger write protection if there is by writing
@@ -994,15 +968,11 @@ static int faulting_process(int signal_test)
*area_count(area_dst, nr) = count;
}
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
+ uffd_test_ops->release_pages(area_dst);
- for (nr = 0; nr < nr_pages; nr++) {
- if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) {
- fprintf(stderr, "nr %lu is not zero\n", nr);
- exit(1);
- }
- }
+ for (nr = 0; nr < nr_pages; nr++)
+ if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
+ err("nr %lu is not zero", nr);
return 0;
}
@@ -1015,13 +985,12 @@ static void retry_uffdio_zeropage(int ufd,
uffdio_zeropage->range.len,
offset);
if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
- if (uffdio_zeropage->zeropage != -EEXIST) {
- uffd_error(uffdio_zeropage->zeropage,
- "UFFDIO_ZEROPAGE retry error");
- }
+ if (uffdio_zeropage->zeropage != -EEXIST)
+ err("UFFDIO_ZEROPAGE error: %"PRId64,
+ (int64_t)uffdio_zeropage->zeropage);
} else {
- uffd_error(uffdio_zeropage->zeropage,
- "UFFDIO_ZEROPAGE retry unexpected");
+ err("UFFDIO_ZEROPAGE error: %"PRId64,
+ (int64_t)uffdio_zeropage->zeropage);
}
}
@@ -1034,10 +1003,8 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
- if (offset >= nr_pages * page_size) {
- fprintf(stderr, "unexpected offset %lu\n", offset);
- exit(1);
- }
+ if (offset >= nr_pages * page_size)
+ err("unexpected offset %lu", offset);
uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
uffdio_zeropage.range.len = page_size;
uffdio_zeropage.mode = 0;
@@ -1045,14 +1012,13 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
res = uffdio_zeropage.zeropage;
if (ret) {
/* real retval in ufdio_zeropage.zeropage */
- if (has_zeropage) {
- uffd_error(res, "UFFDIO_ZEROPAGE %s",
- res == -EEXIST ? "-EEXIST" : "error");
- } else if (res != -EINVAL)
- uffd_error(res, "UFFDIO_ZEROPAGE not -EINVAL");
+ if (has_zeropage)
+ err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res);
+ else if (res != -EINVAL)
+ err("UFFDIO_ZEROPAGE not -EINVAL");
} else if (has_zeropage) {
if (res != page_size) {
- uffd_error(res, "UFFDIO_ZEROPAGE unexpected");
+ err("UFFDIO_ZEROPAGE unexpected size");
} else {
if (test_uffdio_zeropage_eexist && retry) {
test_uffdio_zeropage_eexist = false;
@@ -1062,7 +1028,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
return 1;
}
} else
- uffd_error(res, "UFFDIO_ZEROPAGE succeeded");
+ err("UFFDIO_ZEROPAGE succeeded");
return 0;
}
@@ -1081,37 +1047,24 @@ static int userfaultfd_zeropage_test(void)
printf("testing UFFDIO_ZEROPAGE: ");
fflush(stdout);
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
+ uffd_test_ctx_init(0);
- if (userfaultfd_open(0))
- return 1;
uffdio_register.range.start = (unsigned long) area_dst;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure\n");
- exit(1);
- }
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failure");
expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls) {
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n");
- exit(1);
- }
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls)
+ err("unexpected missing ioctl for anon memory");
- if (uffdio_zeropage(uffd, 0)) {
- if (my_bcmp(area_dst, zeropage, page_size)) {
- fprintf(stderr, "zeropage is not zero\n");
- exit(1);
- }
- }
+ if (uffdio_zeropage(uffd, 0))
+ if (my_bcmp(area_dst, zeropage, page_size))
+ err("zeropage is not zero");
- close(uffd);
printf("done.\n");
return 0;
}
@@ -1129,13 +1082,10 @@ static int userfaultfd_events_test(void)
printf("testing events (fork, remap, remove): ");
fflush(stdout);
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
-
features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
UFFD_FEATURE_EVENT_REMOVE;
- if (userfaultfd_open(features))
- return 1;
+ uffd_test_ctx_init(features);
+
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
uffdio_register.range.start = (unsigned long) area_dst;
@@ -1143,46 +1093,31 @@ static int userfaultfd_events_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure\n");
- exit(1);
- }
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failure");
expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
- fprintf(stderr, "unexpected missing ioctl for anon memory\n");
- exit(1);
- }
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls)
+ err("unexpected missing ioctl for anon memory");
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
- perror("uffd_poll_thread create");
- exit(1);
- }
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
+ err("uffd_poll_thread create");
pid = fork();
- if (pid < 0) {
- perror("fork");
- exit(1);
- }
+ if (pid < 0)
+ err("fork");
if (!pid)
exit(faulting_process(0));
waitpid(pid, &err, 0);
- if (err) {
- fprintf(stderr, "faulting process failed\n");
- exit(1);
- }
-
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
- perror("pipe write");
- exit(1);
- }
+ if (err)
+ err("faulting process failed");
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
if (pthread_join(uffd_mon, NULL))
return 1;
- close(uffd);
-
uffd_stats_report(&stats, 1);
return stats.missing_faults != nr_pages;
@@ -1202,12 +1137,9 @@ static int userfaultfd_sig_test(void)
printf("testing signal delivery: ");
fflush(stdout);
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
-
features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
- if (userfaultfd_open(features))
- return 1;
+ uffd_test_ctx_init(features);
+
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
uffdio_register.range.start = (unsigned long) area_dst;
@@ -1215,57 +1147,40 @@ static int userfaultfd_sig_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure\n");
- exit(1);
- }
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failure");
expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
- fprintf(stderr, "unexpected missing ioctl for anon memory\n");
- exit(1);
- }
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls)
+ err("unexpected missing ioctl for anon memory");
- if (faulting_process(1)) {
- fprintf(stderr, "faulting process failed\n");
- exit(1);
- }
+ if (faulting_process(1))
+ err("faulting process failed");
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
+ uffd_test_ops->release_pages(area_dst);
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
- perror("uffd_poll_thread create");
- exit(1);
- }
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
+ err("uffd_poll_thread create");
pid = fork();
- if (pid < 0) {
- perror("fork");
- exit(1);
- }
+ if (pid < 0)
+ err("fork");
if (!pid)
exit(faulting_process(2));
waitpid(pid, &err, 0);
- if (err) {
- fprintf(stderr, "faulting process failed\n");
- exit(1);
- }
-
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
- perror("pipe write");
- exit(1);
- }
+ if (err)
+ err("faulting process failed");
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
if (pthread_join(uffd_mon, (void **)&userfaults))
return 1;
printf("done.\n");
if (userfaults)
- fprintf(stderr, "Signal test failed, userfaults: %ld\n",
- userfaults);
- close(uffd);
+ err("Signal test failed, userfaults: %ld", userfaults);
+
return userfaults != 0;
}
@@ -1279,7 +1194,7 @@ static int userfaultfd_minor_test(void)
void *expected_page;
char c;
struct uffd_stats stats = { 0 };
- uint64_t features = UFFD_FEATURE_MINOR_HUGETLBFS;
+ uint64_t req_features, features_out;
if (!test_uffdio_minor)
return 0;
@@ -1287,13 +1202,17 @@ static int userfaultfd_minor_test(void)
printf("testing minor faults: ");
fflush(stdout);
- if (uffd_test_ops->release_pages(area_dst))
+ if (test_type == TEST_HUGETLB)
+ req_features = UFFD_FEATURE_MINOR_HUGETLBFS;
+ else if (test_type == TEST_SHMEM)
+ req_features = UFFD_FEATURE_MINOR_SHMEM;
+ else
return 1;
- if (userfaultfd_open_ext(&features))
- return 1;
- /* If kernel reports the feature isn't supported, skip the test. */
- if (!(features & UFFD_FEATURE_MINOR_HUGETLBFS)) {
+ features_out = req_features;
+ uffd_test_ctx_init_ext(&features_out);
+ /* If kernel reports required features aren't supported, skip test. */
+ if ((features_out & req_features) != req_features) {
printf("skipping test due to lack of feature support\n");
fflush(stdout);
return 0;
@@ -1302,17 +1221,13 @@ static int userfaultfd_minor_test(void)
uffdio_register.range.start = (unsigned long)area_dst_alias;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MINOR;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure\n");
- exit(1);
- }
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failure");
expected_ioctls = uffd_test_ops->expected_ioctls;
expected_ioctls |= 1 << _UFFDIO_CONTINUE;
- if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
- fprintf(stderr, "unexpected missing ioctl(s)\n");
- exit(1);
- }
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls)
+ err("unexpected missing ioctl(s)");
/*
* After registering with UFFD, populate the non-UFFD-registered side of
@@ -1323,10 +1238,8 @@ static int userfaultfd_minor_test(void)
page_size);
}
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
- perror("uffd_poll_thread create");
- exit(1);
- }
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
+ err("uffd_poll_thread create");
/*
* Read each of the pages back using the UFFD-registered mapping. We
@@ -1335,92 +1248,173 @@ static int userfaultfd_minor_test(void)
* page's contents, and then issuing a CONTINUE ioctl.
*/
- if (posix_memalign(&expected_page, page_size, page_size)) {
- fprintf(stderr, "out of memory\n");
- return 1;
- }
+ if (posix_memalign(&expected_page, page_size, page_size))
+ err("out of memory");
for (p = 0; p < nr_pages; ++p) {
expected_byte = ~((uint8_t)(p % ((uint8_t)-1)));
memset(expected_page, expected_byte, page_size);
if (my_bcmp(expected_page, area_dst_alias + (p * page_size),
- page_size)) {
- fprintf(stderr,
- "unexpected page contents after minor fault\n");
- exit(1);
- }
+ page_size))
+ err("unexpected page contents after minor fault");
}
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
- perror("pipe write");
- exit(1);
- }
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
if (pthread_join(uffd_mon, NULL))
return 1;
- close(uffd);
-
uffd_stats_report(&stats, 1);
return stats.missing_faults != 0 || stats.minor_faults != nr_pages;
}
-static int userfaultfd_stress(void)
+#define BIT_ULL(nr) (1ULL << (nr))
+#define PM_SOFT_DIRTY BIT_ULL(55)
+#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
+#define PM_UFFD_WP BIT_ULL(57)
+#define PM_FILE BIT_ULL(61)
+#define PM_SWAP BIT_ULL(62)
+#define PM_PRESENT BIT_ULL(63)
+
+static int pagemap_open(void)
{
- void *area;
- char *tmp_area;
- unsigned long nr;
- struct uffdio_register uffdio_register;
- unsigned long cpu;
- int err;
- struct uffd_stats uffd_stats[nr_cpus];
+ int fd = open("/proc/self/pagemap", O_RDONLY);
- uffd_test_ops->allocate_area((void **)&area_src);
- if (!area_src)
- return 1;
- uffd_test_ops->allocate_area((void **)&area_dst);
- if (!area_dst)
- return 1;
+ if (fd < 0)
+ err("open pagemap");
- if (userfaultfd_open(0))
- return 1;
+ return fd;
+}
- count_verify = malloc(nr_pages * sizeof(unsigned long long));
- if (!count_verify) {
- perror("count_verify");
- return 1;
- }
+static uint64_t pagemap_read_vaddr(int fd, void *vaddr)
+{
+ uint64_t value;
+ int ret;
- for (nr = 0; nr < nr_pages; nr++) {
- *area_mutex(area_src, nr) = (pthread_mutex_t)
- PTHREAD_MUTEX_INITIALIZER;
- count_verify[nr] = *area_count(area_src, nr) = 1;
+ ret = pread(fd, &value, sizeof(uint64_t),
+ ((uint64_t)vaddr >> 12) * sizeof(uint64_t));
+ if (ret != sizeof(uint64_t))
+ err("pread() on pagemap failed");
+
+ return value;
+}
+
+/* This is a macro so that __LINE__ works in err() */
+#define pagemap_check_wp(value, wp) do { \
+ if (!!(value & PM_UFFD_WP) != wp) \
+ err("pagemap uffd-wp bit error: 0x%"PRIx64, value); \
+ } while (0)
+
+static int pagemap_test_fork(bool present)
+{
+ pid_t child = fork();
+ uint64_t value;
+ int fd, result;
+
+ if (!child) {
+ /* Open the pagemap fd of the child itself */
+ fd = pagemap_open();
+ value = pagemap_read_vaddr(fd, area_dst);
/*
- * In the transition between 255 to 256, powerpc will
- * read out of order in my_bcmp and see both bytes as
- * zero, so leave a placeholder below always non-zero
- * after the count, to avoid my_bcmp to trigger false
- * positives.
+ * After fork() uffd-wp bit should be gone as long as we're
+ * without UFFD_FEATURE_EVENT_FORK
*/
- *(area_count(area_src, nr) + 1) = 1;
+ pagemap_check_wp(value, false);
+ /* Succeed */
+ exit(0);
}
+ waitpid(child, &result, 0);
+ return result;
+}
- pipefd = malloc(sizeof(int) * nr_cpus * 2);
- if (!pipefd) {
- perror("pipefd");
- return 1;
- }
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
- perror("pipe");
- return 1;
- }
- }
+static void userfaultfd_pagemap_test(unsigned int test_pgsize)
+{
+ struct uffdio_register uffdio_register;
+ int pagemap_fd;
+ uint64_t value;
- if (posix_memalign(&area, page_size, page_size)) {
- fprintf(stderr, "out of memory\n");
- return 1;
+ /* Pagemap tests uffd-wp only */
+ if (!test_uffdio_wp)
+ return;
+
+ /* Not enough memory to test this page size */
+ if (test_pgsize > nr_pages * page_size)
+ return;
+
+ printf("testing uffd-wp with pagemap (pgsize=%u): ", test_pgsize);
+ /* Flush so it doesn't flush twice in parent/child later */
+ fflush(stdout);
+
+ uffd_test_ctx_init(0);
+
+ if (test_pgsize > page_size) {
+ /* This is a thp test */
+ if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE))
+ err("madvise(MADV_HUGEPAGE) failed");
+ } else if (test_pgsize == page_size) {
+ /* This is normal page test; force no thp */
+ if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE))
+ err("madvise(MADV_NOHUGEPAGE) failed");
}
+
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failed");
+
+ pagemap_fd = pagemap_open();
+
+ /* Touch the page */
+ *area_dst = 1;
+ wp_range(uffd, (uint64_t)area_dst, test_pgsize, true);
+ value = pagemap_read_vaddr(pagemap_fd, area_dst);
+ pagemap_check_wp(value, true);
+ /* Make sure uffd-wp bit dropped when fork */
+ if (pagemap_test_fork(true))
+ err("Detected stall uffd-wp bit in child");
+
+ /* Exclusive required or PAGEOUT won't work */
+ if (!(value & PM_MMAP_EXCLUSIVE))
+ err("multiple mapping detected: 0x%"PRIx64, value);
+
+ if (madvise(area_dst, test_pgsize, MADV_PAGEOUT))
+ err("madvise(MADV_PAGEOUT) failed");
+
+ /* Uffd-wp should persist even when swapped out */
+ value = pagemap_read_vaddr(pagemap_fd, area_dst);
+ pagemap_check_wp(value, true);
+ /* Make sure uffd-wp bit dropped when fork */
+ if (pagemap_test_fork(false))
+ err("Detected stall uffd-wp bit in child");
+
+ /* Unprotect; this tests swap pte modifications */
+ wp_range(uffd, (uint64_t)area_dst, page_size, false);
+ value = pagemap_read_vaddr(pagemap_fd, area_dst);
+ pagemap_check_wp(value, false);
+
+ /* Fault in the page from disk */
+ *area_dst = 2;
+ value = pagemap_read_vaddr(pagemap_fd, area_dst);
+ pagemap_check_wp(value, false);
+
+ close(pagemap_fd);
+ printf("done\n");
+}
+
+static int userfaultfd_stress(void)
+{
+ void *area;
+ char *tmp_area;
+ unsigned long nr;
+ struct uffdio_register uffdio_register;
+ struct uffd_stats uffd_stats[nr_cpus];
+
+ uffd_test_ctx_init(0);
+
+ if (posix_memalign(&area, page_size, page_size))
+ err("out of memory");
zeropage = area;
bzero(zeropage, page_size);
@@ -1429,7 +1423,6 @@ static int userfaultfd_stress(void)
pthread_attr_init(&attr);
pthread_attr_setstacksize(&attr, 16*1024*1024);
- err = 0;
while (bounces--) {
unsigned long expected_ioctls;
@@ -1458,25 +1451,18 @@ static int userfaultfd_stress(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure\n");
- return 1;
- }
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failure");
expected_ioctls = uffd_test_ops->expected_ioctls;
if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls) {
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n");
- return 1;
- }
+ expected_ioctls)
+ err("unexpected missing ioctl for anon memory");
if (area_dst_alias) {
uffdio_register.range.start = (unsigned long)
area_dst_alias;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure alias\n");
- return 1;
- }
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failure alias");
}
/*
@@ -1503,8 +1489,7 @@ static int userfaultfd_stress(void)
* MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
* required to MADV_DONTNEED here.
*/
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
+ uffd_test_ops->release_pages(area_dst);
uffd_stats_reset(uffd_stats, nr_cpus);
@@ -1518,33 +1503,22 @@ static int userfaultfd_stress(void)
nr_pages * page_size, false);
/* unregister */
- if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
- fprintf(stderr, "unregister failure\n");
- return 1;
- }
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range))
+ err("unregister failure");
if (area_dst_alias) {
uffdio_register.range.start = (unsigned long) area_dst;
if (ioctl(uffd, UFFDIO_UNREGISTER,
- &uffdio_register.range)) {
- fprintf(stderr, "unregister failure alias\n");
- return 1;
- }
+ &uffdio_register.range))
+ err("unregister failure alias");
}
/* verification */
- if (bounces & BOUNCE_VERIFY) {
- for (nr = 0; nr < nr_pages; nr++) {
- if (*area_count(area_dst, nr) != count_verify[nr]) {
- fprintf(stderr,
- "error area_count %Lu %Lu %lu\n",
- *area_count(area_src, nr),
- count_verify[nr],
- nr);
- err = 1;
- bounces = 0;
- }
- }
- }
+ if (bounces & BOUNCE_VERIFY)
+ for (nr = 0; nr < nr_pages; nr++)
+ if (*area_count(area_dst, nr) != count_verify[nr])
+ err("error area_count %llu %llu %lu\n",
+ *area_count(area_src, nr),
+ count_verify[nr], nr);
/* prepare next bounce */
tmp_area = area_src;
@@ -1558,10 +1532,21 @@ static int userfaultfd_stress(void)
uffd_stats_report(uffd_stats, nr_cpus);
}
- if (err)
- return err;
+ if (test_type == TEST_ANON) {
+ /*
+ * shmem/hugetlb won't be able to run since they have different
+ * behavior on fork() (file-backed memory normally drops ptes
+ * directly when fork), meanwhile the pagemap test will verify
+ * pgtable entry of fork()ed child.
+ */
+ userfaultfd_pagemap_test(page_size);
+ /*
+ * Hard-code for x86_64 for now for 2M THP, as x86_64 is
+ * currently the only one that supports uffd-wp
+ */
+ userfaultfd_pagemap_test(page_size * 512);
+ }
- close(uffd);
return userfaultfd_zeropage_test() || userfaultfd_sig_test()
|| userfaultfd_events_test() || userfaultfd_minor_test();
}
@@ -1610,8 +1595,9 @@ static void set_test_type(const char *type)
map_shared = true;
test_type = TEST_SHMEM;
uffd_test_ops = &shmem_uffd_test_ops;
+ test_uffdio_minor = true;
} else {
- fprintf(stderr, "Unknown test type: %s\n", type); exit(1);
+ err("Unknown test type: %s", type);
}
if (test_type == TEST_HUGETLB)
@@ -1619,15 +1605,11 @@ static void set_test_type(const char *type)
else
page_size = sysconf(_SC_PAGE_SIZE);
- if (!page_size) {
- fprintf(stderr, "Unable to determine page size\n");
- exit(2);
- }
+ if (!page_size)
+ err("Unable to determine page size");
if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
- > page_size) {
- fprintf(stderr, "Impossible to run this test\n");
- exit(2);
- }
+ > page_size)
+ err("Impossible to run this test");
}
static void sigalrm(int sig)
@@ -1644,10 +1626,8 @@ int main(int argc, char **argv)
if (argc < 4)
usage();
- if (signal(SIGALRM, sigalrm) == SIG_ERR) {
- fprintf(stderr, "failed to arm SIGALRM");
- exit(1);
- }
+ if (signal(SIGALRM, sigalrm) == SIG_ERR)
+ err("failed to arm SIGALRM");
alarm(ALARM_INTERVAL_SECS);
set_test_type(argv[1]);
@@ -1656,13 +1636,13 @@ int main(int argc, char **argv)
nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
nr_cpus;
if (!nr_pages_per_cpu) {
- fprintf(stderr, "invalid MiB\n");
+ _err("invalid MiB");
usage();
}
bounces = atoi(argv[3]);
if (bounces <= 0) {
- fprintf(stderr, "invalid bounces\n");
+ _err("invalid bounces");
usage();
}
nr_pages = nr_pages_per_cpu * nr_cpus;
@@ -1671,16 +1651,20 @@ int main(int argc, char **argv)
if (argc < 5)
usage();
huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
- if (huge_fd < 0) {
- fprintf(stderr, "Open of %s failed", argv[3]);
- perror("open");
- exit(1);
- }
- if (ftruncate(huge_fd, 0)) {
- fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]);
- perror("ftruncate");
- exit(1);
- }
+ if (huge_fd < 0)
+ err("Open of %s failed", argv[4]);
+ if (ftruncate(huge_fd, 0))
+ err("ftruncate %s to size 0 failed", argv[4]);
+ } else if (test_type == TEST_SHMEM) {
+ shm_fd = memfd_create(argv[0], 0);
+ if (shm_fd < 0)
+ err("memfd_create");
+ if (ftruncate(shm_fd, nr_pages * page_size * 2))
+ err("ftruncate");
+ if (fallocate(shm_fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
+ nr_pages * page_size * 2))
+ err("fallocate");
}
printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
nr_pages, nr_pages_per_cpu);
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 7ed7cd95e58f..ebc4ee0fe179 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -363,6 +363,7 @@ ip1 -6 rule add table main suppress_prefixlength 0
ip1 -4 route add default dev wg0 table 51820
ip1 -4 rule add not fwmark 51820 table 51820
ip1 -4 rule add table main suppress_prefixlength 0
+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter'
# Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
n1 ping -W 1 -c 100 -f 192.168.99.7
n1 ping -W 1 -c 100 -f abab::1111
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index 4eecb432a66c..74db83a0aedd 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_NAT=y
CONFIG_NETFILTER_XT_MATCH_LENGTH=y
CONFIG_NETFILTER_XT_MARK=y
-CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_NF_NAT_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index d6b09cb1aa2c..991591718bb0 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -1,6 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * syscall_numbering.c - test calling the x86-64 kernel with various
+ * valid and invalid system call numbers.
+ *
* Copyright (c) 2018 Andrew Lutomirski
*/
@@ -11,79 +13,470 @@
#include <stdbool.h>
#include <errno.h>
#include <unistd.h>
-#include <syscall.h>
+#include <string.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <sysexits.h>
-static int nerrs;
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
-#define X32_BIT 0x40000000UL
+#include <linux/ptrace.h>
-static void check_enosys(unsigned long nr, bool *ok)
+/* Common system call numbers */
+#define SYS_READ 0
+#define SYS_WRITE 1
+#define SYS_GETPID 39
+/* x64-only system call numbers */
+#define X64_IOCTL 16
+#define X64_READV 19
+#define X64_WRITEV 20
+/* x32-only system call numbers (without X32_BIT) */
+#define X32_IOCTL 514
+#define X32_READV 515
+#define X32_WRITEV 516
+
+#define X32_BIT 0x40000000
+
+static int nullfd = -1; /* File descriptor for /dev/null */
+static bool with_x32; /* x32 supported on this kernel? */
+
+enum ptrace_pass {
+ PTP_NOTHING,
+ PTP_GETREGS,
+ PTP_WRITEBACK,
+ PTP_FUZZRET,
+ PTP_FUZZHIGH,
+ PTP_INTNUM,
+ PTP_DONE
+};
+
+static const char * const ptrace_pass_name[] =
{
- /* If this fails, a segfault is reasonably likely. */
- fflush(stdout);
+ [PTP_NOTHING] = "just stop, no data read",
+ [PTP_GETREGS] = "only getregs",
+ [PTP_WRITEBACK] = "getregs, unmodified setregs",
+ [PTP_FUZZRET] = "modifying the default return",
+ [PTP_FUZZHIGH] = "clobbering the top 32 bits",
+ [PTP_INTNUM] = "sign-extending the syscall number",
+};
- long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
- if (ret == 0) {
- printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
- *ok = false;
- } else if (errno != ENOSYS) {
- printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
- *ok = false;
- }
+/*
+ * Shared memory block between tracer and test.
+ *
+ * main() maps it with MAP_SHARED|MAP_ANONYMOUS before any fork(), so the
+ * tracer and the traced test process operate on the same counters/flags.
+ */
+struct shared {
+ unsigned int nerr; /* Total error count */
+ unsigned int indent; /* Message indentation level */
+ enum ptrace_pass ptrace_pass; /* Pass the tracee is running; read by the tracer */
+ bool probing_syscall; /* In probe_syscall() */
+};
+/* NULL until main() has mapped the block; offset() checks for that. */
+static volatile struct shared *sh;
+
+static inline unsigned int offset(void)
+{
+ unsigned int level = sh ? sh->indent : 0;
+
+ return 8 + level * 4;
}
-static void test_x32_without_x32_bit(void)
+/*
+ * Message helpers.
+ *
+ * msg() prints a "[LVL]" tag, left-justified and padded to offset()
+ * columns, followed by the printf-style message.  run()/info()/ok() are
+ * msg() with a fixed tag.
+ */
+#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
+		## __VA_ARGS__)
+
+#define run(fmt, ...)	msg(RUN,  fmt, ## __VA_ARGS__)
+#define info(fmt, ...)	msg(INFO, fmt, ## __VA_ARGS__)
+#define ok(fmt, ...)	msg(OK,   fmt, ## __VA_ARGS__)
+
+/* Report a test failure and bump the shared error counter. */
+#define fail(fmt, ...)					\
+	do {						\
+		msg(FAIL, fmt, ## __VA_ARGS__);		\
+		sh->nerr++;				\
+	} while (0)
+
+/*
+ * Report an unrecoverable setup problem and exit with EX_OSERR.
+ *
+ * crit() is used before the shared block exists (sh == NULL) and when
+ * mapping it failed (sh == MAP_FAILED), so sh must not be dereferenced
+ * blindly here.  MAP_FAILED is normalized to NULL first because offset(),
+ * called through msg(), only guards against a NULL sh.
+ */
+#define crit(fmt, ...)					\
+	do {						\
+		if (sh == MAP_FAILED)			\
+			sh = NULL;			\
+		if (sh)					\
+			sh->indent = 0;			\
+		msg(FAIL, fmt, ## __VA_ARGS__);		\
+		msg(SKIP, "Unable to run test\n");	\
+		exit(EX_OSERR);				\
+	} while (0)
+
+/* Sentinel for ptrace-modified return value */
+#define MODIFIED_BY_PTRACE -9999
+
+/*
+ * Directly invokes the given syscall with nullfd as the first argument
+ * and the rest zero. Avoids involving glibc wrappers in case they ever
+ * end up intercepting some system calls for some reason, or modify
+ * the system call number itself.
+ */
+static long long probe_syscall(int msb, int lsb)
{
- bool ok = true;
+ register long long arg1 asm("rdi") = nullfd;
+ register long long arg2 asm("rsi") = 0;
+ register long long arg3 asm("rdx") = 0;
+ register long long arg4 asm("r10") = 0;
+ register long long arg5 asm("r8") = 0;
+ register long long arg6 asm("r9") = 0;
+ long long nr = ((long long)msb << 32) | (unsigned int)lsb;
+ long long ret;
/*
- * Syscalls 512-547 are "x32" syscalls. They are intended to be
- * called with the x32 (0x40000000) bit set. Calling them without
- * the x32 bit set is nonsense and should not work.
+ * We pass in an extra copy of the extended system call number
+ * in %rbx, so we can examine it from the ptrace handler without
+ * worrying about it being possibly modified. This is to test
+ * the validity of struct user regs.orig_rax a.k.a.
+ * struct pt_regs.orig_ax.
*/
- printf("[RUN]\tChecking syscalls 512-547\n");
- for (int i = 512; i <= 547; i++)
- check_enosys(i, &ok);
+ sh->probing_syscall = true;
+ asm volatile("syscall"
+ : "=a" (ret)
+ : "a" (nr), "b" (nr),
+ "r" (arg1), "r" (arg2), "r" (arg3),
+ "r" (arg4), "r" (arg5), "r" (arg6)
+ : "rcx", "r11", "memory", "cc");
+ sh->probing_syscall = false;
+
+ return ret;
+}
+
+static const char *syscall_str(int msb, int start, int end)
+{
+ static char buf[64];
+ const char * const type = (start & X32_BIT) ? "x32" : "x64";
+ int lsb = start;
/*
- * Check that a handful of 64-bit-only syscalls are rejected if the x32
- * bit is set.
+ * Improve readability by stripping the x32 bit, but round
+ * toward zero so we don't display -1 as -1073741825.
*/
- printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
- check_enosys(16 | X32_BIT, &ok); /* ioctl */
- check_enosys(19 | X32_BIT, &ok); /* readv */
- check_enosys(20 | X32_BIT, &ok); /* writev */
+ if (lsb < 0)
+ lsb |= X32_BIT;
+ else
+ lsb &= ~X32_BIT;
+
+ if (start == end)
+ snprintf(buf, sizeof buf, "%s syscall %d:%d",
+ type, msb, lsb);
+ else
+ snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
+ type, msb, lsb, lsb + (end-start));
+
+ return buf;
+}
+
+/*
+ * Probe every system call number in [start, end], combined with the
+ * upper 32 bits given by msb, and compare each return value with expect.
+ *
+ * Each mismatch is reported with fail(); for a range (start != end) a
+ * summary failure line is printed as well.  If nothing mismatched, a
+ * single ok() line is printed instead.  expect_str is the textual form
+ * of expect used in the messages.
+ *
+ * Returns the number of system calls whose return value did not match.
+ */
+static unsigned int _check_for(int msb, int start, int end, long long expect,
+			       const char *expect_str)
+{
+	unsigned int err = 0;
+
+	/* Per-syscall messages of a range are nested one level deeper */
+	sh->indent++;
+	if (start != end)
+		sh->indent++;
+
+	for (int nr = start; nr <= end; nr++) {
+		long long ret = probe_syscall(msb, nr);
+
+		if (ret != expect) {
+			fail("%s returned %lld, but it should have returned %s\n",
+			     syscall_str(msb, nr, nr),
+			     ret, expect_str);
+			err++;
+		}
+	}
+
+	if (start != end)
+		sh->indent--;
+
+	if (err) {
+		/* Plural "s" for anything but exactly one failure */
+		if (start != end)
+			fail("%s had %u failure%s\n",
+			     syscall_str(msb, start, end),
+			     err, err == 1 ? "" : "s");
+	} else {
+		ok("%s returned %s as expected\n",
+		   syscall_str(msb, start, end), expect_str);
+	}
+
+	sh->indent--;
+
+	return err;
+}
+
+/* Wrapper that also passes the expected value's source text for messages */
+#define check_for(msb,start,end,expect) \
+	_check_for(msb,start,end,expect,#expect)
+
+/*
+ * Expect system call msb:nr to return 0.
+ * Note: the result is true when the check FAILED (non-zero error count).
+ */
+static bool check_zero(int msb, int nr)
+{
+	unsigned int nfail = check_for(msb, nr, nr, 0);
+
+	return nfail != 0;
+}
+
+/*
+ * Expect system call msb:nr to return -ENOSYS.
+ * Note: the result is true when the check FAILED (non-zero error count).
+ */
+static bool check_enosys(int msb, int nr)
+{
+	unsigned int nfail = check_for(msb, nr, nr, -ENOSYS);
+
+	return nfail != 0;
+}
+
+/*
+ * Report whether the running kernel supports x32, since that is the
+ * first thing anyone looking into a failure will ask.  The answer is
+ * stored in with_x32 and returned, so it can also be used to select
+ * which tests to run.
+ *
+ * Detection: call getpid() with the x32 bit set.  Our own pid means
+ * x32 works, -ENOSYS means it does not; anything else is a failure and
+ * is treated as "no x32".
+ */
+static bool test_x32(void)
+{
+	const pid_t self = getpid();
+	long long rv;
+
+	run("Checking for x32 by calling x32 getpid()\n");
+	rv = probe_syscall(0, SYS_GETPID | X32_BIT);
+
+	sh->indent++;
+	with_x32 = (rv == self);
+	if (with_x32) {
+		info("x32 is supported\n");
+	} else if (rv == -ENOSYS) {
+		info("x32 is not supported\n");
+	} else {
+		fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", rv, (long long)self);
+	}
+	sh->indent--;
+
+	return with_x32;
+}
+
+/*
+ * Checks that apply whether or not the kernel supports x32: a few valid
+ * 64-bit system calls must return 0 and several out-of-range numbers
+ * must return -ENOSYS, all with the given upper 32 bits (msb).
+ *
+ * The check_for() invocations are spelled out one by one on purpose:
+ * the macro stringifies its last argument for the log messages.
+ */
+static void test_syscalls_common(int msb)
+{
+	const enum ptrace_pass cur_pass = sh->ptrace_pass;
+
+	run("Checking some common syscalls as 64 bit\n");
+	check_zero(msb, SYS_READ);
+	check_zero(msb, SYS_WRITE);
+
+	run("Checking some 64-bit only syscalls as 64 bit\n");
+	check_zero(msb, X64_READV);
+	check_zero(msb, X64_WRITEV);
+
+	run("Checking out of range system calls\n");
+	check_for(msb, -64, -2, -ENOSYS);
+	/*
+	 * From the PTP_FUZZRET pass on, syscall -1 is expected to hand back
+	 * the sentinel value instead of -ENOSYS.
+	 */
+	if (cur_pass < PTP_FUZZRET)
+		check_for(msb, -1, -1, -ENOSYS);
+	else
+		check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
+	check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
+	check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
+	check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
+}
+static void test_syscalls_with_x32(int msb)
+{
/*
- * Check some syscalls with high bits set.
+ * Syscalls 512-547 are "x32" syscalls. They are
+ * intended to be called with the x32 (0x40000000) bit
+ * set. Calling them without the x32 bit set is
+ * nonsense and should not work.
*/
- printf("[RUN]\tChecking numbers above 2^32-1\n");
- check_enosys((1UL << 32), &ok);
- check_enosys(X32_BIT | (1UL << 32), &ok);
+ run("Checking x32 syscalls as 64 bit\n");
+ check_for(msb, 512, 547, -ENOSYS);
- if (!ok)
- nerrs++;
- else
- printf("[OK]\tThey all returned -ENOSYS\n");
+ run("Checking some common syscalls as x32\n");
+ check_zero(msb, SYS_READ | X32_BIT);
+ check_zero(msb, SYS_WRITE | X32_BIT);
+
+ run("Checking some x32 syscalls as x32\n");
+ check_zero(msb, X32_READV | X32_BIT);
+ check_zero(msb, X32_WRITEV | X32_BIT);
+
+ run("Checking some 64-bit syscalls as x32\n");
+ check_enosys(msb, X64_IOCTL | X32_BIT);
+ check_enosys(msb, X64_READV | X32_BIT);
+ check_enosys(msb, X64_WRITEV | X32_BIT);
}
-int main()
+static void test_syscalls_without_x32(int msb)
{
+ run("Checking for absence of x32 system calls\n");
+ check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
+}
+
+static void test_syscall_numbering(void)
+{
+ static const int msbs[] = {
+ 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
+ INT_MIN, INT_MIN+1
+ };
+
+ sh->indent++;
+
/*
- * Anyone diagnosing a failure will want to know whether the kernel
- * supports x32. Tell them.
+ * The MSB is supposed to be ignored, so we loop over a few
+ * to test that out.
*/
- printf("\tChecking for x32...");
- fflush(stdout);
- if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
- printf(" supported\n");
- } else if (errno == ENOSYS) {
- printf(" not supported\n");
+ for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
+ int msb = msbs[i];
+ run("Checking system calls with msb = %d (0x%x)\n",
+ msb, msb);
+
+ sh->indent++;
+
+ test_syscalls_common(msb);
+ if (with_x32)
+ test_syscalls_with_x32(msb);
+ else
+ test_syscalls_without_x32(msb);
+
+ sh->indent--;
+ }
+
+ sh->indent--;
+}
+
+/*
+ * Body of the traced test process: ask to be traced, stop so the parent
+ * can take over, then run the whole test suite once for every ptrace
+ * pass.  The current pass is published through sh->ptrace_pass; after
+ * the last pass it is left at PTP_DONE.
+ */
+static void syscall_numbering_tracee(void)
+{
+	enum ptrace_pass pass = PTP_NOTHING;
+
+	if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+		crit("Failed to request tracing\n");
+		return;
+	}
+	raise(SIGSTOP);
+
+	sh->ptrace_pass = pass;
+	while (pass < PTP_DONE) {
+		run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
+		test_syscall_numbering();
+		sh->ptrace_pass = ++pass;
+	}
+}
+
+/*
+ * Called by the tracer when the tracee is stopped inside probe_syscall().
+ * Depending on the pass, the tracee's registers are left alone, only
+ * read, written back unchanged, or written back with a modified return
+ * value and/or system call number.
+ *
+ * Whenever the registers are read, orig_rax is compared with the copy
+ * of the system call number that probe_syscall() placed in rbx.
+ *
+ * A failing PTRACE_GETREGS or PTRACE_SETREGS is reported through fail();
+ * after a failed GETREGS nothing is compared or written back, because
+ * regs would hold indeterminate data.
+ */
+static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
+{
+	struct user_regs_struct regs;
+
+	sh->probing_syscall = false; /* Do this on entry only */
+
+	/* For these, don't even getregs */
+	if (pass == PTP_NOTHING || pass == PTP_DONE)
+		return;
+
+	if (ptrace(PTRACE_GETREGS, testpid, NULL, &regs)) {
+		fail("PTRACE_GETREGS failed: %s\n", strerror(errno));
+		return;
+	}
+
+	if (regs.orig_rax != regs.rbx) {
+		fail("orig_rax %#llx doesn't match syscall number %#llx\n",
+		     (unsigned long long)regs.orig_rax,
+		     (unsigned long long)regs.rbx);
+	}
+
+	switch (pass) {
+	case PTP_GETREGS:
+		/* Just read, no writeback */
+		return;
+	case PTP_WRITEBACK:
+		/* Write back the same register state verbatim */
+		break;
+	case PTP_FUZZRET:
+		regs.rax = MODIFIED_BY_PTRACE;
+		break;
+	case PTP_FUZZHIGH:
+		regs.rax = MODIFIED_BY_PTRACE;
+		regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
+		break;
+	case PTP_INTNUM:
+		regs.rax = MODIFIED_BY_PTRACE;
+		regs.orig_rax = (int)regs.orig_rax;
+		break;
+	default:
+		crit("invalid ptrace_pass\n");
+		break;
+	}
+
+	if (ptrace(PTRACE_SETREGS, testpid, NULL, &regs))
+		fail("PTRACE_SETREGS failed: %s\n", strerror(errno));
+}
+
+/*
+ * Tracer side: resume the tracee from one ptrace stop to the next until
+ * it has announced PTP_DONE, tampering with its registers (see
+ * mess_with_syscall()) whenever it is stopped inside probe_syscall().
+ * Then detach and wait for the tracee to terminate.
+ *
+ * If the tracee terminates while it is still being traced, the waitpid()
+ * that reported it has already reaped it, so we return right away.  The
+ * final wait also ends when the tracee is killed by a signal or when
+ * waitpid() fails for good (e.g. ECHILD), so it cannot spin forever.
+ */
+static void syscall_numbering_tracer(pid_t testpid)
+{
+	int wstatus;
+
+	do {
+		pid_t wpid = waitpid(testpid, &wstatus, 0);
+		if (wpid < 0 && errno != EINTR)
+			break;
+		if (wpid != testpid)
+			continue;
+		if (!WIFSTOPPED(wstatus))
+			return;	/* Tracee terminated and has been reaped */
+
+		if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
+			mess_with_syscall(testpid, sh->ptrace_pass);
+	} while (sh->ptrace_pass != PTP_DONE &&
+		 !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
+
+	ptrace(PTRACE_DETACH, testpid, NULL, NULL);
+
+	/* Wait for the child process to terminate */
+	for (;;) {
+		pid_t wpid = waitpid(testpid, &wstatus, 0);
+
+		if (wpid == testpid &&
+		    (WIFEXITED(wstatus) || WIFSIGNALED(wstatus)))
+			break;
+		if (wpid < 0 && errno != EINTR)
+			break;	/* Nothing left to wait for */
+	}
+}
+
+static void test_traced_syscall_numbering(void)
+{
+ pid_t testpid;
+
+ /* Launch the test thread; this thread continues as the tracer thread */
+ testpid = fork();
+
+ if (testpid < 0) {
+ crit("Unable to launch tracer process\n");
+ } else if (testpid == 0) {
+ syscall_numbering_tracee();
+ _exit(0);
} else {
- printf(" confused\n");
+ syscall_numbering_tracer(testpid);
}
+}
- test_x32_without_x32_bit();
+int main(void)
+{
+ unsigned int nerr;
- return nerrs ? 1 : 0;
+ /*
+ * It is quite likely to get a segfault on a failure, so make
+ * sure the message gets out by setting stdout to nonbuffered.
+ */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
+ /*
+ * Harmless file descriptor to work on...
+ */
+ nullfd = open("/dev/null", O_RDWR);
+ if (nullfd < 0) {
+ crit("Unable to open /dev/null: %s\n", strerror(errno));
+ }
+
+ /*
+ * Set up a block of shared memory...
+ */
+ sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_SHARED, 0, 0);
+ if (sh == MAP_FAILED) {
+ crit("Unable to allocated shared memory block: %s\n",
+ strerror(errno));
+ }
+
+ with_x32 = test_x32();
+
+ run("Running tests without ptrace...\n");
+ test_syscall_numbering();
+
+ test_traced_syscall_numbering();
+
+ nerr = sh->nerr;
+ if (!nerr) {
+ ok("All system calls succeeded or failed as expected\n");
+ return 0;
+ } else {
+ fail("A total of %u system call%s had incorrect behavior\n",
+ nerr, nerr != 1 ? "s" : "");
+ return 1;
+ }
}
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 93cbd6f603f9..2acbb7703c6a 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -84,7 +84,7 @@ void vsock_wait_remote_close(int fd)
}
/* Connect to <cid, port> and return the file descriptor. */
-int vsock_stream_connect(unsigned int cid, unsigned int port)
+static int vsock_connect(unsigned int cid, unsigned int port, int type)
{
union {
struct sockaddr sa;
@@ -101,7 +101,7 @@ int vsock_stream_connect(unsigned int cid, unsigned int port)
control_expectln("LISTENING");
- fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+ fd = socket(AF_VSOCK, type, 0);
timeout_begin(TIMEOUT);
do {
@@ -120,11 +120,21 @@ int vsock_stream_connect(unsigned int cid, unsigned int port)
return fd;
}
+int vsock_stream_connect(unsigned int cid, unsigned int port)
+{
+ return vsock_connect(cid, port, SOCK_STREAM);
+}
+
+int vsock_seqpacket_connect(unsigned int cid, unsigned int port)
+{
+ return vsock_connect(cid, port, SOCK_SEQPACKET);
+}
+
/* Listen on <cid, port> and return the first incoming connection. The remote
* address is stored to clientaddrp. clientaddrp may be NULL.
*/
-int vsock_stream_accept(unsigned int cid, unsigned int port,
- struct sockaddr_vm *clientaddrp)
+static int vsock_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp, int type)
{
union {
struct sockaddr sa;
@@ -145,7 +155,7 @@ int vsock_stream_accept(unsigned int cid, unsigned int port,
int client_fd;
int old_errno;
- fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+ fd = socket(AF_VSOCK, type, 0);
if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
perror("bind");
@@ -189,6 +199,18 @@ int vsock_stream_accept(unsigned int cid, unsigned int port,
return client_fd;
}
+int vsock_stream_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp)
+{
+ return vsock_accept(cid, port, clientaddrp, SOCK_STREAM);
+}
+
+int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp)
+{
+ return vsock_accept(cid, port, clientaddrp, SOCK_SEQPACKET);
+}
+
/* Transmit one byte and check the return value.
*
* expected_ret:
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index e53dd09d26d9..a3375ad2fb7f 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -36,8 +36,11 @@ struct test_case {
void init_signals(void);
unsigned int parse_cid(const char *str);
int vsock_stream_connect(unsigned int cid, unsigned int port);
+int vsock_seqpacket_connect(unsigned int cid, unsigned int port);
int vsock_stream_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp);
+int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp);
void vsock_wait_remote_close(int fd);
void send_byte(int fd, int expected_ret, int flags);
void recv_byte(int fd, int expected_ret, int flags);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 5a4fb80fa832..67766bfe176f 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -14,6 +14,8 @@
#include <errno.h>
#include <unistd.h>
#include <linux/kernel.h>
+#include <sys/types.h>
+#include <sys/socket.h>
#include "timeout.h"
#include "control.h"
@@ -279,6 +281,110 @@ static void test_stream_msg_peek_server(const struct test_opts *opts)
close(fd);
}
+#define MESSAGES_CNT 7
+static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Send several one-byte messages; flags are 0, so none carries MSG_EOR */
+ for (int i = 0; i < MESSAGES_CNT; i++)
+ send_byte(fd, 1, 0);
+
+ control_writeln("SENDDONE");
+ close(fd);
+}
+
+static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
+{
+ int fd;
+ char buf[16];
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("SENDDONE");
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ for (int i = 0; i < MESSAGES_CNT; i++) {
+ if (recvmsg(fd, &msg, 0) != 1) {
+ perror("message bound violated");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ close(fd);
+}
+
+#define MESSAGE_TRUNC_SZ 32
+static void test_seqpacket_msg_trunc_client(const struct test_opts *opts)
+{
+ int fd;
+ char buf[MESSAGE_TRUNC_SZ];
+
+ fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ if (send(fd, buf, sizeof(buf), 0) != sizeof(buf)) {
+ perror("send failed");
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("SENDDONE");
+ close(fd);
+}
+
+static void test_seqpacket_msg_trunc_server(const struct test_opts *opts)
+{
+ int fd;
+ char buf[MESSAGE_TRUNC_SZ / 2];
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("SENDDONE");
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ssize_t ret = recvmsg(fd, &msg, MSG_TRUNC);
+
+ if (ret != MESSAGE_TRUNC_SZ) {
+ printf("%zi\n", ret);
+ perror("MSG_TRUNC doesn't work");
+ exit(EXIT_FAILURE);
+ }
+
+ if (!(msg.msg_flags & MSG_TRUNC)) {
+ fprintf(stderr, "MSG_TRUNC expected\n");
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -309,6 +415,16 @@ static struct test_case test_cases[] = {
.run_client = test_stream_msg_peek_client,
.run_server = test_stream_msg_peek_server,
},
+ {
+ .name = "SOCK_SEQPACKET msg bounds",
+ .run_client = test_seqpacket_msg_bounds_client,
+ .run_server = test_seqpacket_msg_bounds_server,
+ },
+ {
+ .name = "SOCK_SEQPACKET MSG_TRUNC flag",
+ .run_client = test_seqpacket_msg_trunc_client,
+ .run_server = test_seqpacket_msg_trunc_server,
+ },
{},
};
diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c
index 85eb65ea16d3..0e75f22c9475 100644
--- a/tools/vm/page_owner_sort.c
+++ b/tools/vm/page_owner_sort.c
@@ -132,6 +132,10 @@ int main(int argc, char **argv)
qsort(list, list_size, sizeof(list[0]), compare_txt);
list2 = malloc(sizeof(*list) * list_size);
+ if (!list2) {
+ printf("Out of memory\n");
+ exit(1);
+ }
printf("culling\n");